@@ -43,10 +43,11 @@ headers: { 'Authorization': `Bearer ${token}` }
|
||||
```
|
||||
dashboard/src/
|
||||
├── app/ # Next.js App Router pages
|
||||
│ ├── control/ # Mission control panel (chat interface)
|
||||
│ ├── history/ # Agents page - mission history + tree modal
|
||||
│ ├── console/ # SSH terminal + file explorer
|
||||
│ ├── control/ # Task control panel
|
||||
│ ├── agents/ # Agent tree visualization
|
||||
│ ├── modules/ # MCP module management
|
||||
│ ├── files/ # File explorer
|
||||
│ └── settings/ # Configuration
|
||||
├── components/
|
||||
│ ├── ui/ # Generic UI components
|
||||
@@ -56,6 +57,17 @@ dashboard/src/
|
||||
└── lib/ # Utilities (api, auth, settings)
|
||||
```
|
||||
|
||||
## Navigation Structure
|
||||
|
||||
| Nav Item | Route | Description |
|
||||
|----------|-------|-------------|
|
||||
| Overview | `/` | Global stats and recent missions |
|
||||
| Mission | `/control` | Active mission chat interface |
|
||||
| Agents | `/history` | Mission history with tree visualization modal |
|
||||
| Console | `/console` | SSH terminal access |
|
||||
| Modules | `/modules` | MCP server management |
|
||||
| Settings | `/settings` | Configuration |
|
||||
|
||||
## Refresh Resilience Pattern
|
||||
|
||||
The dashboard maintains state snapshots on the backend so users can refresh or navigate away without losing visual state.
|
||||
@@ -96,7 +108,11 @@ useEffect(() => {
|
||||
|
||||
## Agent Tree Visualization
|
||||
|
||||
Dynamic, animated tree visualization for hierarchical agent execution. Uses SVG + framer-motion for smooth animations.
|
||||
Dynamic, animated tree visualization for hierarchical agent execution. Uses SVG + framer-motion for smooth animations.
|
||||
|
||||
The tree is accessed via a **modal** from the Agents page (formerly History):
|
||||
- Click the tree icon on any mission row to open the full-screen modal
|
||||
- Modal shows the agent execution tree with pan/zoom controls
|
||||
|
||||
### Component Structure
|
||||
|
||||
@@ -114,48 +130,45 @@ components/agent-tree/
|
||||
```tsx
|
||||
import {
|
||||
AgentTreeCanvas,
|
||||
generateComplexTree,
|
||||
simulateTreeUpdates,
|
||||
type AgentNode
|
||||
} from '@/components/agent-tree';
|
||||
|
||||
// With real data from backend
|
||||
// Full mode (in Control page panel)
|
||||
<AgentTreeCanvas
|
||||
tree={agentTree}
|
||||
selectedNodeId={selectedId}
|
||||
onSelectNode={(node) => setSelectedId(node?.id ?? null)}
|
||||
/>
|
||||
|
||||
// Demo mode (for testing without API)
|
||||
const [tree, setTree] = useState(generateComplexTree());
|
||||
useEffect(() => simulateTreeUpdates(tree, setTree), []);
|
||||
<AgentTreeCanvas tree={tree} />
|
||||
// Compact mode (in History page preview, side panels)
|
||||
<AgentTreeCanvas tree={agentTree} compact className="w-full h-full" />
|
||||
```
|
||||
|
||||
### Props
|
||||
|
||||
| Prop | Type | Description |
|
||||
|------|------|-------------|
|
||||
| `tree` | `AgentNode \| null` | Tree data to visualize |
|
||||
| `onSelectNode` | `(node: AgentNode \| null) => void` | Node selection callback |
|
||||
| `selectedNodeId` | `string \| null` | Currently selected node ID |
|
||||
| `compact` | `boolean` | Compact mode - hides minimap and details panel |
|
||||
| `className` | `string` | Additional CSS classes |
|
||||
|
||||
### Node Display
|
||||
|
||||
Each node shows:
|
||||
- **Icon**: Agent type (Bot, Brain, Cpu, Zap, Target, GitBranch)
|
||||
- **Name**: Truncated agent name
|
||||
- **Model**: LLM model used (e.g., `claude-sonnet-4.5`)
|
||||
- **Model**: LLM model used (e.g., `gemini-3-flash`)
|
||||
- **Status**: Running (pulse), Completed (✓), Failed (✗), Pending (clock)
|
||||
- **Budget**: Spent / Allocated (e.g., `$0.35 / $9.00`)
|
||||
|
||||
### Demo Modes
|
||||
|
||||
Three demo tree generators for testing:
|
||||
- `generateSimpleTree()` – Basic orchestrator (5 nodes)
|
||||
- `generateComplexTree()` – Subtask decomposition (10-15 nodes)
|
||||
- `generateDeepTree(depth)` – Recursive nesting (50+ nodes)
|
||||
|
||||
Use `simulateTreeUpdates(tree, setTree)` to simulate live status changes.
|
||||
|
||||
### Interactions
|
||||
|
||||
- **Pan**: Click and drag to move the tree
|
||||
- **Zoom**: Mouse wheel or +/- buttons
|
||||
- **Select**: Click a node to show details panel
|
||||
- **Reset**: Reset view button restores initial position
|
||||
- **Select**: Click a node to show details panel (full mode only)
|
||||
- **Fit**: Reset view button fits tree to viewport
|
||||
|
||||
## Environment Variables
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
description: Core Open Agent architecture - hierarchical agent system with full machine access
|
||||
description: Core Open Agent architecture - SimpleAgent system with full machine access
|
||||
alwaysApply: true
|
||||
---
|
||||
|
||||
@@ -13,26 +13,30 @@ Minimal autonomous coding agent in Rust with **full machine access** (not sandbo
|
||||
|-----------|----------|---------|
|
||||
| Backend (Rust) | `src/` | HTTP API + agent system |
|
||||
| Dashboard (Next.js) | `dashboard/` | Web UI (Bun, not npm) |
|
||||
| iOS Dashboard | `ios_dashboard/` | Native iOS app (Swift/SwiftUI) |
|
||||
| MCP configs | `.open_agent/mcp/config.json` | Model Context Protocol servers |
|
||||
| Tuning | `.open_agent/tuning.json` | Calibration data |
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
RootAgent (orchestrator)
|
||||
├── ComplexityEstimator (leaf) → estimates task difficulty 0-1
|
||||
├── ModelSelector (leaf) → U-curve cost optimization
|
||||
├── TaskExecutor (leaf) → runs tools in a loop
|
||||
└── Verifier (leaf) → hybrid programmatic + LLM verification
|
||||
SimpleAgent
|
||||
└── TaskExecutor → runs tools in a loop with auto-upgrade
|
||||
```
|
||||
|
||||
The agent system was simplified from a complex hierarchical orchestrator to a single `SimpleAgent` that:
|
||||
- Automatically upgrades outdated model names via `ModelResolver`
|
||||
- Uses `TaskExecutor` for tool-based execution
|
||||
- Supports model overrides per mission/message
|
||||
- Handles parallel mission execution
|
||||
|
||||
### Module Map
|
||||
|
||||
```
|
||||
src/
|
||||
├── agents/ # Hierarchical agent system
|
||||
│ ├── orchestrator/ # RootAgent, NodeAgent
|
||||
│ └── leaf/ # ComplexityEstimator, ModelSelector, TaskExecutor, Verifier
|
||||
├── agents/ # Agent system
|
||||
│ ├── simple.rs # SimpleAgent (main entry point)
|
||||
│ └── leaf/ # TaskExecutor
|
||||
├── budget/ # Cost tracking, pricing, smart retry
|
||||
│ ├── benchmarks.rs # Model capability scores from llm-stats.com
|
||||
│ ├── pricing.rs # OpenRouter pricing + model allowlist
|
||||
@@ -79,19 +83,44 @@ src/
|
||||
| Method | Path | Purpose |
|
||||
|--------|------|---------|
|
||||
| `POST` | `/api/control/message` | Send message to agent |
|
||||
| `POST` | `/api/control/tool_result` | Submit frontend tool result |
|
||||
| `GET` | `/api/control/stream` | SSE event stream |
|
||||
| `POST` | `/api/control/cancel` | Cancel current execution |
|
||||
| `GET` | `/api/control/tree` | Get agent tree snapshot (refresh resilience) |
|
||||
| `GET` | `/api/control/progress` | Get execution progress ("Subtask X/Y") |
|
||||
|
||||
### Mission Endpoints
|
||||
|
||||
| Method | Path | Purpose |
|
||||
|--------|------|---------|
|
||||
| `GET` | `/api/control/missions` | List all missions |
|
||||
| `POST` | `/api/control/missions` | Create new mission |
|
||||
| `GET` | `/api/control/missions/current` | Get current mission |
|
||||
| `POST` | `/api/control/missions` | Create new mission (optional: title, model_override) |
|
||||
| `GET` | `/api/control/missions/current` | Get current active mission |
|
||||
| `GET` | `/api/control/missions/:id` | Get specific mission |
|
||||
| `GET` | `/api/control/missions/:id/tree` | Get mission's agent tree |
|
||||
| `POST` | `/api/control/missions/:id/load` | Switch to mission |
|
||||
| `POST` | `/api/control/missions/:id/status` | Set mission status |
|
||||
| `POST` | `/api/control/missions/:id/cancel` | Cancel specific mission |
|
||||
| `POST` | `/api/control/missions/:id/resume` | Resume interrupted mission |
|
||||
| `POST` | `/api/control/missions/:id/parallel` | Start mission in parallel |
|
||||
|
||||
### Parallel Execution Endpoints
|
||||
|
||||
| Method | Path | Purpose |
|
||||
|--------|------|---------|
|
||||
| `GET` | `/api/control/running` | List running missions |
|
||||
| `GET` | `/api/control/parallel/config` | Get parallel execution config |
|
||||
|
||||
### Mission Statuses
|
||||
|
||||
Missions can be in one of these states:
|
||||
- `active` - Currently being worked on
|
||||
- `completed` - Successfully finished
|
||||
- `failed` - Failed with errors
|
||||
- `interrupted` - Stopped due to server shutdown/cancellation (resumable)
|
||||
- `blocked` - Blocked by external factors (resumable)
|
||||
- `not_feasible` - Cannot be completed as specified
|
||||
|
||||
## Model Selection (U-Curve)
|
||||
|
||||
- **Cheap models**: low token cost, high failure rate, more retries
|
||||
|
||||
@@ -1,594 +0,0 @@
|
||||
'use client';
|
||||
|
||||
/**
|
||||
* Agent Tree Page
|
||||
*
|
||||
* Dynamic visualization of the hierarchical agent execution tree.
|
||||
* Shows real-time updates as agents are created, run, and complete.
|
||||
*/
|
||||
|
||||
import { useEffect, useMemo, useState, useRef, useCallback } from 'react';
|
||||
import Link from 'next/link';
|
||||
import { toast } from 'sonner';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { listMissions, getCurrentMission, streamControl, getAgentTree, getProgress, getMissionTree, Mission, ControlRunState, ExecutionProgress } from '@/lib/api';
|
||||
import { ShimmerSidebarItem } from '@/components/ui/shimmer';
|
||||
import {
|
||||
AgentTreeCanvas,
|
||||
generateSimpleTree,
|
||||
generateComplexTree,
|
||||
generateDeepTree,
|
||||
simulateTreeUpdates,
|
||||
type AgentNode,
|
||||
} from '@/components/agent-tree';
|
||||
import {
|
||||
Bot,
|
||||
CheckCircle,
|
||||
XCircle,
|
||||
Loader,
|
||||
Clock,
|
||||
Search,
|
||||
Layers,
|
||||
FlaskConical,
|
||||
Play,
|
||||
Pause,
|
||||
MessageSquare,
|
||||
} from 'lucide-react';
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === 'object' && value !== null;
|
||||
}
|
||||
|
||||
type DemoMode = 'off' | 'simple' | 'complex' | 'deep';
|
||||
|
||||
export default function AgentsPage() {
|
||||
const [missions, setMissions] = useState<Mission[]>([]);
|
||||
const [currentMission, setCurrentMission] = useState<Mission | null>(null);
|
||||
const [controlState, setControlState] = useState<ControlRunState>('idle');
|
||||
const [selectedMissionId, setSelectedMissionId] = useState<string | null>(null);
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [realTree, setRealTree] = useState<AgentNode | null>(null);
|
||||
const [selectedNodeId, setSelectedNodeId] = useState<string | null>(null);
|
||||
const [progress, setProgress] = useState<ExecutionProgress | null>(null);
|
||||
|
||||
// Demo mode state
|
||||
const [demoMode, setDemoMode] = useState<DemoMode>('off');
|
||||
const [demoTree, setDemoTree] = useState<AgentNode | null>(null);
|
||||
const [demoRunning, setDemoRunning] = useState(false);
|
||||
const demoCleanupRef = useRef<(() => void) | null>(null);
|
||||
|
||||
const fetchedRef = useRef(false);
|
||||
const streamCleanupRef = useRef<null | (() => void)>(null);
|
||||
|
||||
const selectedMission = useMemo(
|
||||
() => missions.find((m) => m.id === selectedMissionId) ?? currentMission,
|
||||
[missions, selectedMissionId, currentMission]
|
||||
);
|
||||
|
||||
// Convert backend tree node to frontend AgentNode
|
||||
const convertTreeNode = useCallback((node: Record<string, unknown>): AgentNode => {
|
||||
const children = (node['children'] as Record<string, unknown>[] | undefined) ?? [];
|
||||
return {
|
||||
id: String(node['id'] ?? ''),
|
||||
type: (String(node['node_type'] ?? 'Node') as AgentNode['type']),
|
||||
status: (String(node['status'] ?? 'pending') as AgentNode['status']),
|
||||
name: String(node['name'] ?? ''),
|
||||
description: String(node['description'] ?? ''),
|
||||
model: node['selected_model'] != null ? String(node['selected_model']) : undefined,
|
||||
budgetAllocated: Number(node['budget_allocated'] ?? 0),
|
||||
budgetSpent: Number(node['budget_spent'] ?? 0),
|
||||
complexity: node['complexity'] != null ? Number(node['complexity']) : undefined,
|
||||
children: children.map((c) => convertTreeNode(c)),
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Stream control events for real-time status and tree updates
|
||||
// First fetch snapshot, then subscribe to live updates
|
||||
useEffect(() => {
|
||||
streamCleanupRef.current?.();
|
||||
let mounted = true;
|
||||
|
||||
// Fetch initial snapshot for refresh resilience
|
||||
const fetchSnapshot = async () => {
|
||||
try {
|
||||
const [treeSnapshot, progressSnapshot] = await Promise.all([
|
||||
getAgentTree().catch(() => null),
|
||||
getProgress().catch(() => null),
|
||||
]);
|
||||
if (!mounted) return;
|
||||
|
||||
if (treeSnapshot) {
|
||||
setRealTree(convertTreeNode(treeSnapshot as unknown as Record<string, unknown>));
|
||||
}
|
||||
if (progressSnapshot) {
|
||||
setProgress(progressSnapshot);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to fetch snapshot:', e);
|
||||
}
|
||||
};
|
||||
|
||||
fetchSnapshot();
|
||||
|
||||
const cleanup = streamControl((event) => {
|
||||
const data: unknown = event.data;
|
||||
if (event.type === 'status' && isRecord(data)) {
|
||||
const st = data['state'];
|
||||
setControlState(typeof st === 'string' ? (st as ControlRunState) : 'idle');
|
||||
|
||||
// Clear real tree and progress when idle
|
||||
if (st === 'idle') {
|
||||
setRealTree(null);
|
||||
setProgress(null);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle real-time tree updates
|
||||
if (event.type === 'agent_tree' && isRecord(data)) {
|
||||
const tree = data['tree'];
|
||||
if (isRecord(tree)) {
|
||||
const converted = convertTreeNode(tree);
|
||||
setRealTree(converted);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle progress updates
|
||||
if (event.type === 'progress' && isRecord(data)) {
|
||||
setProgress({
|
||||
total_subtasks: Number(data['total_subtasks'] ?? 0),
|
||||
completed_subtasks: Number(data['completed_subtasks'] ?? 0),
|
||||
current_subtask: data['current_subtask'] as string | null,
|
||||
current_depth: Number(data['depth'] ?? 0),
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
streamCleanupRef.current = cleanup;
|
||||
return () => {
|
||||
mounted = false;
|
||||
streamCleanupRef.current?.();
|
||||
streamCleanupRef.current = null;
|
||||
};
|
||||
}, [convertTreeNode]);
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false;
|
||||
let hasShownError = false;
|
||||
|
||||
const fetchData = async () => {
|
||||
try {
|
||||
const [missionsData, currentMissionData] = await Promise.all([
|
||||
listMissions().catch(() => []),
|
||||
getCurrentMission().catch(() => null),
|
||||
]);
|
||||
if (cancelled) return;
|
||||
|
||||
fetchedRef.current = true;
|
||||
setMissions(missionsData);
|
||||
setCurrentMission(currentMissionData);
|
||||
|
||||
if (!selectedMissionId && currentMissionData) {
|
||||
setSelectedMissionId(currentMissionData.id);
|
||||
}
|
||||
hasShownError = false;
|
||||
} catch (error) {
|
||||
if (!hasShownError) {
|
||||
toast.error('Failed to fetch missions');
|
||||
hasShownError = true;
|
||||
}
|
||||
console.error('Failed to fetch data:', error);
|
||||
} finally {
|
||||
if (!cancelled) {
|
||||
setLoading(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
fetchData();
|
||||
const interval = setInterval(fetchData, 5000);
|
||||
return () => {
|
||||
cancelled = true;
|
||||
clearInterval(interval);
|
||||
};
|
||||
}, [selectedMissionId]);
|
||||
|
||||
const filteredMissions = useMemo(() => {
|
||||
if (!searchQuery.trim()) return missions;
|
||||
const query = searchQuery.toLowerCase();
|
||||
return missions.filter((m) =>
|
||||
m.title?.toLowerCase().includes(query) ||
|
||||
m.id.toLowerCase().includes(query)
|
||||
);
|
||||
}, [missions, searchQuery]);
|
||||
|
||||
const controlStateToStatus = (state: ControlRunState, missionStatus?: string): AgentNode['status'] => {
|
||||
if (state === 'running' || state === 'waiting_for_tool') return 'running';
|
||||
if (missionStatus === 'completed') return 'completed';
|
||||
if (missionStatus === 'failed') return 'failed';
|
||||
if (missionStatus === 'interrupted') return 'pending'; // Show as pending (resumable)
|
||||
return 'pending';
|
||||
};
|
||||
|
||||
// Build a basic agent tree from mission data when no real tree is available
|
||||
const buildFallbackTree = useCallback((): AgentNode | null => {
|
||||
if (!selectedMission) return null;
|
||||
|
||||
const rootStatus = controlStateToStatus(controlState, selectedMission.status);
|
||||
|
||||
return {
|
||||
id: 'root',
|
||||
type: 'Root',
|
||||
status: rootStatus,
|
||||
name: 'Root Agent',
|
||||
description: selectedMission.title?.slice(0, 50) || 'Mission ' + selectedMission.id.slice(0, 8),
|
||||
model: 'claude-sonnet-4.5',
|
||||
budgetAllocated: 1000,
|
||||
budgetSpent: 50,
|
||||
children: [
|
||||
{
|
||||
id: 'complexity',
|
||||
type: 'ComplexityEstimator',
|
||||
status: 'completed',
|
||||
name: 'Complexity Estimator',
|
||||
description: 'Estimate task difficulty',
|
||||
model: 'claude-3.5-haiku',
|
||||
budgetAllocated: 10,
|
||||
budgetSpent: 5,
|
||||
complexity: 0.7,
|
||||
},
|
||||
{
|
||||
id: 'model-selector',
|
||||
type: 'ModelSelector',
|
||||
status: 'completed',
|
||||
name: 'Model Selector',
|
||||
description: 'Select optimal model for task',
|
||||
model: 'claude-3.5-haiku',
|
||||
budgetAllocated: 10,
|
||||
budgetSpent: 3,
|
||||
},
|
||||
{
|
||||
id: 'executor',
|
||||
type: 'TaskExecutor',
|
||||
status: rootStatus,
|
||||
name: 'Task Executor',
|
||||
description: 'Execute task using tools',
|
||||
model: 'claude-sonnet-4.5',
|
||||
budgetAllocated: 900,
|
||||
budgetSpent: 35,
|
||||
},
|
||||
{
|
||||
id: 'verifier',
|
||||
type: 'Verifier',
|
||||
status: selectedMission.status === 'completed' ? 'completed' :
|
||||
selectedMission.status === 'failed' ? 'failed' : 'pending',
|
||||
name: 'Verifier',
|
||||
description: 'Verify task completion',
|
||||
model: 'claude-3.5-haiku',
|
||||
budgetAllocated: 80,
|
||||
budgetSpent: selectedMission.status === 'completed' ? 7 : 0,
|
||||
},
|
||||
] as AgentNode[],
|
||||
};
|
||||
}, [selectedMission, controlState]);
|
||||
|
||||
// Load tree for a specific mission
|
||||
const loadMissionTree = useCallback(async (missionId: string) => {
|
||||
try {
|
||||
const tree = await getMissionTree(missionId);
|
||||
if (tree) {
|
||||
setRealTree(convertTreeNode(tree as unknown as Record<string, unknown>));
|
||||
} else {
|
||||
setRealTree(null);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load mission tree:', e);
|
||||
setRealTree(null);
|
||||
}
|
||||
}, [convertTreeNode]);
|
||||
|
||||
// Demo mode handlers
|
||||
const startDemo = useCallback((mode: DemoMode) => {
|
||||
// Cleanup previous demo
|
||||
demoCleanupRef.current?.();
|
||||
|
||||
if (mode === 'off') {
|
||||
setDemoMode('off');
|
||||
setDemoTree(null);
|
||||
setDemoRunning(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate demo tree
|
||||
let tree: AgentNode;
|
||||
switch (mode) {
|
||||
case 'simple':
|
||||
tree = generateSimpleTree();
|
||||
break;
|
||||
case 'complex':
|
||||
tree = generateComplexTree();
|
||||
break;
|
||||
case 'deep':
|
||||
tree = generateDeepTree(4);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
setDemoMode(mode);
|
||||
setDemoTree(tree);
|
||||
setDemoRunning(true);
|
||||
|
||||
// Start simulation
|
||||
const cleanup = simulateTreeUpdates(tree, setDemoTree);
|
||||
demoCleanupRef.current = cleanup;
|
||||
}, []);
|
||||
|
||||
const toggleDemoRunning = useCallback(() => {
|
||||
if (demoRunning) {
|
||||
demoCleanupRef.current?.();
|
||||
demoCleanupRef.current = null;
|
||||
setDemoRunning(false);
|
||||
} else if (demoTree) {
|
||||
const cleanup = simulateTreeUpdates(demoTree, setDemoTree);
|
||||
demoCleanupRef.current = cleanup;
|
||||
setDemoRunning(true);
|
||||
}
|
||||
}, [demoRunning, demoTree]);
|
||||
|
||||
// Load tree when selected mission changes (e.g., on initial page load)
|
||||
// Track which mission's tree we last loaded to avoid redundant fetches
|
||||
const lastLoadedMissionRef = useRef<string | null>(null);
|
||||
useEffect(() => {
|
||||
if (selectedMissionId && selectedMissionId !== lastLoadedMissionRef.current) {
|
||||
lastLoadedMissionRef.current = selectedMissionId;
|
||||
loadMissionTree(selectedMissionId);
|
||||
}
|
||||
}, [selectedMissionId, loadMissionTree]);
|
||||
|
||||
// Cleanup on unmount
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
demoCleanupRef.current?.();
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Use demo tree when in demo mode, otherwise use real tree or fallback
|
||||
const displayTree = useMemo(() => {
|
||||
if (demoMode !== 'off' && demoTree) {
|
||||
return demoTree;
|
||||
}
|
||||
return realTree ?? buildFallbackTree();
|
||||
}, [demoMode, demoTree, realTree, buildFallbackTree]);
|
||||
|
||||
const isActive = controlState !== 'idle';
|
||||
|
||||
return (
|
||||
<div className="flex h-screen">
|
||||
{/* Mission selector sidebar */}
|
||||
<div className="w-64 border-r border-white/[0.06] glass-panel p-4 flex flex-col">
|
||||
<h2 className="mb-3 text-sm font-medium text-white">Missions</h2>
|
||||
|
||||
<div className="relative mb-4">
|
||||
<Search className="absolute left-2.5 top-1/2 h-3.5 w-3.5 -translate-y-1/2 text-white/30" />
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Search missions..."
|
||||
value={searchQuery}
|
||||
onChange={(e) => setSearchQuery(e.target.value)}
|
||||
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] py-2 pl-8 pr-3 text-xs text-white placeholder-white/30 focus:border-indigo-500/50 focus:outline-none transition-colors"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{isActive && currentMission && (
|
||||
<div className="mb-4 p-3 rounded-xl bg-indigo-500/10 border border-indigo-500/30">
|
||||
<div className="flex items-center gap-2">
|
||||
<Loader className="h-3 w-3 animate-spin text-indigo-400" />
|
||||
<span className="text-xs font-medium text-indigo-400">Active</span>
|
||||
</div>
|
||||
<p className="mt-1 text-xs text-white/60 truncate">
|
||||
{currentMission.title || 'Mission ' + currentMission.id.slice(0, 8)}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex-1 overflow-y-auto space-y-2">
|
||||
{loading ? (
|
||||
<>
|
||||
<ShimmerSidebarItem />
|
||||
<ShimmerSidebarItem />
|
||||
<ShimmerSidebarItem />
|
||||
</>
|
||||
) : filteredMissions.length === 0 && !currentMission ? (
|
||||
<p className="text-xs text-white/40 py-2">
|
||||
{searchQuery ? 'No missions found' : 'No missions yet'}
|
||||
</p>
|
||||
) : (
|
||||
<>
|
||||
{currentMission && (!searchQuery || currentMission.title?.toLowerCase().includes(searchQuery.toLowerCase())) && (
|
||||
<button
|
||||
key={currentMission.id}
|
||||
onClick={() => {
|
||||
setSelectedMissionId(currentMission.id);
|
||||
// Load tree for this mission (either live or saved)
|
||||
if (selectedMissionId !== currentMission.id) {
|
||||
loadMissionTree(currentMission.id);
|
||||
}
|
||||
if (demoMode !== 'off') startDemo('off');
|
||||
}}
|
||||
className={cn(
|
||||
'w-full rounded-xl p-3 text-left transition-all',
|
||||
selectedMissionId === currentMission.id && demoMode === 'off'
|
||||
? 'bg-white/[0.08] border border-indigo-500/50'
|
||||
: 'bg-white/[0.02] border border-white/[0.04] hover:bg-white/[0.04] hover:border-white/[0.08]'
|
||||
)}
|
||||
>
|
||||
<div className="flex items-center gap-2">
|
||||
{controlState !== 'idle' ? (
|
||||
<Loader className="h-3 w-3 animate-spin text-indigo-400" />
|
||||
) : currentMission.status === 'completed' ? (
|
||||
<CheckCircle className="h-3 w-3 text-emerald-400" />
|
||||
) : currentMission.status === 'failed' ? (
|
||||
<XCircle className="h-3 w-3 text-red-400" />
|
||||
) : (
|
||||
<Clock className="h-3 w-3 text-indigo-400" />
|
||||
)}
|
||||
<span className="truncate text-sm text-white/80">
|
||||
{currentMission.title?.slice(0, 25) || 'Current Mission'}
|
||||
</span>
|
||||
</div>
|
||||
</button>
|
||||
)}
|
||||
|
||||
{filteredMissions.filter(m => m.id !== currentMission?.id).map((mission) => (
|
||||
<button
|
||||
key={mission.id}
|
||||
onClick={() => {
|
||||
// Load tree for this mission (either live or saved from database)
|
||||
if (selectedMissionId !== mission.id) {
|
||||
loadMissionTree(mission.id);
|
||||
}
|
||||
setSelectedMissionId(mission.id);
|
||||
if (demoMode !== 'off') startDemo('off');
|
||||
}}
|
||||
className={cn(
|
||||
'w-full rounded-xl p-3 text-left transition-all',
|
||||
selectedMissionId === mission.id && demoMode === 'off'
|
||||
? 'bg-white/[0.08] border border-indigo-500/50'
|
||||
: 'bg-white/[0.02] border border-white/[0.04] hover:bg-white/[0.04] hover:border-white/[0.08]'
|
||||
)}
|
||||
>
|
||||
<div className="flex items-center gap-2">
|
||||
{mission.status === 'active' ? (
|
||||
<Clock className="h-3 w-3 text-indigo-400" />
|
||||
) : mission.status === 'completed' ? (
|
||||
<CheckCircle className="h-3 w-3 text-emerald-400" />
|
||||
) : (
|
||||
<XCircle className="h-3 w-3 text-red-400" />
|
||||
)}
|
||||
<span className="truncate text-sm text-white/80">
|
||||
{mission.title?.slice(0, 25) || 'Mission ' + mission.id.slice(0, 8)}
|
||||
</span>
|
||||
</div>
|
||||
</button>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Demo mode controls */}
|
||||
<div className="mt-4 pt-4 border-t border-white/[0.06]">
|
||||
<div className="flex items-center gap-2 mb-3">
|
||||
<FlaskConical className="h-4 w-4 text-amber-400" />
|
||||
<span className="text-xs font-medium text-white/60">Demo Mode</span>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<div className="flex gap-1.5">
|
||||
{(['simple', 'complex', 'deep'] as const).map((mode) => (
|
||||
<button
|
||||
key={mode}
|
||||
onClick={() => startDemo(mode)}
|
||||
className={cn(
|
||||
'flex-1 px-2 py-1.5 rounded-lg text-xs font-medium transition-all capitalize',
|
||||
demoMode === mode
|
||||
? 'bg-amber-500/20 text-amber-400 border border-amber-500/30'
|
||||
: 'bg-white/[0.02] text-white/50 border border-white/[0.04] hover:bg-white/[0.04]'
|
||||
)}
|
||||
>
|
||||
{mode}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{demoMode !== 'off' && (
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={toggleDemoRunning}
|
||||
className={cn(
|
||||
'flex-1 flex items-center justify-center gap-1.5 px-3 py-1.5 rounded-lg text-xs font-medium transition-all',
|
||||
demoRunning
|
||||
? 'bg-amber-500/20 text-amber-400 border border-amber-500/30'
|
||||
: 'bg-white/[0.02] text-white/50 border border-white/[0.04]'
|
||||
)}
|
||||
>
|
||||
{demoRunning ? (
|
||||
<>
|
||||
<Pause className="h-3 w-3" />
|
||||
Pause
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Play className="h-3 w-3" />
|
||||
Resume
|
||||
</>
|
||||
)}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => startDemo('off')}
|
||||
className="px-3 py-1.5 rounded-lg text-xs font-medium bg-white/[0.02] text-white/50 border border-white/[0.04] hover:bg-white/[0.04] transition-all"
|
||||
>
|
||||
Stop
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Agent tree visualization */}
|
||||
<div className="flex-1 flex flex-col overflow-hidden">
|
||||
{/* Header */}
|
||||
<div className="shrink-0 p-6 pb-0">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex h-10 w-10 items-center justify-center rounded-xl bg-indigo-500/10">
|
||||
<Layers className="h-5 w-5 text-indigo-400" />
|
||||
</div>
|
||||
<div>
|
||||
<div className="flex items-center gap-2">
|
||||
<h1 className="text-xl font-semibold text-white">Agent Tree</h1>
|
||||
{demoMode !== 'off' && (
|
||||
<span className="px-2 py-0.5 rounded-full text-xs font-medium bg-amber-500/20 text-amber-400 border border-amber-500/30">
|
||||
Demo: {demoMode}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<p className="text-sm text-white/50">
|
||||
{demoMode !== 'off'
|
||||
? 'Simulated agent tree with live updates'
|
||||
: 'Hierarchical agent execution visualization'
|
||||
}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Tree canvas */}
|
||||
<div className="flex-1 p-6 min-h-0">
|
||||
{!displayTree && (missions.length === 0 && !currentMission) ? (
|
||||
<div className="flex flex-col items-center justify-center h-full">
|
||||
<div className="flex h-16 w-16 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
|
||||
<MessageSquare className="h-8 w-8 text-white/30" />
|
||||
</div>
|
||||
<p className="text-white/80">No active missions</p>
|
||||
<p className="mt-2 text-sm text-white/40 text-center max-w-sm">
|
||||
Start a conversation in the{' '}
|
||||
<Link href="/control" className="text-indigo-400 hover:text-indigo-300">
|
||||
Control
|
||||
</Link>{' '}
|
||||
page or try <span className="text-amber-400">Demo Mode</span> in the sidebar
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<AgentTreeCanvas
|
||||
tree={displayTree}
|
||||
selectedNodeId={selectedNodeId}
|
||||
onSelectNode={(node) => setSelectedNodeId(node?.id ?? null)}
|
||||
className="w-full h-full rounded-2xl border border-white/[0.06]"
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -14,3 +14,6 @@ export function ConsoleWrapper() {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ import {
|
||||
getProgress,
|
||||
getRunningMissions,
|
||||
cancelMission,
|
||||
listModels,
|
||||
getModelDisplayName,
|
||||
type ControlRunState,
|
||||
type Mission,
|
||||
type MissionStatus,
|
||||
@@ -403,6 +405,9 @@ export default function ControlClient() {
|
||||
>([]);
|
||||
const [uploadQueue, setUploadQueue] = useState<string[]>([]);
|
||||
|
||||
// Model selection state
|
||||
const [availableModels, setAvailableModels] = useState<string[]>([]);
|
||||
|
||||
// Check if the mission we're viewing is actually running (not just any mission)
|
||||
const viewingMissionIsRunning = useMemo(() => {
|
||||
if (!viewingMissionId) return runState !== "idle";
|
||||
@@ -489,6 +494,31 @@ export default function ControlClient() {
|
||||
return () => document.removeEventListener("mousedown", handleClickOutside);
|
||||
}, []);
|
||||
|
||||
// Handle file upload - wrapped in useCallback to avoid stale closures
|
||||
const handleFileUpload = useCallback(async (file: File) => {
|
||||
setUploadQueue((prev) => [...prev, file.name]);
|
||||
|
||||
try {
|
||||
// Upload to mission-specific context folder if we have a mission
|
||||
const contextPath = currentMission?.id
|
||||
? `/root/context/${currentMission.id}/`
|
||||
: "/root/context/";
|
||||
const result = await uploadFile(file, contextPath);
|
||||
toast.success(`Uploaded ${result.name}`);
|
||||
|
||||
// Add a message about the upload
|
||||
setInput((prev) => {
|
||||
const uploadNote = `[Uploaded: ${result.name}]`;
|
||||
return prev ? `${prev}\n${uploadNote}` : uploadNote;
|
||||
});
|
||||
} catch (error) {
|
||||
console.error("Upload failed:", error);
|
||||
toast.error(`Failed to upload ${file.name}`);
|
||||
} finally {
|
||||
setUploadQueue((prev) => prev.filter((name) => name !== file.name));
|
||||
}
|
||||
}, [currentMission?.id]);
|
||||
|
||||
// Handle paste to upload files
|
||||
useEffect(() => {
|
||||
const textarea = textareaRef.current;
|
||||
@@ -519,28 +549,7 @@ export default function ControlClient() {
|
||||
|
||||
textarea.addEventListener("paste", handlePaste);
|
||||
return () => textarea.removeEventListener("paste", handlePaste);
|
||||
}, []);
|
||||
|
||||
// Handle file upload
|
||||
const handleFileUpload = async (file: File) => {
|
||||
setUploadQueue((prev) => [...prev, file.name]);
|
||||
|
||||
try {
|
||||
const result = await uploadFile(file, "/root/context/");
|
||||
toast.success(`Uploaded ${result.name} to /root/context/`);
|
||||
|
||||
// Add a message about the upload
|
||||
setInput((prev) => {
|
||||
const uploadNote = `[Uploaded: ${result.name}]`;
|
||||
return prev ? `${prev}\n${uploadNote}` : uploadNote;
|
||||
});
|
||||
} catch (error) {
|
||||
console.error("Upload failed:", error);
|
||||
toast.error(`Failed to upload ${file.name}`);
|
||||
} finally {
|
||||
setUploadQueue((prev) => prev.filter((name) => name !== file.name));
|
||||
}
|
||||
};
|
||||
}, [handleFileUpload]);
|
||||
|
||||
// Handle file input change
|
||||
const handleFileChange = async (
|
||||
@@ -629,6 +638,17 @@ export default function ControlClient() {
|
||||
return () => clearInterval(interval);
|
||||
}, []);
|
||||
|
||||
// Fetch available models for mission creation
|
||||
useEffect(() => {
|
||||
listModels()
|
||||
.then((data) => {
|
||||
setAvailableModels(data.models);
|
||||
})
|
||||
.catch((err) => {
|
||||
console.error("Failed to fetch models:", err);
|
||||
});
|
||||
}, []);
|
||||
|
||||
// Handle cancelling a parallel mission
|
||||
const handleCancelMission = async (missionId: string) => {
|
||||
try {
|
||||
@@ -643,21 +663,37 @@ export default function ControlClient() {
|
||||
}
|
||||
};
|
||||
|
||||
// Track the mission ID being fetched to prevent race conditions
|
||||
const fetchingMissionIdRef = useRef<string | null>(null);
|
||||
|
||||
// Handle switching which mission we're viewing
|
||||
const handleViewMission = useCallback(
|
||||
async (missionId: string) => {
|
||||
setViewingMissionId(missionId);
|
||||
fetchingMissionIdRef.current = missionId;
|
||||
|
||||
// Always load fresh history from API when switching missions
|
||||
// This ensures we don't show stale cached events
|
||||
try {
|
||||
const mission = await getMission(missionId);
|
||||
|
||||
// Race condition guard: only update if this is still the mission we want
|
||||
if (fetchingMissionIdRef.current !== missionId) {
|
||||
return; // Another mission was requested, discard this response
|
||||
}
|
||||
|
||||
const historyItems = missionHistoryToItems(mission);
|
||||
setItems(historyItems);
|
||||
// Update cache with fresh data
|
||||
setMissionItems((prev) => ({ ...prev, [missionId]: historyItems }));
|
||||
} catch (err) {
|
||||
console.error("Failed to load mission:", err);
|
||||
|
||||
// Race condition guard: only update if this is still the mission we want
|
||||
if (fetchingMissionIdRef.current !== missionId) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback to cached items if API fails
|
||||
if (missionItems[missionId]) {
|
||||
setItems(missionItems[missionId]);
|
||||
@@ -1141,27 +1177,41 @@ export default function ControlClient() {
|
||||
<span className="hidden sm:inline">New</span> Mission
|
||||
</button>
|
||||
{showNewMissionDialog && (
|
||||
<div className="absolute right-0 top-full mt-1 w-72 rounded-lg border border-white/[0.06] bg-[#1a1a1a] p-4 shadow-xl z-10">
|
||||
<div className="absolute right-0 top-full mt-1 w-80 rounded-lg border border-white/[0.06] bg-[#1a1a1a] p-4 shadow-xl z-10">
|
||||
<h3 className="text-sm font-medium text-white mb-3">
|
||||
Create New Mission
|
||||
</h3>
|
||||
<div className="space-y-3">
|
||||
<div>
|
||||
<label className="block text-xs text-white/50 mb-1">
|
||||
Model Override (optional)
|
||||
<label className="block text-xs text-white/50 mb-1.5">
|
||||
Model
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
<select
|
||||
value={newMissionModel}
|
||||
onChange={(e) => setNewMissionModel(e.target.value)}
|
||||
placeholder="e.g., deepseek/deepseek-v3.2"
|
||||
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] px-3 py-2 text-sm text-white placeholder-white/30 focus:border-indigo-500/50 focus:outline-none"
|
||||
/>
|
||||
<p className="text-xs text-white/30 mt-1">
|
||||
Leave empty to use default model
|
||||
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] px-3 py-2.5 text-sm text-white focus:border-indigo-500/50 focus:outline-none appearance-none cursor-pointer"
|
||||
style={{
|
||||
backgroundImage: `url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e")`,
|
||||
backgroundPosition: "right 0.5rem center",
|
||||
backgroundRepeat: "no-repeat",
|
||||
backgroundSize: "1.5em 1.5em",
|
||||
paddingRight: "2.5rem",
|
||||
}}
|
||||
>
|
||||
<option value="" className="bg-[#1a1a1a]">
|
||||
Auto (default)
|
||||
</option>
|
||||
{availableModels.map((model) => (
|
||||
<option key={model} value={model} className="bg-[#1a1a1a]">
|
||||
{getModelDisplayName(model)}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
<p className="text-xs text-white/30 mt-1.5">
|
||||
Auto uses the configured default model
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<div className="flex gap-2 pt-1">
|
||||
<button
|
||||
onClick={() => {
|
||||
setShowNewMissionDialog(false);
|
||||
@@ -1410,7 +1460,7 @@ export default function ControlClient() {
|
||||
it's busy
|
||||
</p>
|
||||
<p className="mt-1 text-xs text-white/30">
|
||||
Tip: Paste files directly to upload to /root/context/
|
||||
Tip: Paste files directly to upload to context folder
|
||||
</p>
|
||||
</>
|
||||
)}
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useState, useRef, useMemo } from "react";
|
||||
import { useEffect, useState, useRef, useMemo, useCallback } from "react";
|
||||
import Link from "next/link";
|
||||
import { toast } from "sonner";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { listTasks, listRuns, listMissions, TaskState, Run, Mission } from "@/lib/api";
|
||||
import { listMissions, getMissionTree, Mission } from "@/lib/api";
|
||||
import { ShimmerTableRow } from "@/components/ui/shimmer";
|
||||
import { CopyButton } from "@/components/ui/copy-button";
|
||||
import { RelativeTime } from "@/components/ui/relative-time";
|
||||
import { AgentTreeCanvas, type AgentNode } from "@/components/agent-tree";
|
||||
import {
|
||||
CheckCircle,
|
||||
XCircle,
|
||||
@@ -21,9 +22,11 @@ import {
|
||||
ArrowUpDown,
|
||||
ArrowUp,
|
||||
ArrowDown,
|
||||
Network,
|
||||
X,
|
||||
} from "lucide-react";
|
||||
|
||||
const statusIcons = {
|
||||
const statusIcons: Record<string, typeof Clock> = {
|
||||
pending: Clock,
|
||||
running: Loader,
|
||||
completed: CheckCircle,
|
||||
@@ -35,7 +38,7 @@ const statusIcons = {
|
||||
not_feasible: XCircle,
|
||||
};
|
||||
|
||||
const statusConfig = {
|
||||
const statusConfig: Record<string, { color: string; bg: string }> = {
|
||||
pending: { color: "text-amber-400", bg: "bg-amber-500/10" },
|
||||
running: { color: "text-indigo-400", bg: "bg-indigo-500/10" },
|
||||
completed: { color: "text-emerald-400", bg: "bg-emerald-500/10" },
|
||||
@@ -47,6 +50,10 @@ const statusConfig = {
|
||||
not_feasible: { color: "text-rose-400", bg: "bg-rose-500/10" },
|
||||
};
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null;
|
||||
}
|
||||
|
||||
type SortField = 'date' | 'status' | 'messages';
|
||||
type SortDirection = 'asc' | 'desc';
|
||||
|
||||
@@ -80,30 +87,47 @@ function SortButton({
|
||||
);
|
||||
}
|
||||
|
||||
// Convert backend tree node to frontend AgentNode
|
||||
function convertTreeNode(node: Record<string, unknown>): AgentNode {
|
||||
const children = (node["children"] as Record<string, unknown>[] | undefined) ?? [];
|
||||
return {
|
||||
id: String(node["id"] ?? ""),
|
||||
type: String(node["node_type"] ?? "Node") as AgentNode["type"],
|
||||
status: String(node["status"] ?? "pending") as AgentNode["status"],
|
||||
name: String(node["name"] ?? ""),
|
||||
description: String(node["description"] ?? ""),
|
||||
model: node["selected_model"] != null ? String(node["selected_model"]) : undefined,
|
||||
budgetAllocated: Number(node["budget_allocated"] ?? 0),
|
||||
budgetSpent: Number(node["budget_spent"] ?? 0),
|
||||
complexity: node["complexity"] != null ? Number(node["complexity"]) : undefined,
|
||||
children: children.map((c) => convertTreeNode(c)),
|
||||
};
|
||||
}
|
||||
|
||||
export default function HistoryPage() {
|
||||
const [tasks, setTasks] = useState<TaskState[]>([]);
|
||||
const [runs, setRuns] = useState<Run[]>([]);
|
||||
const [missions, setMissions] = useState<Mission[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [filter, setFilter] = useState<string>("all");
|
||||
const [search, setSearch] = useState("");
|
||||
const [sortField, setSortField] = useState<SortField>('date');
|
||||
const [sortDirection, setSortDirection] = useState<SortDirection>('desc');
|
||||
const [sortField, setSortField] = useState<SortField>("date");
|
||||
const [sortDirection, setSortDirection] = useState<SortDirection>("desc");
|
||||
const fetchedRef = useRef(false);
|
||||
|
||||
// Tree preview state
|
||||
const [previewMissionId, setPreviewMissionId] = useState<string | null>(null);
|
||||
const [previewTree, setPreviewTree] = useState<AgentNode | null>(null);
|
||||
const [loadingTree, setLoadingTree] = useState(false);
|
||||
|
||||
// Track the mission ID being fetched to prevent race conditions
|
||||
const fetchingTreeMissionIdRef = useRef<string | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (fetchedRef.current) return;
|
||||
fetchedRef.current = true;
|
||||
|
||||
const fetchData = async () => {
|
||||
try {
|
||||
const [tasksData, runsData, missionsData] = await Promise.all([
|
||||
listTasks().catch(() => []),
|
||||
listRuns().catch(() => ({ runs: [] })),
|
||||
listMissions().catch(() => []),
|
||||
]);
|
||||
setTasks(tasksData);
|
||||
setRuns(runsData.runs || []);
|
||||
const missionsData = await listMissions().catch(() => []);
|
||||
setMissions(missionsData);
|
||||
} catch (error) {
|
||||
console.error("Failed to fetch data:", error);
|
||||
@@ -116,31 +140,61 @@ export default function HistoryPage() {
|
||||
fetchData();
|
||||
}, []);
|
||||
|
||||
// Load tree for preview
|
||||
const handlePreviewTree = useCallback(async (missionId: string) => {
|
||||
if (previewMissionId === missionId) {
|
||||
// Toggle off
|
||||
setPreviewMissionId(null);
|
||||
setPreviewTree(null);
|
||||
fetchingTreeMissionIdRef.current = null;
|
||||
return;
|
||||
}
|
||||
|
||||
setPreviewMissionId(missionId);
|
||||
setLoadingTree(true);
|
||||
fetchingTreeMissionIdRef.current = missionId;
|
||||
|
||||
try {
|
||||
const tree = await getMissionTree(missionId);
|
||||
|
||||
// Race condition guard: only update if this is still the mission we want
|
||||
if (fetchingTreeMissionIdRef.current !== missionId) {
|
||||
return; // Another mission was requested, discard this response
|
||||
}
|
||||
|
||||
if (tree && isRecord(tree)) {
|
||||
setPreviewTree(convertTreeNode(tree as Record<string, unknown>));
|
||||
} else {
|
||||
setPreviewTree(null);
|
||||
toast.error("No tree data available for this mission");
|
||||
}
|
||||
} catch {
|
||||
// Race condition guard: only update if this is still the mission we want
|
||||
if (fetchingTreeMissionIdRef.current !== missionId) {
|
||||
return;
|
||||
}
|
||||
|
||||
setPreviewTree(null);
|
||||
toast.error("Failed to load tree");
|
||||
} finally {
|
||||
// Only clear loading if this is still the current fetch
|
||||
if (fetchingTreeMissionIdRef.current === missionId) {
|
||||
setLoadingTree(false);
|
||||
}
|
||||
}
|
||||
}, [previewMissionId]);
|
||||
|
||||
const handleSort = (field: SortField) => {
|
||||
if (sortField === field) {
|
||||
setSortDirection(sortDirection === 'asc' ? 'desc' : 'asc');
|
||||
setSortDirection(sortDirection === "asc" ? "desc" : "asc");
|
||||
} else {
|
||||
setSortField(field);
|
||||
setSortDirection('desc');
|
||||
setSortDirection("desc");
|
||||
}
|
||||
};
|
||||
|
||||
const filteredTasks = tasks.filter((task) => {
|
||||
if (filter !== "all" && task.status !== filter) return false;
|
||||
if (search && !task.task.toLowerCase().includes(search.toLowerCase()))
|
||||
return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
const filteredRuns = runs.filter((run) => {
|
||||
if (filter !== "all" && run.status !== filter) return false;
|
||||
if (search && !run.input_text.toLowerCase().includes(search.toLowerCase()))
|
||||
return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
const filteredMissions = useMemo(() => {
|
||||
let filtered = missions.filter((mission) => {
|
||||
const filtered = missions.filter((mission) => {
|
||||
if (filter !== "all" && mission.status !== filter) return false;
|
||||
const title = mission.title || "";
|
||||
if (search && !title.toLowerCase().includes(search.toLowerCase()))
|
||||
@@ -152,29 +206,30 @@ export default function HistoryPage() {
|
||||
return filtered.sort((a, b) => {
|
||||
let comparison = 0;
|
||||
switch (sortField) {
|
||||
case 'date':
|
||||
comparison = new Date(b.updated_at).getTime() - new Date(a.updated_at).getTime();
|
||||
case "date":
|
||||
comparison =
|
||||
new Date(b.updated_at).getTime() - new Date(a.updated_at).getTime();
|
||||
break;
|
||||
case 'status':
|
||||
case "status":
|
||||
comparison = a.status.localeCompare(b.status);
|
||||
break;
|
||||
case 'messages':
|
||||
case "messages":
|
||||
comparison = b.history.length - a.history.length;
|
||||
break;
|
||||
}
|
||||
return sortDirection === 'asc' ? -comparison : comparison;
|
||||
return sortDirection === "asc" ? -comparison : comparison;
|
||||
});
|
||||
}, [missions, filter, search, sortField, sortDirection]);
|
||||
|
||||
const hasData = filteredTasks.length > 0 || filteredRuns.length > 0 || filteredMissions.length > 0;
|
||||
const hasData = filteredMissions.length > 0;
|
||||
|
||||
return (
|
||||
<div className="p-6">
|
||||
{/* Header */}
|
||||
<div className="mb-6">
|
||||
<h1 className="text-xl font-semibold text-white">History</h1>
|
||||
<h1 className="text-xl font-semibold text-white">Agents</h1>
|
||||
<p className="mt-1 text-sm text-white/50">
|
||||
View all past and current tasks
|
||||
Mission history and agent tree visualization
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -184,7 +239,7 @@ export default function HistoryPage() {
|
||||
<Search className="absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-white/30" />
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Search tasks..."
|
||||
placeholder="Search missions..."
|
||||
value={search}
|
||||
onChange={(e) => setSearch(e.target.value)}
|
||||
className="w-full rounded-lg border border-white/[0.06] bg-white/[0.02] py-2.5 pl-10 pr-4 text-sm text-white placeholder-white/30 focus:border-indigo-500/50 focus:outline-none transition-colors"
|
||||
@@ -210,128 +265,63 @@ export default function HistoryPage() {
|
||||
</div>
|
||||
|
||||
{/* Content */}
|
||||
{loading ? (
|
||||
<div className="space-y-6">
|
||||
{/* Shimmer for missions table */}
|
||||
<div>
|
||||
<div className="h-4 w-24 bg-white/[0.04] rounded mb-3 animate-pulse" />
|
||||
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
|
||||
<table className="w-full">
|
||||
<thead>
|
||||
<tr className="border-b border-white/[0.04]">
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Status</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Mission</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Messages</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Updated</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-white/[0.04]">
|
||||
<ShimmerTableRow columns={5} />
|
||||
<ShimmerTableRow columns={5} />
|
||||
<ShimmerTableRow columns={5} />
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
) : !hasData ? (
|
||||
<div className="flex flex-col items-center py-16 text-center">
|
||||
<div className="flex h-16 w-16 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
|
||||
<MessageSquare className="h-8 w-8 text-white/30" />
|
||||
</div>
|
||||
<p className="text-white/80">No history yet</p>
|
||||
<p className="mt-2 text-sm text-white/40">
|
||||
Start a conversation in the{" "}
|
||||
<Link
|
||||
href="/control"
|
||||
className="text-indigo-400 hover:text-indigo-300"
|
||||
>
|
||||
Control
|
||||
</Link>{" "}
|
||||
page
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-6">
|
||||
{/* Archived Runs - shown first for visibility */}
|
||||
{filteredRuns.length > 0 && (
|
||||
<div>
|
||||
<h2 className="mb-3 text-xs font-medium uppercase tracking-wider text-white/40">
|
||||
Recent Runs ({filteredRuns.length})
|
||||
</h2>
|
||||
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
|
||||
<table className="w-full">
|
||||
<thead>
|
||||
<tr className="border-b border-white/[0.04]">
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Status
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Input
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Created
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Cost
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-white/[0.04]">
|
||||
{filteredRuns.map((run) => {
|
||||
const status = run.status as keyof typeof statusIcons;
|
||||
const Icon = statusIcons[status] || Clock;
|
||||
const config =
|
||||
statusConfig[status] || statusConfig.pending;
|
||||
return (
|
||||
<tr
|
||||
key={run.id}
|
||||
className="group hover:bg-white/[0.02] transition-colors"
|
||||
>
|
||||
<td className="px-4 py-3">
|
||||
<span
|
||||
className={cn(
|
||||
"inline-flex items-center gap-1.5 rounded-md px-2 py-1 text-[10px] font-medium",
|
||||
config.bg,
|
||||
config.color
|
||||
)}
|
||||
>
|
||||
<Icon className="h-3 w-3" />
|
||||
{run.status}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<p className="max-w-md truncate text-sm text-white/80">
|
||||
{run.input_text}
|
||||
</p>
|
||||
<CopyButton text={run.input_text} showOnHover label="Copied input" />
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<RelativeTime
|
||||
date={run.created_at}
|
||||
className="text-xs text-white/40"
|
||||
/>
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<span className="text-sm text-emerald-400 tabular-nums">
|
||||
${(run.total_cost_cents / 100).toFixed(2)}
|
||||
</span>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
<div>
|
||||
{loading ? (
|
||||
<div className="space-y-6">
|
||||
{/* Shimmer for missions table */}
|
||||
<div>
|
||||
<div className="h-4 w-24 bg-white/[0.04] rounded mb-3 animate-pulse" />
|
||||
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
|
||||
<table className="w-full">
|
||||
<thead>
|
||||
<tr className="border-b border-white/[0.04]">
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Status
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Mission
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Messages
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Updated
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Actions
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-white/[0.04]">
|
||||
<ShimmerTableRow columns={5} />
|
||||
<ShimmerTableRow columns={5} />
|
||||
<ShimmerTableRow columns={5} />
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Missions */}
|
||||
{filteredMissions.length > 0 && (
|
||||
<div>
|
||||
) : !hasData ? (
|
||||
<div className="flex flex-col items-center py-16 text-center">
|
||||
<div className="flex h-16 w-16 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
|
||||
<MessageSquare className="h-8 w-8 text-white/30" />
|
||||
</div>
|
||||
<p className="text-white/80">No history yet</p>
|
||||
<p className="mt-2 text-sm text-white/40">
|
||||
Start a conversation in the{" "}
|
||||
<Link
|
||||
href="/control"
|
||||
className="text-indigo-400 hover:text-indigo-300"
|
||||
>
|
||||
Control
|
||||
</Link>{" "}
|
||||
page
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-6">
|
||||
{/* Missions */}
|
||||
<div>
|
||||
<h2 className="mb-3 text-xs font-medium uppercase tracking-wider text-white/40">
|
||||
Missions ({filteredMissions.length})
|
||||
</h2>
|
||||
@@ -342,7 +332,7 @@ export default function HistoryPage() {
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
<span className="flex items-center">
|
||||
Status
|
||||
<SortButton field="status" currentField={sortField} direction={sortDirection} onClick={() => handleSort('status')} />
|
||||
<SortButton field="status" currentField={sortField} direction={sortDirection} onClick={() => handleSort("status")} />
|
||||
</span>
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
@@ -351,13 +341,13 @@ export default function HistoryPage() {
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
<span className="flex items-center">
|
||||
Messages
|
||||
<SortButton field="messages" currentField={sortField} direction={sortDirection} onClick={() => handleSort('messages')} />
|
||||
<SortButton field="messages" currentField={sortField} direction={sortDirection} onClick={() => handleSort("messages")} />
|
||||
</span>
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
<span className="flex items-center">
|
||||
Updated
|
||||
<SortButton field="date" currentField={sortField} direction={sortDirection} onClick={() => handleSort('date')} />
|
||||
<SortButton field="date" currentField={sortField} direction={sortDirection} onClick={() => handleSort("date")} />
|
||||
</span>
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
@@ -417,9 +407,21 @@ export default function HistoryPage() {
|
||||
{mission.status === "active" ? "Continue" : "View"}{" "}
|
||||
<ArrowRight className="h-3 w-3" />
|
||||
</Link>
|
||||
<CopyButton
|
||||
text={mission.id}
|
||||
showOnHover
|
||||
<button
|
||||
onClick={() => handlePreviewTree(mission.id)}
|
||||
className={cn(
|
||||
"inline-flex items-center gap-1 text-xs transition-colors",
|
||||
previewMissionId === mission.id
|
||||
? "text-emerald-400 hover:text-emerald-300"
|
||||
: "text-white/40 hover:text-white/60"
|
||||
)}
|
||||
title="View agent tree"
|
||||
>
|
||||
<Network className="h-3 w-3" />
|
||||
</button>
|
||||
<CopyButton
|
||||
text={mission.id}
|
||||
showOnHover
|
||||
label="Copied mission ID"
|
||||
className="opacity-0 group-hover:opacity-100"
|
||||
/>
|
||||
@@ -432,104 +434,72 @@ export default function HistoryPage() {
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Active Tasks */}
|
||||
{filteredTasks.length > 0 && (
|
||||
<div>
|
||||
<h2 className="mb-3 text-xs font-medium uppercase tracking-wider text-white/40">
|
||||
Active Tasks ({filteredTasks.length})
|
||||
</h2>
|
||||
<div className="rounded-xl bg-white/[0.02] border border-white/[0.04] overflow-hidden">
|
||||
<table className="w-full">
|
||||
<thead>
|
||||
<tr className="border-b border-white/[0.04]">
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Status
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Task
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Model
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Iterations
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-[10px] font-medium uppercase tracking-wider text-white/40">
|
||||
Actions
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-white/[0.04]">
|
||||
{filteredTasks.map((task) => {
|
||||
const Icon = statusIcons[task.status];
|
||||
const config = statusConfig[task.status];
|
||||
return (
|
||||
<tr
|
||||
key={task.id}
|
||||
className="group hover:bg-white/[0.02] transition-colors"
|
||||
>
|
||||
<td className="px-4 py-3">
|
||||
<span
|
||||
className={cn(
|
||||
"inline-flex items-center gap-1.5 rounded-md px-2 py-1 text-[10px] font-medium",
|
||||
config.bg,
|
||||
config.color
|
||||
)}
|
||||
>
|
||||
<Icon
|
||||
className={cn(
|
||||
"h-3 w-3",
|
||||
task.status === "running" && "animate-spin"
|
||||
)}
|
||||
/>
|
||||
{task.status}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<p className="max-w-md truncate text-sm text-white/80">
|
||||
{task.task}
|
||||
</p>
|
||||
<CopyButton text={task.task} showOnHover label="Copied task" />
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<span className="text-xs text-white/40 font-mono">
|
||||
{task.model.split("/").pop()}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<span className="text-sm text-white tabular-nums">
|
||||
{task.iterations}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<Link
|
||||
href={`/control?task=${task.id}`}
|
||||
className="inline-flex items-center gap-1 text-xs text-indigo-400 hover:text-indigo-300 transition-colors"
|
||||
>
|
||||
View <ArrowRight className="h-3 w-3" />
|
||||
</Link>
|
||||
<CopyButton
|
||||
text={task.id}
|
||||
showOnHover
|
||||
label="Copied task ID"
|
||||
className="opacity-0 group-hover:opacity-100"
|
||||
/>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Agent Tree Modal */}
|
||||
{previewMissionId && (
|
||||
<div
|
||||
className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm"
|
||||
onClick={() => {
|
||||
setPreviewMissionId(null);
|
||||
setPreviewTree(null);
|
||||
fetchingTreeMissionIdRef.current = null;
|
||||
}}
|
||||
>
|
||||
<div
|
||||
className="relative w-[90vw] h-[85vh] max-w-6xl rounded-2xl bg-[#0a0a0a] border border-white/[0.08] shadow-2xl overflow-hidden flex flex-col"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
{/* Modal Header */}
|
||||
<div className="flex items-center justify-between px-6 py-4 border-b border-white/[0.06]">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex h-10 w-10 items-center justify-center rounded-xl bg-emerald-500/10">
|
||||
<Network className="h-5 w-5 text-emerald-400" />
|
||||
</div>
|
||||
<div>
|
||||
<h2 className="text-lg font-semibold text-white">Agent Tree</h2>
|
||||
<p className="text-xs text-white/40">
|
||||
{missions.find((m) => m.id === previewMissionId)?.title?.slice(0, 50) || "Mission visualization"}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => {
|
||||
setPreviewMissionId(null);
|
||||
setPreviewTree(null);
|
||||
fetchingTreeMissionIdRef.current = null;
|
||||
}}
|
||||
className="flex h-10 w-10 items-center justify-center rounded-xl text-white/40 hover:bg-white/[0.04] hover:text-white/70 transition-colors"
|
||||
>
|
||||
<X className="h-5 w-5" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Modal Content */}
|
||||
<div className="flex-1 min-h-0">
|
||||
{loadingTree ? (
|
||||
<div className="flex flex-col items-center justify-center h-full">
|
||||
<Loader className="h-8 w-8 animate-spin text-emerald-400 mb-3" />
|
||||
<p className="text-sm text-white/60">Loading agent tree...</p>
|
||||
</div>
|
||||
) : previewTree ? (
|
||||
<AgentTreeCanvas tree={previewTree} className="w-full h-full" />
|
||||
) : (
|
||||
<div className="flex flex-col items-center justify-center h-full text-center p-8">
|
||||
<div className="flex h-20 w-20 items-center justify-center rounded-2xl bg-white/[0.02] mb-4">
|
||||
<Network className="h-10 w-10 text-white/20" />
|
||||
</div>
|
||||
<p className="text-lg font-medium text-white/60">No tree data available</p>
|
||||
<p className="text-sm text-white/30 mt-2 max-w-md">
|
||||
Agent tree data is captured during mission execution.
|
||||
This mission may have been completed before tree tracking was enabled.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -94,6 +94,8 @@ interface AgentTreeCanvasProps {
|
||||
onSelectNode?: (node: AgentNode | null) => void;
|
||||
selectedNodeId?: string | null;
|
||||
className?: string;
|
||||
/** Compact mode for embedded panels - hides minimap and details panel */
|
||||
compact?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -527,6 +529,7 @@ export function AgentTreeCanvas({
|
||||
onSelectNode,
|
||||
selectedNodeId,
|
||||
className,
|
||||
compact = false,
|
||||
}: AgentTreeCanvasProps) {
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const [dimensions, setDimensions] = useState({ width: 800, height: 600 });
|
||||
@@ -566,8 +569,8 @@ export function AgentTreeCanvas({
|
||||
useEffect(() => {
|
||||
if (layout.width > 0 && layout.height > 0 && dimensions.width > 0 && dimensions.height > 0) {
|
||||
// Calculate zoom to fit the tree in view with some padding
|
||||
const paddingX = 80;
|
||||
const paddingY = 80;
|
||||
const paddingX = compact ? 40 : 80;
|
||||
const paddingY = compact ? 40 : 80;
|
||||
const availableWidth = dimensions.width - paddingX;
|
||||
const availableHeight = dimensions.height - paddingY;
|
||||
|
||||
@@ -575,8 +578,8 @@ export function AgentTreeCanvas({
|
||||
const scaleY = availableHeight / layout.height;
|
||||
|
||||
// Use the smaller scale to fit both dimensions
|
||||
// Cap between 0.4 (minimum readable) and 1 (don't zoom in past 100%)
|
||||
const MIN_ZOOM = 0.4;
|
||||
// Cap between 0.3/0.4 (minimum readable) and 1 (don't zoom in past 100%)
|
||||
const MIN_ZOOM = compact ? 0.3 : 0.4;
|
||||
const fitZoom = Math.max(MIN_ZOOM, Math.min(1, Math.min(scaleX, scaleY)));
|
||||
|
||||
// Calculate pan to center horizontally, start from top with padding
|
||||
@@ -587,12 +590,12 @@ export function AgentTreeCanvas({
|
||||
const scaledHeight = layout.height * fitZoom;
|
||||
const centerY = scaledHeight < availableHeight
|
||||
? (dimensions.height - scaledHeight) / 2
|
||||
: 30; // Start near top if tree is too tall
|
||||
: compact ? 20 : 30; // Start near top if tree is too tall
|
||||
|
||||
setZoom(fitZoom);
|
||||
setPan({ x: centerX, y: centerY });
|
||||
}
|
||||
}, [layout.width, layout.height, dimensions.width, dimensions.height]);
|
||||
}, [layout.width, layout.height, dimensions.width, dimensions.height, compact]);
|
||||
|
||||
// Pan handlers
|
||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||
@@ -690,34 +693,40 @@ export function AgentTreeCanvas({
|
||||
</g>
|
||||
</svg>
|
||||
|
||||
{/* Mini-map */}
|
||||
<TreeMiniMap tree={tree} />
|
||||
{/* Mini-map - hidden in compact mode */}
|
||||
{!compact && <TreeMiniMap tree={tree} />}
|
||||
|
||||
{/* Zoom controls */}
|
||||
<div className="absolute bottom-4 right-4 flex gap-2">
|
||||
<div className={cn("absolute flex gap-1", compact ? "bottom-2 right-2" : "bottom-4 right-4 gap-2")}>
|
||||
<button
|
||||
onClick={() => setZoom(z => Math.min(2, z * 1.15))}
|
||||
className="w-8 h-8 rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center"
|
||||
className={cn(
|
||||
"rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center",
|
||||
compact ? "w-6 h-6 text-xs" : "w-8 h-8"
|
||||
)}
|
||||
>
|
||||
+
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setZoom(z => Math.max(0.3, z / 1.15))}
|
||||
className="w-8 h-8 rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center"
|
||||
className={cn(
|
||||
"rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors flex items-center justify-center",
|
||||
compact ? "w-6 h-6 text-xs" : "w-8 h-8"
|
||||
)}
|
||||
>
|
||||
−
|
||||
</button>
|
||||
<button
|
||||
onClick={() => {
|
||||
// Fit to view with minimum zoom for readability
|
||||
const paddingX = 80;
|
||||
const paddingY = 80;
|
||||
const paddingX = compact ? 40 : 80;
|
||||
const paddingY = compact ? 40 : 80;
|
||||
const availableWidth = dimensions.width - paddingX;
|
||||
const availableHeight = dimensions.height - paddingY;
|
||||
|
||||
const scaleX = availableWidth / layout.width;
|
||||
const scaleY = availableHeight / layout.height;
|
||||
const MIN_ZOOM = 0.4;
|
||||
const MIN_ZOOM = compact ? 0.3 : 0.4;
|
||||
const fitZoom = Math.max(MIN_ZOOM, Math.min(1, Math.min(scaleX, scaleY)));
|
||||
|
||||
const scaledWidth = layout.width * fitZoom;
|
||||
@@ -726,26 +735,31 @@ export function AgentTreeCanvas({
|
||||
const scaledHeight = layout.height * fitZoom;
|
||||
const centerY = scaledHeight < availableHeight
|
||||
? (dimensions.height - scaledHeight) / 2
|
||||
: 30;
|
||||
: 20;
|
||||
|
||||
setZoom(fitZoom);
|
||||
setPan({ x: centerX, y: centerY });
|
||||
}}
|
||||
className="px-2 h-8 rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors text-xs"
|
||||
className={cn(
|
||||
"rounded-lg bg-black/40 backdrop-blur-sm border border-white/[0.06] text-white/60 hover:text-white hover:bg-white/[0.04] transition-colors text-xs",
|
||||
compact ? "px-1.5 h-6" : "px-2 h-8"
|
||||
)}
|
||||
>
|
||||
Fit
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Node details panel */}
|
||||
<AnimatePresence>
|
||||
{selectedNode && (
|
||||
<NodeDetailsPanel
|
||||
node={selectedNode}
|
||||
onClose={() => onSelectNode?.(null)}
|
||||
/>
|
||||
)}
|
||||
</AnimatePresence>
|
||||
{/* Node details panel - hidden in compact mode */}
|
||||
{!compact && (
|
||||
<AnimatePresence>
|
||||
{selectedNode && (
|
||||
<NodeDetailsPanel
|
||||
node={selectedNode}
|
||||
onClose={() => onSelectNode?.(null)}
|
||||
/>
|
||||
)}
|
||||
</AnimatePresence>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ import {
|
||||
LayoutDashboard,
|
||||
MessageSquare,
|
||||
Network,
|
||||
History,
|
||||
Terminal,
|
||||
Settings,
|
||||
Plug,
|
||||
@@ -21,11 +20,10 @@ import {
|
||||
|
||||
const navigation = [
|
||||
{ name: 'Overview', href: '/', icon: LayoutDashboard },
|
||||
{ name: 'Control', href: '/control', icon: MessageSquare },
|
||||
{ name: 'Agents', href: '/agents', icon: Network },
|
||||
{ name: 'Modules', href: '/modules', icon: Plug },
|
||||
{ name: 'Mission', href: '/control', icon: MessageSquare },
|
||||
{ name: 'Agents', href: '/history', icon: Network },
|
||||
{ name: 'Console', href: '/console', icon: Terminal },
|
||||
{ name: 'History', href: '/history', icon: History },
|
||||
{ name: 'Modules', href: '/modules', icon: Plug },
|
||||
{ name: 'Settings', href: '/settings', icon: Settings },
|
||||
];
|
||||
|
||||
|
||||
@@ -121,3 +121,6 @@ export function ConfirmDialog({
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -55,3 +55,6 @@ export function CopyButton({ text, className, label = 'Copied!', showOnHover = t
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -51,3 +51,6 @@ export function RelativeTime({ date, className }: RelativeTimeProps) {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -87,3 +87,6 @@ export function ShimmerText({ lines = 3, className }: ShimmerProps & { lines?: n
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -748,3 +748,61 @@ export async function uploadFile(
|
||||
|
||||
return res.json();
|
||||
}
|
||||
|
||||
// ==================== Models ====================
|
||||
|
||||
export interface ModelsResponse {
|
||||
models: string[];
|
||||
count: number;
|
||||
}
|
||||
|
||||
// List available models
|
||||
export async function listModels(tier?: string): Promise<ModelsResponse> {
|
||||
const params = tier ? `?tier=${encodeURIComponent(tier)}` : "";
|
||||
const res = await apiFetch(`/api/models${params}`);
|
||||
if (!res.ok) throw new Error("Failed to fetch models");
|
||||
return res.json();
|
||||
}
|
||||
|
||||
// Friendly display names for models
|
||||
const MODEL_DISPLAY_NAMES: Record<string, string> = {
|
||||
// OpenAI - simplified (newest first)
|
||||
"openai/gpt-5.2-pro": "gpt-5.2-pro",
|
||||
"openai/gpt-5.2": "gpt-5.2",
|
||||
"openai/gpt-5.2-chat": "gpt-5.2",
|
||||
"openai/gpt-4.1-mini": "gpt-4-mini",
|
||||
"openai/gpt-4.1": "gpt-4",
|
||||
"openai/o1": "o1",
|
||||
"openai/o3-mini-high": "o3-mini",
|
||||
// Anthropic - simplified
|
||||
"anthropic/claude-sonnet-4.5": "4.5-sonnet",
|
||||
"anthropic/claude-opus-4.5": "4.5-opus",
|
||||
"anthropic/claude-haiku-4.5": "4.5-haiku",
|
||||
// Google
|
||||
"google/gemini-3-flash-preview": "gemini-3-flash",
|
||||
"google/gemini-3-pro-image-preview": "gemini-3-pro",
|
||||
// DeepSeek
|
||||
"deepseek/deepseek-r1": "deepseek-r1",
|
||||
"deepseek/deepseek-chat-v3-0324": "deepseek-v3",
|
||||
// Qwen
|
||||
"qwen/qwq-32b": "qwq-32b",
|
||||
"qwen/qwen-2.5-72b-instruct": "qwen-72b",
|
||||
"qwen/qwen3-next-80b-a3b-thinking": "qwen3-thinking",
|
||||
// Mistral
|
||||
"mistralai/mistral-small-24b-instruct-2501": "mistral-small",
|
||||
"mistralai/mistral-medium-3.1": "mistral-medium",
|
||||
"mistralai/mistral-large-2512": "mistral-large",
|
||||
// Meta
|
||||
"meta-llama/llama-3.1-405b": "llama-405b",
|
||||
"meta-llama/llama-3.2-90b-vision-instruct": "llama-90b-vision",
|
||||
"meta-llama/llama-3.3-70b-instruct:free": "llama-70b (free)",
|
||||
};
|
||||
|
||||
// Get display name for a model
|
||||
export function getModelDisplayName(modelId: string): string {
|
||||
if (MODEL_DISPLAY_NAMES[modelId]) {
|
||||
return MODEL_DISPLAY_NAMES[modelId];
|
||||
}
|
||||
// Fallback: strip provider prefix
|
||||
return modelId.includes("/") ? modelId.split("/").pop()! : modelId;
|
||||
}
|
||||
|
||||
@@ -56,3 +56,6 @@ export function getRuntimeTaskDefaults(): { model?: string; budget_cents?: numbe
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -38,3 +38,6 @@ export function formatRelativeTime(date: Date): string {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -11,14 +11,35 @@ enum MissionStatus: String, Codable, CaseIterable {
|
||||
case active
|
||||
case completed
|
||||
case failed
|
||||
case interrupted
|
||||
case blocked
|
||||
case notFeasible = "not_feasible"
|
||||
|
||||
var statusType: StatusType {
|
||||
switch self {
|
||||
case .active: return .active
|
||||
case .completed: return .completed
|
||||
case .failed: return .failed
|
||||
case .interrupted: return .interrupted
|
||||
case .blocked: return .blocked
|
||||
case .notFeasible: return .failed
|
||||
}
|
||||
}
|
||||
|
||||
var displayLabel: String {
|
||||
switch self {
|
||||
case .active: return "Active"
|
||||
case .completed: return "Completed"
|
||||
case .failed: return "Failed"
|
||||
case .interrupted: return "Interrupted"
|
||||
case .blocked: return "Blocked"
|
||||
case .notFeasible: return "Not Feasible"
|
||||
}
|
||||
}
|
||||
|
||||
var canResume: Bool {
|
||||
self == .interrupted || self == .blocked
|
||||
}
|
||||
}
|
||||
|
||||
struct MissionHistoryEntry: Codable, Identifiable {
|
||||
@@ -35,9 +56,12 @@ struct Mission: Codable, Identifiable, Hashable {
|
||||
let id: String
|
||||
var status: MissionStatus
|
||||
let title: String?
|
||||
let modelOverride: String?
|
||||
let history: [MissionHistoryEntry]
|
||||
let createdAt: String
|
||||
let updatedAt: String
|
||||
let interruptedAt: String?
|
||||
let resumable: Bool
|
||||
|
||||
func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(id)
|
||||
@@ -48,9 +72,24 @@ struct Mission: Codable, Identifiable, Hashable {
|
||||
}
|
||||
|
||||
enum CodingKeys: String, CodingKey {
|
||||
case id, status, title, history
|
||||
case id, status, title, history, resumable
|
||||
case modelOverride = "model_override"
|
||||
case createdAt = "created_at"
|
||||
case updatedAt = "updated_at"
|
||||
case interruptedAt = "interrupted_at"
|
||||
}
|
||||
|
||||
init(from decoder: Decoder) throws {
|
||||
let container = try decoder.container(keyedBy: CodingKeys.self)
|
||||
id = try container.decode(String.self, forKey: .id)
|
||||
status = try container.decode(MissionStatus.self, forKey: .status)
|
||||
title = try container.decodeIfPresent(String.self, forKey: .title)
|
||||
modelOverride = try container.decodeIfPresent(String.self, forKey: .modelOverride)
|
||||
history = try container.decode([MissionHistoryEntry].self, forKey: .history)
|
||||
createdAt = try container.decode(String.self, forKey: .createdAt)
|
||||
updatedAt = try container.decode(String.self, forKey: .updatedAt)
|
||||
interruptedAt = try container.decodeIfPresent(String.self, forKey: .interruptedAt)
|
||||
resumable = try container.decodeIfPresent(Bool.self, forKey: .resumable) ?? false
|
||||
}
|
||||
|
||||
var displayTitle: String {
|
||||
@@ -60,11 +99,20 @@ struct Mission: Codable, Identifiable, Hashable {
|
||||
return "Untitled Mission"
|
||||
}
|
||||
|
||||
var displayModel: String? {
|
||||
guard let model = modelOverride else { return nil }
|
||||
return model.split(separator: "/").last.map(String.init)
|
||||
}
|
||||
|
||||
var updatedDate: Date? {
|
||||
let formatter = ISO8601DateFormatter()
|
||||
formatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds]
|
||||
return formatter.date(from: updatedAt) ?? ISO8601DateFormatter().date(from: updatedAt)
|
||||
}
|
||||
|
||||
var canResume: Bool {
|
||||
resumable && status.canResume
|
||||
}
|
||||
}
|
||||
|
||||
enum TaskStatus: String, Codable, CaseIterable {
|
||||
@@ -101,6 +149,66 @@ struct TaskState: Codable, Identifiable {
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Parallel Execution
|
||||
|
||||
struct RunningMissionInfo: Codable, Identifiable {
|
||||
let missionId: String
|
||||
let modelOverride: String?
|
||||
let state: String
|
||||
let queueLen: Int
|
||||
let historyLen: Int
|
||||
let secondsSinceActivity: Int
|
||||
let expectedDeliverables: Int
|
||||
|
||||
var id: String { missionId }
|
||||
|
||||
enum CodingKeys: String, CodingKey {
|
||||
case missionId = "mission_id"
|
||||
case modelOverride = "model_override"
|
||||
case state
|
||||
case queueLen = "queue_len"
|
||||
case historyLen = "history_len"
|
||||
case secondsSinceActivity = "seconds_since_activity"
|
||||
case expectedDeliverables = "expected_deliverables"
|
||||
}
|
||||
|
||||
// Memberwise initializer for previews and testing
|
||||
init(missionId: String, modelOverride: String?, state: String, queueLen: Int, historyLen: Int, secondsSinceActivity: Int, expectedDeliverables: Int) {
|
||||
self.missionId = missionId
|
||||
self.modelOverride = modelOverride
|
||||
self.state = state
|
||||
self.queueLen = queueLen
|
||||
self.historyLen = historyLen
|
||||
self.secondsSinceActivity = secondsSinceActivity
|
||||
self.expectedDeliverables = expectedDeliverables
|
||||
}
|
||||
|
||||
var isRunning: Bool {
|
||||
state == "running" || state == "waiting_for_tool"
|
||||
}
|
||||
|
||||
var isStalled: Bool {
|
||||
isRunning && secondsSinceActivity > 60
|
||||
}
|
||||
|
||||
var displayModel: String {
|
||||
guard let model = modelOverride else { return "Default" }
|
||||
return model.split(separator: "/").last.map(String.init) ?? model
|
||||
}
|
||||
}
|
||||
|
||||
struct ParallelConfig: Codable {
|
||||
let maxParallelMissions: Int
|
||||
let runningCount: Int
|
||||
|
||||
enum CodingKeys: String, CodingKey {
|
||||
case maxParallelMissions = "max_parallel_missions"
|
||||
case runningCount = "running_count"
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Runs
|
||||
|
||||
struct Run: Codable, Identifiable {
|
||||
let id: String
|
||||
let createdAt: String
|
||||
|
||||
@@ -98,6 +98,32 @@ final class APIService {
|
||||
let _: EmptyResponse = try await post("/api/control/missions/\(id)/status", body: StatusRequest(status: status.rawValue))
|
||||
}
|
||||
|
||||
func resumeMission(id: String) async throws -> Mission {
|
||||
try await post("/api/control/missions/\(id)/resume", body: EmptyBody())
|
||||
}
|
||||
|
||||
func cancelMission(id: String) async throws {
|
||||
let _: EmptyResponse = try await post("/api/control/missions/\(id)/cancel", body: EmptyBody())
|
||||
}
|
||||
|
||||
// MARK: - Parallel Missions
|
||||
|
||||
func getRunningMissions() async throws -> [RunningMissionInfo] {
|
||||
try await get("/api/control/running")
|
||||
}
|
||||
|
||||
func startMissionParallel(id: String, content: String, model: String? = nil) async throws {
|
||||
struct ParallelRequest: Encodable {
|
||||
let content: String
|
||||
let model: String?
|
||||
}
|
||||
let _: EmptyResponse = try await post("/api/control/missions/\(id)/parallel", body: ParallelRequest(content: content, model: model))
|
||||
}
|
||||
|
||||
func getParallelConfig() async throws -> ParallelConfig {
|
||||
try await get("/api/control/parallel/config")
|
||||
}
|
||||
|
||||
// MARK: - Control
|
||||
|
||||
func sendMessage(content: String) async throws -> (id: String, queued: Bool) {
|
||||
|
||||
@@ -0,0 +1,269 @@
|
||||
//
|
||||
// RunningMissionsBar.swift
|
||||
// OpenAgentDashboard
|
||||
//
|
||||
// Compact horizontal bar showing currently running missions
|
||||
// Allows switching between parallel missions
|
||||
//
|
||||
|
||||
import SwiftUI
|
||||
|
||||
struct RunningMissionsBar: View {
|
||||
let runningMissions: [RunningMissionInfo]
|
||||
let currentMission: Mission?
|
||||
let viewingMissionId: String?
|
||||
let onSelectMission: (String) -> Void
|
||||
let onCancelMission: (String) -> Void
|
||||
let onRefresh: () -> Void
|
||||
|
||||
var body: some View {
|
||||
ScrollView(.horizontal, showsIndicators: false) {
|
||||
HStack(spacing: 8) {
|
||||
// Header with refresh button
|
||||
headerView
|
||||
|
||||
// Current mission if not in running list
|
||||
if let mission = currentMission,
|
||||
!runningMissions.contains(where: { $0.missionId == mission.id }) {
|
||||
currentMissionChip(mission)
|
||||
}
|
||||
|
||||
// Running missions
|
||||
ForEach(runningMissions) { mission in
|
||||
runningMissionChip(mission)
|
||||
}
|
||||
}
|
||||
.padding(.horizontal, 16)
|
||||
.padding(.vertical, 8)
|
||||
}
|
||||
.background(.ultraThinMaterial)
|
||||
}
|
||||
|
||||
// MARK: - Header
|
||||
|
||||
private var headerView: some View {
|
||||
HStack(spacing: 6) {
|
||||
Image(systemName: "square.stack.3d.up")
|
||||
.font(.system(size: 12, weight: .medium))
|
||||
.foregroundStyle(Theme.textTertiary)
|
||||
|
||||
Text("Running")
|
||||
.font(.caption.weight(.medium))
|
||||
.foregroundStyle(Theme.textTertiary)
|
||||
|
||||
Text("(\(runningMissions.count))")
|
||||
.font(.caption)
|
||||
.foregroundStyle(Theme.textMuted)
|
||||
|
||||
Button(action: onRefresh) {
|
||||
Image(systemName: "arrow.clockwise")
|
||||
.font(.system(size: 10, weight: .medium))
|
||||
.foregroundStyle(Theme.textMuted)
|
||||
}
|
||||
.padding(4)
|
||||
.contentShape(Rectangle())
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Current Mission Chip
|
||||
|
||||
private func currentMissionChip(_ mission: Mission) -> some View {
|
||||
let isViewing = viewingMissionId == mission.id
|
||||
|
||||
return Button {
|
||||
onSelectMission(mission.id)
|
||||
} label: {
|
||||
HStack(spacing: 6) {
|
||||
// Status dot
|
||||
Circle()
|
||||
.fill(Theme.success)
|
||||
.frame(width: 6, height: 6)
|
||||
|
||||
// Model name
|
||||
Text(mission.displayModel ?? "Default")
|
||||
.font(.caption.weight(.medium))
|
||||
.foregroundStyle(Theme.textPrimary)
|
||||
.lineLimit(1)
|
||||
|
||||
// Mission ID
|
||||
Text(String(mission.id.prefix(8)))
|
||||
.font(.system(size: 9).monospaced())
|
||||
.foregroundStyle(Theme.textMuted)
|
||||
|
||||
// Selection indicator
|
||||
if isViewing {
|
||||
Image(systemName: "checkmark")
|
||||
.font(.system(size: 9, weight: .bold))
|
||||
.foregroundStyle(Theme.accent)
|
||||
}
|
||||
}
|
||||
.padding(.horizontal, 10)
|
||||
.padding(.vertical, 6)
|
||||
.background(isViewing ? Theme.accent.opacity(0.15) : Color.white.opacity(0.05))
|
||||
.clipShape(RoundedRectangle(cornerRadius: 8, style: .continuous))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 8, style: .continuous)
|
||||
.stroke(isViewing ? Theme.accent.opacity(0.3) : Theme.border, lineWidth: 1)
|
||||
)
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
}
|
||||
|
||||
// MARK: - Running Mission Chip
|
||||
|
||||
private func runningMissionChip(_ mission: RunningMissionInfo) -> some View {
|
||||
let isViewing = viewingMissionId == mission.missionId
|
||||
let isStalled = mission.isStalled
|
||||
// Only show severely stalled state for running missions
|
||||
let isSeverlyStalled = mission.isRunning && mission.secondsSinceActivity > 120
|
||||
|
||||
let borderColor: Color = {
|
||||
if isViewing { return Theme.accent.opacity(0.3) }
|
||||
if isSeverlyStalled { return Theme.error.opacity(0.3) }
|
||||
if isStalled { return Theme.warning.opacity(0.3) }
|
||||
return Theme.border
|
||||
}()
|
||||
|
||||
let backgroundColor: Color = {
|
||||
if isViewing { return Theme.accent.opacity(0.15) }
|
||||
if isSeverlyStalled { return Theme.error.opacity(0.1) }
|
||||
if isStalled { return Theme.warning.opacity(0.1) }
|
||||
return Color.white.opacity(0.05)
|
||||
}()
|
||||
|
||||
return HStack(spacing: 6) {
|
||||
// Tap area for selection
|
||||
Button {
|
||||
onSelectMission(mission.missionId)
|
||||
} label: {
|
||||
HStack(spacing: 6) {
|
||||
// Status dot with animation
|
||||
Circle()
|
||||
.fill(statusColor(for: mission))
|
||||
.frame(width: 6, height: 6)
|
||||
.overlay {
|
||||
if mission.isRunning && !isStalled {
|
||||
Circle()
|
||||
.stroke(statusColor(for: mission).opacity(0.5), lineWidth: 1.5)
|
||||
.frame(width: 10, height: 10)
|
||||
.opacity(0.6)
|
||||
}
|
||||
}
|
||||
|
||||
// Model name
|
||||
Text(mission.displayModel)
|
||||
.font(.caption.weight(.medium))
|
||||
.foregroundStyle(Theme.textPrimary)
|
||||
.lineLimit(1)
|
||||
|
||||
// Mission ID
|
||||
Text(String(mission.missionId.prefix(8)))
|
||||
.font(.system(size: 9).monospaced())
|
||||
.foregroundStyle(Theme.textMuted)
|
||||
|
||||
// Stalled indicator
|
||||
if isStalled {
|
||||
HStack(spacing: 2) {
|
||||
Image(systemName: "exclamationmark.triangle.fill")
|
||||
.font(.system(size: 8))
|
||||
Text("\(mission.secondsSinceActivity)s")
|
||||
.font(.system(size: 9).monospaced())
|
||||
}
|
||||
.foregroundStyle(isSeverlyStalled ? Theme.error : Theme.warning)
|
||||
}
|
||||
|
||||
// Selection indicator
|
||||
if isViewing {
|
||||
Image(systemName: "checkmark")
|
||||
.font(.system(size: 9, weight: .bold))
|
||||
.foregroundStyle(Theme.accent)
|
||||
}
|
||||
}
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
|
||||
// Cancel button
|
||||
Button {
|
||||
onCancelMission(mission.missionId)
|
||||
} label: {
|
||||
Image(systemName: "xmark")
|
||||
.font(.system(size: 9, weight: .medium))
|
||||
.foregroundStyle(Theme.textMuted)
|
||||
.frame(width: 18, height: 18)
|
||||
.background(Color.white.opacity(0.05))
|
||||
.clipShape(Circle())
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
}
|
||||
.padding(.leading, 10)
|
||||
.padding(.trailing, 6)
|
||||
.padding(.vertical, 6)
|
||||
.background(backgroundColor)
|
||||
.clipShape(RoundedRectangle(cornerRadius: 8, style: .continuous))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 8, style: .continuous)
|
||||
.stroke(borderColor, lineWidth: 1)
|
||||
)
|
||||
}
|
||||
|
||||
// MARK: - Helpers
|
||||
|
||||
private func statusColor(for mission: RunningMissionInfo) -> Color {
|
||||
// Only show stalled/severely-stalled states for running missions
|
||||
if mission.isRunning && mission.secondsSinceActivity > 120 {
|
||||
return Theme.error
|
||||
} else if mission.isStalled {
|
||||
return Theme.warning
|
||||
} else if mission.isRunning {
|
||||
return Theme.success
|
||||
} else {
|
||||
return Theme.warning
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Preview
|
||||
|
||||
#Preview {
|
||||
VStack(spacing: 0) {
|
||||
RunningMissionsBar(
|
||||
runningMissions: [
|
||||
RunningMissionInfo(
|
||||
missionId: "abc12345-6789-0000-0000-000000000001",
|
||||
modelOverride: "deepseek/deepseek-v3.2",
|
||||
state: "running",
|
||||
queueLen: 0,
|
||||
historyLen: 5,
|
||||
secondsSinceActivity: 15,
|
||||
expectedDeliverables: 0
|
||||
),
|
||||
RunningMissionInfo(
|
||||
missionId: "def12345-6789-0000-0000-000000000002",
|
||||
modelOverride: "qwen/qwen3-235b",
|
||||
state: "running",
|
||||
queueLen: 1,
|
||||
historyLen: 3,
|
||||
secondsSinceActivity: 75,
|
||||
expectedDeliverables: 0
|
||||
),
|
||||
RunningMissionInfo(
|
||||
missionId: "ghi12345-6789-0000-0000-000000000003",
|
||||
modelOverride: nil,
|
||||
state: "running",
|
||||
queueLen: 0,
|
||||
historyLen: 10,
|
||||
secondsSinceActivity: 150,
|
||||
expectedDeliverables: 0
|
||||
)
|
||||
],
|
||||
currentMission: nil,
|
||||
viewingMissionId: "abc12345-6789-0000-0000-000000000001",
|
||||
onSelectMission: { _ in },
|
||||
onCancelMission: { _ in },
|
||||
onRefresh: {}
|
||||
)
|
||||
|
||||
Spacer()
|
||||
}
|
||||
.background(Theme.backgroundPrimary)
|
||||
}
|
||||
@@ -19,6 +19,8 @@ enum StatusType {
|
||||
case connected
|
||||
case disconnected
|
||||
case connecting
|
||||
case interrupted
|
||||
case blocked
|
||||
|
||||
var color: Color {
|
||||
switch self {
|
||||
@@ -32,6 +34,8 @@ enum StatusType {
|
||||
return Theme.error
|
||||
case .cancelled, .disconnected:
|
||||
return Theme.textTertiary
|
||||
case .interrupted, .blocked:
|
||||
return Theme.warning
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,6 +56,8 @@ enum StatusType {
|
||||
case .connected: return "Connected"
|
||||
case .disconnected: return "Disconnected"
|
||||
case .connecting: return "Connecting"
|
||||
case .interrupted: return "Interrupted"
|
||||
case .blocked: return "Blocked"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,6 +72,8 @@ enum StatusType {
|
||||
case .idle: return "moon.fill"
|
||||
case .connected: return "wifi"
|
||||
case .disconnected: return "wifi.slash"
|
||||
case .interrupted: return "pause.circle.fill"
|
||||
case .blocked: return "exclamationmark.triangle.fill"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,15 @@ struct ControlView: View {
|
||||
@State private var isAtBottom = true
|
||||
@State private var copiedMessageId: String?
|
||||
|
||||
// Parallel missions state
|
||||
@State private var runningMissions: [RunningMissionInfo] = []
|
||||
@State private var viewingMissionId: String?
|
||||
@State private var showRunningMissions = false
|
||||
@State private var pollingTask: Task<Void, Never>?
|
||||
|
||||
// Track pending fetch to prevent race conditions
|
||||
@State private var fetchingMissionId: String?
|
||||
|
||||
@FocusState private var isInputFocused: Bool
|
||||
|
||||
private let api = APIService.shared
|
||||
@@ -36,6 +45,11 @@ struct ControlView: View {
|
||||
backgroundGlows
|
||||
|
||||
VStack(spacing: 0) {
|
||||
// Running missions bar (when there are parallel missions)
|
||||
if showRunningMissions && (!runningMissions.isEmpty || currentMission != nil) {
|
||||
runningMissionsBar
|
||||
}
|
||||
|
||||
// Messages
|
||||
messagesView
|
||||
|
||||
@@ -76,6 +90,26 @@ struct ControlView: View {
|
||||
}
|
||||
}
|
||||
|
||||
ToolbarItem(placement: .topBarLeading) {
|
||||
// Running missions toggle
|
||||
Button {
|
||||
withAnimation(.easeInOut(duration: 0.2)) {
|
||||
showRunningMissions.toggle()
|
||||
}
|
||||
HapticService.selectionChanged()
|
||||
} label: {
|
||||
HStack(spacing: 4) {
|
||||
Image(systemName: "square.stack.3d.up")
|
||||
.font(.system(size: 14))
|
||||
if !runningMissions.isEmpty {
|
||||
Text("\(runningMissions.count)")
|
||||
.font(.caption2.weight(.semibold))
|
||||
}
|
||||
}
|
||||
.foregroundStyle(showRunningMissions ? Theme.accent : Theme.textSecondary)
|
||||
}
|
||||
}
|
||||
|
||||
ToolbarItem(placement: .topBarTrailing) {
|
||||
Menu {
|
||||
Button {
|
||||
@@ -87,6 +121,15 @@ struct ControlView: View {
|
||||
if let mission = currentMission {
|
||||
Divider()
|
||||
|
||||
// Resume button for interrupted/blocked missions
|
||||
if mission.canResume {
|
||||
Button {
|
||||
Task { await resumeMission() }
|
||||
} label: {
|
||||
Label("Resume Mission", systemImage: "play.circle")
|
||||
}
|
||||
}
|
||||
|
||||
Button {
|
||||
Task { await setMissionStatus(.completed) }
|
||||
} label: {
|
||||
@@ -99,7 +142,7 @@ struct ControlView: View {
|
||||
Label("Mark Failed", systemImage: "xmark.circle")
|
||||
}
|
||||
|
||||
if mission.status != .active {
|
||||
if mission.status != .active && !mission.canResume {
|
||||
Button {
|
||||
Task { await setMissionStatus(.active) }
|
||||
} label: {
|
||||
@@ -117,10 +160,22 @@ struct ControlView: View {
|
||||
// Check if we're being opened with a specific mission from History
|
||||
if let pendingId = nav.consumePendingMission() {
|
||||
await loadMission(id: pendingId)
|
||||
viewingMissionId = pendingId
|
||||
} else {
|
||||
await loadCurrentMission()
|
||||
viewingMissionId = currentMission?.id
|
||||
}
|
||||
|
||||
// Fetch initial running missions
|
||||
await refreshRunningMissions()
|
||||
|
||||
// Auto-show bar if there are multiple running missions
|
||||
if runningMissions.count > 1 {
|
||||
showRunningMissions = true
|
||||
}
|
||||
|
||||
startStreaming()
|
||||
startPollingRunningMissions()
|
||||
}
|
||||
.onChange(of: nav.pendingMissionId) { _, newId in
|
||||
// Handle navigation from History while Control is already visible
|
||||
@@ -128,15 +183,42 @@ struct ControlView: View {
|
||||
nav.pendingMissionId = nil
|
||||
Task {
|
||||
await loadMission(id: missionId)
|
||||
viewingMissionId = missionId
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
.onChange(of: currentMission?.id) { _, newId in
|
||||
// Sync viewingMissionId with currentMission when it changes
|
||||
if viewingMissionId == nil, let id = newId {
|
||||
viewingMissionId = id
|
||||
}
|
||||
}
|
||||
.onDisappear {
|
||||
streamTask?.cancel()
|
||||
pollingTask?.cancel()
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Running Missions Bar
|
||||
|
||||
private var runningMissionsBar: some View {
|
||||
RunningMissionsBar(
|
||||
runningMissions: runningMissions,
|
||||
currentMission: currentMission,
|
||||
viewingMissionId: viewingMissionId,
|
||||
onSelectMission: { missionId in
|
||||
Task { await switchToMission(id: missionId) }
|
||||
},
|
||||
onCancelMission: { missionId in
|
||||
Task { await cancelMission(id: missionId) }
|
||||
},
|
||||
onRefresh: {
|
||||
Task { await refreshRunningMissions() }
|
||||
}
|
||||
)
|
||||
.transition(.move(edge: .top).combined(with: .opacity))
|
||||
}
|
||||
|
||||
// MARK: - Background
|
||||
|
||||
private var backgroundGlows: some View {
|
||||
@@ -448,6 +530,7 @@ struct ControlView: View {
|
||||
do {
|
||||
if let mission = try await api.getCurrentMission() {
|
||||
currentMission = mission
|
||||
viewingMissionId = mission.id
|
||||
messages = mission.history.enumerated().map { index, entry in
|
||||
ChatMessage(
|
||||
id: "\(mission.id)-\(index)",
|
||||
@@ -467,28 +550,40 @@ struct ControlView: View {
|
||||
}
|
||||
|
||||
private func loadMission(id: String) async {
|
||||
// Set target immediately for race condition tracking
|
||||
fetchingMissionId = id
|
||||
|
||||
isLoading = true
|
||||
defer { isLoading = false }
|
||||
|
||||
do {
|
||||
let missions = try await api.listMissions()
|
||||
if let mission = missions.first(where: { $0.id == id }) {
|
||||
currentMission = mission
|
||||
messages = mission.history.enumerated().map { index, entry in
|
||||
ChatMessage(
|
||||
id: "\(mission.id)-\(index)",
|
||||
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
|
||||
content: entry.content
|
||||
)
|
||||
}
|
||||
HapticService.success()
|
||||
|
||||
// Scroll to bottom after loading
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
|
||||
shouldScrollToBottom = true
|
||||
}
|
||||
let mission = try await api.getMission(id: id)
|
||||
|
||||
// Race condition guard: only update if this is still the mission we want
|
||||
guard fetchingMissionId == id else {
|
||||
return // Another mission was requested, discard this response
|
||||
}
|
||||
|
||||
currentMission = mission
|
||||
viewingMissionId = mission.id
|
||||
messages = mission.history.enumerated().map { index, entry in
|
||||
ChatMessage(
|
||||
id: "\(mission.id)-\(index)",
|
||||
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
|
||||
content: entry.content
|
||||
)
|
||||
}
|
||||
isLoading = false
|
||||
HapticService.success()
|
||||
|
||||
// Scroll to bottom after loading
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) {
|
||||
shouldScrollToBottom = true
|
||||
}
|
||||
} catch {
|
||||
// Race condition guard
|
||||
guard fetchingMissionId == id else { return }
|
||||
|
||||
isLoading = false
|
||||
print("Failed to load mission: \(error)")
|
||||
}
|
||||
}
|
||||
@@ -497,7 +592,19 @@ struct ControlView: View {
|
||||
do {
|
||||
let mission = try await api.createMission()
|
||||
currentMission = mission
|
||||
viewingMissionId = mission.id
|
||||
messages = []
|
||||
|
||||
// Refresh running missions to show the new mission
|
||||
await refreshRunningMissions()
|
||||
|
||||
// Show the bar when creating new missions
|
||||
if !showRunningMissions && !runningMissions.isEmpty {
|
||||
withAnimation(.easeInOut(duration: 0.2)) {
|
||||
showRunningMissions = true
|
||||
}
|
||||
}
|
||||
|
||||
HapticService.success()
|
||||
} catch {
|
||||
print("Failed to create mission: \(error)")
|
||||
@@ -518,6 +625,33 @@ struct ControlView: View {
|
||||
}
|
||||
}
|
||||
|
||||
private func resumeMission() async {
|
||||
guard let mission = currentMission, mission.canResume else { return }
|
||||
|
||||
do {
|
||||
let resumed = try await api.resumeMission(id: mission.id)
|
||||
currentMission = resumed
|
||||
viewingMissionId = resumed.id
|
||||
// Reload messages to get the resume prompt
|
||||
messages = resumed.history.enumerated().map { index, entry in
|
||||
ChatMessage(
|
||||
id: "\(resumed.id)-\(index)",
|
||||
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
|
||||
content: entry.content
|
||||
)
|
||||
}
|
||||
|
||||
// Refresh running missions
|
||||
await refreshRunningMissions()
|
||||
|
||||
HapticService.success()
|
||||
shouldScrollToBottom = true
|
||||
} catch {
|
||||
print("Failed to resume mission: \(error)")
|
||||
HapticService.error()
|
||||
}
|
||||
}
|
||||
|
||||
private func sendMessage() {
|
||||
let content = inputText.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
guard !content.isEmpty else { return }
|
||||
@@ -553,25 +687,119 @@ struct ControlView: View {
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Parallel Missions
|
||||
|
||||
private func refreshRunningMissions() async {
|
||||
do {
|
||||
runningMissions = try await api.getRunningMissions()
|
||||
} catch {
|
||||
print("Failed to refresh running missions: \(error)")
|
||||
}
|
||||
}
|
||||
|
||||
private func startPollingRunningMissions() {
|
||||
pollingTask = Task {
|
||||
while !Task.isCancelled {
|
||||
try? await Task.sleep(for: .seconds(3))
|
||||
guard !Task.isCancelled else { break }
|
||||
await refreshRunningMissions()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private func switchToMission(id: String) async {
|
||||
guard id != viewingMissionId else { return }
|
||||
|
||||
// Set the target mission ID immediately for race condition tracking
|
||||
viewingMissionId = id
|
||||
fetchingMissionId = id
|
||||
|
||||
isLoading = true
|
||||
|
||||
do {
|
||||
// Load the mission from API
|
||||
let mission = try await api.getMission(id: id)
|
||||
|
||||
// Race condition guard: only update if this is still the mission we want
|
||||
guard fetchingMissionId == id else {
|
||||
return // Another mission was requested, discard this response
|
||||
}
|
||||
|
||||
// If this is not a parallel mission, also update currentMission
|
||||
if runningMissions.contains(where: { $0.missionId == id }) {
|
||||
// This is a parallel mission - just load its history
|
||||
messages = mission.history.enumerated().map { index, entry in
|
||||
ChatMessage(
|
||||
id: "\(mission.id)-\(index)",
|
||||
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
|
||||
content: entry.content
|
||||
)
|
||||
}
|
||||
} else {
|
||||
// This is the main mission - load it fully
|
||||
currentMission = mission
|
||||
messages = mission.history.enumerated().map { index, entry in
|
||||
ChatMessage(
|
||||
id: "\(mission.id)-\(index)",
|
||||
type: entry.isUser ? .user : .assistant(success: true, costCents: 0, model: nil),
|
||||
content: entry.content
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
isLoading = false
|
||||
HapticService.selectionChanged()
|
||||
shouldScrollToBottom = true
|
||||
} catch {
|
||||
// Race condition guard: only show error if this is still the mission we want
|
||||
guard fetchingMissionId == id else { return }
|
||||
|
||||
isLoading = false
|
||||
print("Failed to switch mission: \(error)")
|
||||
HapticService.error()
|
||||
}
|
||||
}
|
||||
|
||||
private func cancelMission(id: String) async {
|
||||
do {
|
||||
try await api.cancelMission(id: id)
|
||||
|
||||
// Refresh running missions
|
||||
await refreshRunningMissions()
|
||||
|
||||
// If we were viewing this mission, switch to current
|
||||
if viewingMissionId == id {
|
||||
if let currentId = currentMission?.id {
|
||||
await switchToMission(id: currentId)
|
||||
}
|
||||
}
|
||||
|
||||
HapticService.success()
|
||||
} catch {
|
||||
print("Failed to cancel mission: \(error)")
|
||||
HapticService.error()
|
||||
}
|
||||
}
|
||||
|
||||
private func handleStreamEvent(type: String, data: [String: Any]) {
|
||||
// Filter events by mission_id - only show events for the current mission
|
||||
// Filter events by mission_id - only show events for the mission we're viewing
|
||||
// This prevents cross-mission contamination when parallel missions are running
|
||||
let eventMissionId = data["mission_id"] as? String
|
||||
let currentMissionId = currentMission?.id
|
||||
let viewingId = viewingMissionId
|
||||
let currentId = currentMission?.id
|
||||
|
||||
// Only allow status events from any mission (for global state)
|
||||
// All other events must match the current mission
|
||||
// All other events must match the mission we're viewing
|
||||
if type != "status" {
|
||||
if let eventId = eventMissionId {
|
||||
// Event has a mission_id - must match current mission
|
||||
if eventId != currentMissionId {
|
||||
// Event has a mission_id - must match viewing mission
|
||||
if eventId != viewingId {
|
||||
return // Skip events from other missions
|
||||
}
|
||||
} else if currentMissionId != nil {
|
||||
} else if viewingId != nil && viewingId != currentId {
|
||||
// Event has NO mission_id (from main session)
|
||||
// This is fine if we're on the current/main mission
|
||||
// But we can't verify, so allow it for now
|
||||
// TODO: Backend should always include mission_id
|
||||
// Skip if we're viewing a different (parallel) mission
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,9 @@ struct FilesView: View {
|
||||
@State private var newFolderName = ""
|
||||
@State private var isImporting = false
|
||||
|
||||
// Track pending path fetch to prevent race conditions
|
||||
@State private var fetchingPath: String?
|
||||
|
||||
private let api = APIService.shared
|
||||
|
||||
private var sortedEntries: [FileEntry] {
|
||||
@@ -360,16 +363,32 @@ struct FilesView: View {
|
||||
// MARK: - Actions
|
||||
|
||||
private func loadDirectory() async {
|
||||
let pathToLoad = currentPath
|
||||
fetchingPath = pathToLoad
|
||||
|
||||
isLoading = true
|
||||
errorMessage = nil
|
||||
|
||||
do {
|
||||
entries = try await api.listDirectory(path: currentPath)
|
||||
let result = try await api.listDirectory(path: pathToLoad)
|
||||
|
||||
// Race condition guard: only update if this is still the path we want
|
||||
guard fetchingPath == pathToLoad else {
|
||||
return // Navigation changed, discard this response
|
||||
}
|
||||
|
||||
entries = result
|
||||
} catch {
|
||||
// Race condition guard
|
||||
guard fetchingPath == pathToLoad else { return }
|
||||
|
||||
errorMessage = error.localizedDescription
|
||||
}
|
||||
|
||||
isLoading = false
|
||||
// Only clear loading if this is still the current fetch
|
||||
if fetchingPath == pathToLoad {
|
||||
isLoading = false
|
||||
}
|
||||
}
|
||||
|
||||
private func navigateTo(_ path: String) {
|
||||
|
||||
@@ -22,15 +22,17 @@ struct HistoryView: View {
|
||||
enum StatusFilter: String, CaseIterable {
|
||||
case all = "All"
|
||||
case active = "Active"
|
||||
case interrupted = "Interrupted"
|
||||
case completed = "Completed"
|
||||
case failed = "Failed"
|
||||
|
||||
var missionStatus: MissionStatus? {
|
||||
var missionStatuses: [MissionStatus]? {
|
||||
switch self {
|
||||
case .all: return nil
|
||||
case .active: return .active
|
||||
case .completed: return .completed
|
||||
case .failed: return .failed
|
||||
case .active: return [.active]
|
||||
case .interrupted: return [.interrupted, .blocked]
|
||||
case .completed: return [.completed]
|
||||
case .failed: return [.failed, .notFeasible]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -38,7 +40,7 @@ struct HistoryView: View {
|
||||
private var filteredMissions: [Mission] {
|
||||
missions.filter { mission in
|
||||
// Filter by status
|
||||
if let statusFilter = selectedFilter.missionStatus, mission.status != statusFilter {
|
||||
if let statuses = selectedFilter.missionStatuses, !statuses.contains(mission.status) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -257,11 +259,11 @@ private struct MissionRow: View {
|
||||
var body: some View {
|
||||
HStack(spacing: 14) {
|
||||
// Icon
|
||||
Image(systemName: "target")
|
||||
Image(systemName: mission.canResume ? "play.circle" : "target")
|
||||
.font(.title3)
|
||||
.foregroundStyle(Theme.accent)
|
||||
.foregroundStyle(mission.canResume ? Theme.warning : Theme.accent)
|
||||
.frame(width: 40, height: 40)
|
||||
.background(Theme.accent.opacity(0.15))
|
||||
.background((mission.canResume ? Theme.warning : Theme.accent).opacity(0.15))
|
||||
.clipShape(RoundedRectangle(cornerRadius: 10, style: .continuous))
|
||||
|
||||
// Content
|
||||
@@ -274,9 +276,23 @@ private struct MissionRow: View {
|
||||
HStack(spacing: 8) {
|
||||
StatusBadge(status: mission.status.statusType, compact: true)
|
||||
|
||||
if mission.canResume {
|
||||
Text("Resumable")
|
||||
.font(.caption2.weight(.medium))
|
||||
.foregroundStyle(Theme.warning)
|
||||
}
|
||||
|
||||
Text("\(mission.history.count) messages")
|
||||
.font(.caption)
|
||||
.foregroundStyle(Theme.textTertiary)
|
||||
|
||||
if let model = mission.displayModel {
|
||||
Text("•")
|
||||
.foregroundStyle(Theme.textMuted)
|
||||
Text(model)
|
||||
.font(.caption2.monospaced())
|
||||
.foregroundStyle(Theme.textTertiary)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -300,7 +316,7 @@ private struct MissionRow: View {
|
||||
.clipShape(RoundedRectangle(cornerRadius: 14, style: .continuous))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 14, style: .continuous)
|
||||
.stroke(Theme.border, lineWidth: 0.5)
|
||||
.stroke(mission.canResume ? Theme.warning.opacity(0.3) : Theme.border, lineWidth: mission.canResume ? 1 : 0.5)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,10 +5,27 @@ Native iOS dashboard for Open Agent with **Liquid Glass** design language.
|
||||
## Features
|
||||
|
||||
- **Control** - Chat interface with the AI agent, real-time streaming
|
||||
- **History** - View past missions, tasks, and runs
|
||||
- **History** - View past missions with filtering (active, interrupted, completed, failed)
|
||||
- **Terminal** - SSH console via WebSocket
|
||||
- **Files** - Remote file explorer with upload/download
|
||||
|
||||
### Mission Management
|
||||
|
||||
- Create new missions with optional model override
|
||||
- Resume interrupted or blocked missions
|
||||
- Mark missions as completed/failed
|
||||
- View mission status (active, completed, failed, interrupted, blocked, not_feasible)
|
||||
- Model override display per mission
|
||||
|
||||
### Parallel Missions
|
||||
|
||||
- View all running missions in a compact horizontal bar
|
||||
- Switch between parallel missions with a single tap
|
||||
- Real-time status indicators (running, stalled, severely stalled)
|
||||
- Cancel running missions directly from the bar
|
||||
- Automatic polling for running mission updates (every 3s)
|
||||
- SSE event filtering by mission_id to prevent cross-contamination
|
||||
|
||||
## Design System
|
||||
|
||||
Built with "Quiet Luxury + Liquid Glass" aesthetic:
|
||||
@@ -77,6 +94,12 @@ ios_dashboard/
|
||||
│ │ ├── Terminal/ # SSH console
|
||||
│ │ ├── Files/ # File explorer
|
||||
│ │ └── Components/ # Reusable UI
|
||||
│ │ ├── GlassButton.swift
|
||||
│ │ ├── GlassCard.swift
|
||||
│ │ ├── StatusBadge.swift
|
||||
│ │ ├── LoadingView.swift
|
||||
│ │ ├── RunningMissionsBar.swift # Parallel missions UI
|
||||
│ │ └── ToolUI/ # Tool UI components
|
||||
│ └── Assets.xcassets/
|
||||
└── OpenAgentDashboard.xcodeproj/
|
||||
```
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -76,6 +76,9 @@ MODEL_FAMILY_PATTERNS = [
|
||||
(r"^anthropic/claude-(\d+\.?\d*)-haiku$", "claude-haiku", "fast"),
|
||||
|
||||
# OpenAI GPT
|
||||
(r"^openai/gpt-5\.2-pro$", "gpt-5-pro", "flagship"),
|
||||
(r"^openai/gpt-5\.2$", "gpt-5", "mid"),
|
||||
(r"^openai/gpt-5\.2-chat$", "gpt-5", "mid"),
|
||||
(r"^openai/gpt-4\.1$", "gpt-4", "mid"),
|
||||
(r"^openai/gpt-4o$", "gpt-4", "mid"),
|
||||
(r"^openai/gpt-4-turbo", "gpt-4", "mid"),
|
||||
@@ -110,6 +113,8 @@ MODEL_FAMILY_PATTERNS = [
|
||||
# Qwen
|
||||
(r"^qwen/qwen-2\.5-72b", "qwen-72b", "mid"),
|
||||
(r"^qwen/qwq-32b", "qwq", "mid"),
|
||||
(r"^qwen/qwen3-next-80b.*thinking", "qwen3-thinking", "flagship"),
|
||||
(r"^qwen/qwen3-235b.*instruct", "qwen3-instruct", "mid"),
|
||||
]
|
||||
|
||||
HEADERS = {
|
||||
|
||||
@@ -1,409 +0,0 @@
|
||||
//! Complexity estimation agent.
|
||||
//!
|
||||
//! Analyzes a task description and estimates:
|
||||
//! - Complexity score (0-1)
|
||||
//! - Whether to split into subtasks
|
||||
//! - Estimated token count
|
||||
//!
|
||||
//! ## Learning Integration
|
||||
//! When memory is available, the estimator queries similar past tasks
|
||||
//! and adjusts predictions based on historical actual token usage.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::agents::{
|
||||
Agent, AgentContext, AgentId, AgentResult, AgentType, Complexity, LeafAgent, LeafCapability,
|
||||
};
|
||||
use crate::llm::{ChatMessage, ChatOptions, Role};
|
||||
use crate::memory::HistoricalContext;
|
||||
use crate::task::Task;
|
||||
|
||||
/// Agent that estimates task complexity.
|
||||
///
|
||||
/// # Purpose
|
||||
/// Given a task description, estimate how complex it is and whether
|
||||
/// it should be split into subtasks.
|
||||
///
|
||||
/// # Algorithm
|
||||
/// 1. Send task description to LLM with complexity evaluation prompt
|
||||
/// 2. Parse LLM response for complexity score and reasoning
|
||||
/// 3. Return structured Complexity object
|
||||
pub struct ComplexityEstimator {
|
||||
id: AgentId,
|
||||
prompt_variant: ComplexityPromptVariant,
|
||||
split_threshold: f64,
|
||||
token_multiplier: f64,
|
||||
}
|
||||
|
||||
/// Prompt variants for complexity estimation.
|
||||
///
|
||||
/// We keep this as an enum (not free-form strings) so we can:
|
||||
/// - A/B test variants deterministically
|
||||
/// - Store tuned choice as a stable symbol in config
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComplexityPromptVariant {
|
||||
/// Short rubric-based prompt (fast).
|
||||
RubricV1,
|
||||
/// More explicit calibration prompt encouraging realistic token estimates.
|
||||
CalibratedV2,
|
||||
}
|
||||
|
||||
impl ComplexityEstimator {
|
||||
/// Create a new complexity estimator.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
id: AgentId::new(),
|
||||
prompt_variant: ComplexityPromptVariant::CalibratedV2,
|
||||
split_threshold: 0.6,
|
||||
token_multiplier: 1.0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a custom estimator (used by calibration harness).
|
||||
pub fn with_params(
|
||||
prompt_variant: ComplexityPromptVariant,
|
||||
split_threshold: f64,
|
||||
token_multiplier: f64,
|
||||
) -> Self {
|
||||
Self {
|
||||
id: AgentId::new(),
|
||||
prompt_variant,
|
||||
split_threshold: split_threshold.clamp(0.0, 1.0),
|
||||
token_multiplier: token_multiplier.max(0.1),
|
||||
}
|
||||
}
|
||||
|
||||
/// Prompt template for complexity estimation.
|
||||
///
|
||||
/// # Response Format
|
||||
/// LLM should respond with JSON containing:
|
||||
/// - score: float 0-1
|
||||
/// - reasoning: string explanation
|
||||
fn build_prompt(&self, task: &Task) -> String {
|
||||
match self.prompt_variant {
|
||||
ComplexityPromptVariant::RubricV1 => format!(
|
||||
r#"You are a task complexity analyzer.
|
||||
|
||||
Task: {task}
|
||||
|
||||
Respond with ONLY a JSON object:
|
||||
{{
|
||||
"score": <float 0..1>,
|
||||
"reasoning": <string>,
|
||||
"estimated_tokens": <int>
|
||||
}}
|
||||
|
||||
Rubric for score:
|
||||
- 0.0-0.2: Trivial
|
||||
- 0.2-0.4: Simple
|
||||
- 0.4-0.6: Moderate
|
||||
- 0.6-0.8: Complex
|
||||
- 0.8-1.0: Very Complex"#,
|
||||
task = task.description()
|
||||
),
|
||||
ComplexityPromptVariant::CalibratedV2 => format!(
|
||||
r#"You are a task complexity analyzer. Your goal is to estimate:
|
||||
1) a complexity score in [0, 1]
|
||||
2) a realistic token budget estimate for completing the task end-to-end using an LLM with tools.
|
||||
|
||||
Task: {task}
|
||||
|
||||
Important: \"estimated_tokens\" should reflect TOTAL tokens (input + output) across multiple turns, including:
|
||||
- planning / reasoning
|
||||
- tool call arguments and tool outputs
|
||||
- iterative fixes and retries
|
||||
|
||||
Respond with ONLY a JSON object:
|
||||
{{
|
||||
\"score\": <float 0..1>,
|
||||
\"reasoning\": <string>,
|
||||
\"estimated_tokens\": <int>
|
||||
}}
|
||||
|
||||
Rubric for score:
|
||||
- 0.0-0.2: Trivial (single tool call)
|
||||
- 0.2-0.4: Simple (1-3 tool calls)
|
||||
- 0.4-0.6: Moderate (3-8 tool calls)
|
||||
- 0.6-0.8: Complex (multi-file, tests, iterations)
|
||||
- 0.8-1.0: Very Complex (architecture, significant refactor)"#,
|
||||
task = task.description()
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse LLM response into Complexity struct.
|
||||
///
|
||||
/// # Postconditions
|
||||
/// - Returns valid Complexity with score in [0, 1]
|
||||
/// - Falls back to moderate complexity on parse error
|
||||
fn parse_response(&self, response: &str) -> Complexity {
|
||||
// Try to parse as JSON
|
||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(response) {
|
||||
let score = json["score"].as_f64().unwrap_or(0.5);
|
||||
let reasoning = json["reasoning"].as_str().unwrap_or("No reasoning provided");
|
||||
let estimated_tokens = json["estimated_tokens"].as_u64().unwrap_or(2000);
|
||||
|
||||
return Complexity::new(score, reasoning, estimated_tokens);
|
||||
}
|
||||
|
||||
// Try to extract score from text
|
||||
if let Some(score) = self.extract_score_from_text(response) {
|
||||
return Complexity::new(score, response, 2000);
|
||||
}
|
||||
|
||||
// Default to moderate complexity
|
||||
Complexity::moderate("Could not parse complexity response")
|
||||
}
|
||||
|
||||
/// Try to extract a score from free-form text.
|
||||
fn extract_score_from_text(&self, text: &str) -> Option<f64> {
|
||||
// Look for patterns like "0.5" or "score: 0.5" or "50%"
|
||||
let text_lower = text.to_lowercase();
|
||||
|
||||
// Check for keywords
|
||||
if text_lower.contains("trivial") || text_lower.contains("very simple") {
|
||||
return Some(0.1);
|
||||
}
|
||||
if text_lower.contains("very complex") || text_lower.contains("extremely") {
|
||||
return Some(0.9);
|
||||
}
|
||||
if text_lower.contains("complex") {
|
||||
return Some(0.7);
|
||||
}
|
||||
if text_lower.contains("moderate") || text_lower.contains("medium") {
|
||||
return Some(0.5);
|
||||
}
|
||||
if text_lower.contains("simple") || text_lower.contains("easy") {
|
||||
return Some(0.3);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Query historical context for similar tasks.
|
||||
///
|
||||
/// Returns adjustment multipliers based on past actual vs predicted values.
|
||||
async fn get_historical_adjustments(
|
||||
&self,
|
||||
task_description: &str,
|
||||
ctx: &AgentContext,
|
||||
) -> Option<HistoricalContext> {
|
||||
let memory = ctx.memory.as_ref()?;
|
||||
|
||||
match memory.retriever.get_historical_context(task_description, 5).await {
|
||||
Ok(context) => {
|
||||
if let Some(ref hist) = context {
|
||||
tracing::debug!(
|
||||
"Historical context found: {} similar tasks, avg token ratio: {:.2}, success rate: {:.2}",
|
||||
hist.similar_outcomes.len(),
|
||||
hist.avg_token_multiplier,
|
||||
hist.similar_success_rate
|
||||
);
|
||||
}
|
||||
context
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to fetch historical context: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Adjust token estimate based on historical data.
|
||||
///
|
||||
/// If similar past tasks consistently used more/fewer tokens than predicted,
|
||||
/// we adjust our estimate accordingly.
|
||||
fn apply_historical_adjustment(
|
||||
&self,
|
||||
base_tokens: u64,
|
||||
historical: Option<&HistoricalContext>,
|
||||
) -> u64 {
|
||||
match historical {
|
||||
Some(hist) if hist.similar_outcomes.len() >= 2 => {
|
||||
// Apply the historical token multiplier (clamped to reasonable range)
|
||||
let multiplier = hist.avg_token_multiplier.clamp(0.5, 3.0);
|
||||
let adjusted = (base_tokens as f64 * multiplier).round() as u64;
|
||||
|
||||
tracing::debug!(
|
||||
"Adjusted token estimate: {} -> {} (multiplier: {:.2})",
|
||||
base_tokens, adjusted, multiplier
|
||||
);
|
||||
|
||||
adjusted
|
||||
}
|
||||
_ => base_tokens,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ComplexityEstimator {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Agent for ComplexityEstimator {
|
||||
fn id(&self) -> &AgentId {
|
||||
&self.id
|
||||
}
|
||||
|
||||
fn agent_type(&self) -> AgentType {
|
||||
AgentType::ComplexityEstimator
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Estimates task complexity and recommends splitting strategy"
|
||||
}
|
||||
|
||||
/// Estimate complexity of a task.
|
||||
///
|
||||
/// # Returns
|
||||
/// AgentResult with Complexity data in the `data` field.
|
||||
///
|
||||
/// # Learning Integration
|
||||
/// When memory is available, queries similar past tasks to adjust predictions
|
||||
/// based on actual historical token usage.
|
||||
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
|
||||
// Query historical context for similar tasks (if memory available)
|
||||
let historical = self.get_historical_adjustments(task.description(), ctx).await;
|
||||
|
||||
let prompt = self.build_prompt(task);
|
||||
|
||||
let messages = vec![
|
||||
ChatMessage::new(Role::System, "You are a precise task complexity analyzer. Respond only with JSON."),
|
||||
ChatMessage::new(Role::User, prompt),
|
||||
];
|
||||
|
||||
// Use a fast, cheap model for complexity estimation
|
||||
let model = "openai/gpt-4.1-mini";
|
||||
|
||||
let pricing = ctx.pricing.get_pricing(model).await;
|
||||
let options = ChatOptions {
|
||||
temperature: Some(0.0),
|
||||
top_p: None,
|
||||
max_tokens: Some(400),
|
||||
};
|
||||
|
||||
match ctx
|
||||
.llm
|
||||
.chat_completion_with_options(model, &messages, None, options)
|
||||
.await
|
||||
{
|
||||
Ok(response) => {
|
||||
let content = response.content.unwrap_or_default();
|
||||
let parsed = self.parse_response(&content);
|
||||
|
||||
// Apply calibrated adjustments (pure post-processing).
|
||||
let base_tokens = ((parsed.estimated_tokens() as f64) * self.token_multiplier)
|
||||
.round()
|
||||
.max(1.0) as u64;
|
||||
|
||||
// Apply historical adjustment if we have relevant data
|
||||
let adjusted_tokens = self.apply_historical_adjustment(base_tokens, historical.as_ref());
|
||||
|
||||
let should_split = parsed.score() > self.split_threshold;
|
||||
let complexity = Complexity::new(parsed.score(), parsed.reasoning(), adjusted_tokens)
|
||||
.with_split(should_split);
|
||||
|
||||
// Record analysis on the task
|
||||
{
|
||||
let a = task.analysis_mut();
|
||||
a.complexity_score = Some(complexity.score());
|
||||
a.complexity_reasoning = Some(complexity.reasoning().to_string());
|
||||
a.should_split = Some(complexity.should_split());
|
||||
a.estimated_total_tokens = Some(complexity.estimated_tokens());
|
||||
}
|
||||
|
||||
// Compute cost (if usage + pricing available)
|
||||
let cost_cents = match (&response.usage, &pricing) {
|
||||
(Some(u), Some(p)) => p.calculate_cost_cents(u.prompt_tokens, u.completion_tokens),
|
||||
_ => 1, // fallback tiny cost
|
||||
};
|
||||
|
||||
// Build historical info for response data
|
||||
let historical_info = historical.as_ref().map(|h| json!({
|
||||
"similar_tasks_found": h.similar_outcomes.len(),
|
||||
"avg_token_multiplier": h.avg_token_multiplier,
|
||||
"avg_cost_multiplier": h.avg_cost_multiplier,
|
||||
"similar_success_rate": h.similar_success_rate,
|
||||
}));
|
||||
|
||||
AgentResult::success(
|
||||
format!(
|
||||
"Complexity: {:.2} - {}{}",
|
||||
complexity.score(),
|
||||
if complexity.should_split() { "Should split" } else { "Execute directly" },
|
||||
if historical.is_some() { " (adjusted from history)" } else { "" }
|
||||
),
|
||||
cost_cents,
|
||||
)
|
||||
.with_model(model)
|
||||
.with_data(json!({
|
||||
"score": complexity.score(),
|
||||
"reasoning": complexity.reasoning(),
|
||||
"should_split": complexity.should_split(),
|
||||
"estimated_tokens": complexity.estimated_tokens(),
|
||||
"base_tokens_before_history": base_tokens,
|
||||
"historical_adjustment": historical_info,
|
||||
"usage": response.usage.as_ref().map(|u| json!({
|
||||
"prompt_tokens": u.prompt_tokens,
|
||||
"completion_tokens": u.completion_tokens,
|
||||
"total_tokens": u.total_tokens
|
||||
})),
|
||||
}))
|
||||
}
|
||||
Err(e) => {
|
||||
// On error, return moderate complexity as fallback
|
||||
let fallback = Complexity::moderate(format!("LLM error, using fallback: {}", e));
|
||||
|
||||
AgentResult::success(
|
||||
"Using fallback complexity estimate due to LLM error",
|
||||
0,
|
||||
)
|
||||
.with_data(json!({
|
||||
"score": fallback.score(),
|
||||
"reasoning": fallback.reasoning(),
|
||||
"should_split": fallback.should_split(),
|
||||
"estimated_tokens": fallback.estimated_tokens(),
|
||||
"fallback": true,
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LeafAgent for ComplexityEstimator {
|
||||
fn capability(&self) -> LeafCapability {
|
||||
LeafCapability::ComplexityEstimation
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_json_response() {
|
||||
let estimator = ComplexityEstimator::new();
|
||||
|
||||
let json_response = r#"{"score": 0.7, "reasoning": "Complex task", "estimated_tokens": 3000, "should_split": true}"#;
|
||||
let complexity = estimator.parse_response(json_response);
|
||||
|
||||
assert!((complexity.score() - 0.7).abs() < 0.01);
|
||||
assert!(complexity.should_split());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_text_response() {
|
||||
let estimator = ComplexityEstimator::new();
|
||||
|
||||
let text_response = "This is a very complex task";
|
||||
let complexity = estimator.parse_response(text_response);
|
||||
|
||||
assert!(complexity.score() > 0.6);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use serde_json::json;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::agents::{
|
||||
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability,
|
||||
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability, TerminalReason,
|
||||
};
|
||||
use crate::api::control::{AgentEvent, ControlRunState};
|
||||
use crate::budget::ExecutionSignals;
|
||||
@@ -43,6 +43,8 @@ pub struct ExecutionLoopResult {
|
||||
pub signals: ExecutionSignals,
|
||||
/// Whether execution succeeded
|
||||
pub success: bool,
|
||||
/// Why execution terminated (if not successful completion)
|
||||
pub terminal_reason: Option<TerminalReason>,
|
||||
}
|
||||
|
||||
/// Agent that executes tasks using tools.
|
||||
@@ -60,6 +62,33 @@ pub struct TaskExecutor {
|
||||
id: AgentId,
|
||||
}
|
||||
|
||||
/// Categorize a tool name into a broader approach category.
|
||||
/// Used for tracking repeated failures of similar approaches.
|
||||
fn categorize_tool(tool_name: &str) -> String {
|
||||
match tool_name {
|
||||
// Static analysis tools
|
||||
name if name.contains("slither") || name.contains("mythril") ||
|
||||
name.contains("solhint") || name.contains("echidna") => "static_analysis".to_string(),
|
||||
|
||||
// Code execution/compilation
|
||||
"run_command" => "shell_command".to_string(),
|
||||
name if name.contains("compile") || name.contains("build") => "compilation".to_string(),
|
||||
|
||||
// File operations
|
||||
"read_file" | "write_file" | "list_directory" | "search_files" => "file_ops".to_string(),
|
||||
|
||||
// Network/API calls
|
||||
name if name.contains("browser") || name.contains("http") ||
|
||||
name.contains("fetch") || name.contains("curl") => "network".to_string(),
|
||||
|
||||
// Git operations
|
||||
name if name.contains("git") || name.contains("clone") => "git".to_string(),
|
||||
|
||||
// Default: use the tool name itself
|
||||
_ => tool_name.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
impl TaskExecutor {
|
||||
/// Create a new task executor.
|
||||
pub fn new() -> Self {
|
||||
@@ -558,7 +587,54 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
4. **Explicit completion** — Use complete_mission tool when the goal is fully achieved
|
||||
5. **Failure acknowledgment** — If you cannot complete, explain why and call complete_mission with failed status
|
||||
6. **No silent exits** — Every execution should end with either a deliverable or an explanation
|
||||
7. **Large files in chunks** — If writing files >2000 chars, verify content isn't truncated"#,
|
||||
7. **Large files in chunks** — If writing files >2000 chars, verify content isn't truncated
|
||||
|
||||
## ⚠️ CRITICAL: Blocker Detection (STOP if these occur!)
|
||||
|
||||
**If you encounter ANY blocker, STOP IMMEDIATELY and report it. DO NOT produce placeholder content.**
|
||||
|
||||
### Type Mismatch Blockers
|
||||
| Requested | But Target Is | Action |
|
||||
|-----------|---------------|--------|
|
||||
| Solidity/Smart Contract audit | C++/Rust/Go project | STOP → `complete_mission(blocked, "Target is C++/Rust/Go, not Solidity")` |
|
||||
| Python analysis | Java/JavaScript project | STOP → `complete_mission(blocked, "Target is Java/JS, not Python")` |
|
||||
| Web scraping | Desktop app | STOP → `complete_mission(blocked, "Target is desktop app, not website")` |
|
||||
|
||||
**How to detect project types:**
|
||||
- **Solidity**: `.sol` files, `hardhat.config.js`, `truffle-config.js`, `foundry.toml`
|
||||
- **C++ (Bitcoin forks)**: `configure.ac`, `Makefile.am`, `src/*.cpp`, `src/*.h`
|
||||
- **Rust**: `Cargo.toml`, `src/*.rs`
|
||||
- **Go**: `go.mod`, `*.go` files
|
||||
|
||||
### Access/Resource Blockers
|
||||
| Blocker | Action |
|
||||
|---------|--------|
|
||||
| Can't clone/access repository | STOP → report exact error |
|
||||
| Can't fetch contract bytecode | STOP → report RPC error and address |
|
||||
| Required tool won't install | STOP → report installation error |
|
||||
| Source code not available | TRY bytecode analysis first, then report if still blocked |
|
||||
|
||||
### Smart Contract Audit Specific
|
||||
**When auditing contracts WITHOUT source code:**
|
||||
1. FIRST try fetching bytecode: `cast code <address> --rpc-url <rpc>`
|
||||
2. THEN decompile: Use `heimdall`, `panoramix`, `dedaub`
|
||||
3. ONLY report "blocked" if bytecode analysis also fails
|
||||
|
||||
**Chain RPCs:**
|
||||
- Ethereum: `https://eth.llamarpc.com`
|
||||
- BSC: `https://bsc-dataseed.binance.org`
|
||||
- Polygon: `https://polygon-rpc.com`
|
||||
- Merlin: `https://rpc.merlinchain.io`
|
||||
|
||||
## 🚫 NEVER DO THESE
|
||||
|
||||
1. **NEVER create "example" or "illustrative" content** as substitute for real analysis
|
||||
2. **NEVER analyze unrelated code** (e.g., library code instead of target contracts)
|
||||
3. **NEVER produce generic filler** (e.g., "SQL injection" in a smart contract audit)
|
||||
4. **NEVER frame placeholder content as real analysis**
|
||||
5. **NEVER mark "completed" if you analyzed substitute targets**
|
||||
|
||||
If you cannot perform the requested analysis, use `complete_mission(blocked, reason)` and explain clearly what blocked you."#,
|
||||
session_metadata = session_metadata,
|
||||
memory_context = memory_context,
|
||||
working_dir = working_dir,
|
||||
@@ -622,6 +698,15 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
let mut has_error_messages = false;
|
||||
let mut iterations_completed = 0u32;
|
||||
|
||||
// Track consecutive empty/reasoning-only responses (P0 fix for agent stalls)
|
||||
let mut empty_response_count: u32 = 0;
|
||||
const EMPTY_RESPONSE_WARNING_THRESHOLD: u32 = 2;
|
||||
const EMPTY_RESPONSE_FORCE_COMPLETE_THRESHOLD: u32 = 4;
|
||||
|
||||
// Track failed tool attempts by category (P3 fix for approach looping)
|
||||
let mut failed_tool_attempts: std::collections::HashMap<String, u32> = std::collections::HashMap::new();
|
||||
const TOOL_FAILURE_THRESHOLD: u32 = 3;
|
||||
|
||||
// Track uploaded images that need to be included in the response
|
||||
// When upload_image succeeds, we store the (url, markdown) so we can warn
|
||||
// the agent if they try to complete without including the images.
|
||||
@@ -701,8 +786,25 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
tracing::info!("Browser tools available: {:?}", browser_tools);
|
||||
}
|
||||
|
||||
tracing::info!("Discovered {} built-in tools, {} MCP tools", builtin_count, mcp_tool_schemas.len());
|
||||
tool_schemas.extend(mcp_tool_schemas);
|
||||
// Filter out MCP tools that conflict with built-in tools (built-in takes precedence)
|
||||
let builtin_names: std::collections::HashSet<_> = tool_schemas.iter().map(|t| t.function.name.as_str()).collect();
|
||||
let mcp_count_before = mcp_tool_schemas.len();
|
||||
let filtered_mcp: Vec<_> = mcp_tool_schemas
|
||||
.into_iter()
|
||||
.filter(|t| {
|
||||
if builtin_names.contains(t.function.name.as_str()) {
|
||||
tracing::debug!("Skipping MCP tool '{}' - conflicts with built-in tool", t.function.name);
|
||||
false
|
||||
} else {
|
||||
true
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let mcp_skipped = mcp_count_before - filtered_mcp.len();
|
||||
|
||||
tracing::info!("Discovered {} built-in tools, {} MCP tools ({} skipped due to conflicts)",
|
||||
builtin_count, filtered_mcp.len(), mcp_skipped);
|
||||
tool_schemas.extend(filtered_mcp);
|
||||
|
||||
// Agent loop
|
||||
for iteration in 0..ctx.max_iterations {
|
||||
@@ -734,6 +836,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::Cancelled),
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -762,13 +865,19 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::BudgetExhausted),
|
||||
};
|
||||
}
|
||||
|
||||
// Call LLM
|
||||
let response = match ctx.llm.chat_completion(model, &messages, Some(&tool_schemas)).await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
// Call LLM with timeout (P2 fix: detect hangs)
|
||||
const LLM_TIMEOUT_SECS: u64 = 300; // 5 minutes max per LLM call
|
||||
let llm_future = ctx.llm.chat_completion(model, &messages, Some(&tool_schemas));
|
||||
let response = match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(LLM_TIMEOUT_SECS),
|
||||
llm_future
|
||||
).await {
|
||||
Ok(Ok(r)) => r,
|
||||
Ok(Err(e)) => {
|
||||
has_error_messages = true;
|
||||
let error_msg = format!("LLM error: {}", e);
|
||||
let signals = ExecutionSignals {
|
||||
@@ -792,6 +901,45 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::LlmError),
|
||||
};
|
||||
}
|
||||
Err(_timeout) => {
|
||||
// P2 FIX: LLM call timed out - return with partial results
|
||||
has_error_messages = true;
|
||||
tracing::error!(
|
||||
"LLM call timed out after {} seconds at iteration {}",
|
||||
LLM_TIMEOUT_SECS,
|
||||
iterations_completed
|
||||
);
|
||||
let signals = ExecutionSignals {
|
||||
iterations: iterations_completed,
|
||||
max_iterations: ctx.max_iterations as u32,
|
||||
successful_tool_calls,
|
||||
failed_tool_calls,
|
||||
files_modified,
|
||||
repetitive_actions,
|
||||
has_error_messages,
|
||||
partial_progress: files_modified || successful_tool_calls > 0,
|
||||
cost_spent_cents: total_cost_cents,
|
||||
budget_total_cents: task.budget().total_cents(),
|
||||
final_output: format!(
|
||||
"LLM call timed out after {} seconds. Partial results may be in working directory.",
|
||||
LLM_TIMEOUT_SECS
|
||||
),
|
||||
model_used: model.to_string(),
|
||||
};
|
||||
return ExecutionLoopResult {
|
||||
output: format!(
|
||||
"Agent stalled: LLM call timed out after {} seconds. Check working directory for partial results.",
|
||||
LLM_TIMEOUT_SECS
|
||||
),
|
||||
cost_cents: total_cost_cents,
|
||||
tool_log,
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::Stalled),
|
||||
};
|
||||
}
|
||||
};
|
||||
@@ -907,6 +1055,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::InfiniteLoop),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -999,6 +1148,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::Cancelled),
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1069,7 +1219,28 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
Err(e) => {
|
||||
failed_tool_calls += 1;
|
||||
has_error_messages = true;
|
||||
let s = format!("Error: {}", e);
|
||||
|
||||
// P3 FIX: Track failed approaches by tool category
|
||||
let tool_category = categorize_tool(&tool_name);
|
||||
let count = failed_tool_attempts.entry(tool_category.clone()).or_insert(0);
|
||||
*count += 1;
|
||||
|
||||
let s = if *count >= TOOL_FAILURE_THRESHOLD {
|
||||
tracing::warn!(
|
||||
"Tool category '{}' has failed {} times - suggesting pivot",
|
||||
tool_category,
|
||||
*count
|
||||
);
|
||||
format!(
|
||||
"Error: {}\n\n[SYSTEM NOTE: The '{}' approach has failed {} times. \
|
||||
Consider: 1) Try a completely different tool/approach, \
|
||||
2) Analyze what you DO have and produce partial results, \
|
||||
3) Call complete_mission(blocked) if fundamentally stuck]",
|
||||
e, tool_category, *count
|
||||
)
|
||||
} else {
|
||||
format!("Error: {}", e)
|
||||
};
|
||||
(s.clone(), serde_json::Value::String(s))
|
||||
}
|
||||
}
|
||||
@@ -1205,14 +1376,92 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: true,
|
||||
terminal_reason: None,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Reset empty response counter on successful tool execution
|
||||
empty_response_count = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// P0 FIX: Handle reasoning-only responses (no tool calls, no/empty content)
|
||||
// This prevents the agent from stalling when the LLM returns only thinking
|
||||
let has_reasoning = response.reasoning.as_ref().map(|r| !r.is_empty()).unwrap_or(false);
|
||||
let has_content = response.content.as_ref().map(|c| !c.trim().is_empty()).unwrap_or(false);
|
||||
let has_tool_calls = response.tool_calls.as_ref().map(|tc| !tc.is_empty()).unwrap_or(false);
|
||||
|
||||
if !has_tool_calls && !has_content {
|
||||
empty_response_count += 1;
|
||||
tracing::warn!(
|
||||
"Empty/reasoning-only response #{} (has_reasoning: {}, iteration: {})",
|
||||
empty_response_count,
|
||||
has_reasoning,
|
||||
iterations_completed
|
||||
);
|
||||
|
||||
// Force completion if too many empty responses
|
||||
if empty_response_count >= EMPTY_RESPONSE_FORCE_COMPLETE_THRESHOLD {
|
||||
tracing::error!(
|
||||
"Force completing: {} consecutive empty/reasoning-only responses",
|
||||
empty_response_count
|
||||
);
|
||||
has_error_messages = true;
|
||||
let signals = ExecutionSignals {
|
||||
iterations: iterations_completed,
|
||||
max_iterations: ctx.max_iterations as u32,
|
||||
successful_tool_calls,
|
||||
failed_tool_calls,
|
||||
files_modified,
|
||||
repetitive_actions,
|
||||
has_error_messages,
|
||||
partial_progress: files_modified || successful_tool_calls > 0,
|
||||
cost_spent_cents: total_cost_cents,
|
||||
budget_total_cents: task.budget().total_cents(),
|
||||
final_output: format!(
|
||||
"Agent stalled: {} consecutive responses without action. Partial results may be in working directory.",
|
||||
empty_response_count
|
||||
),
|
||||
model_used: model.to_string(),
|
||||
};
|
||||
return ExecutionLoopResult {
|
||||
output: format!(
|
||||
"Agent stalled after {} responses without taking action. Check working directory for partial results.",
|
||||
empty_response_count
|
||||
),
|
||||
cost_cents: total_cost_cents,
|
||||
tool_log,
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::Stalled),
|
||||
};
|
||||
}
|
||||
|
||||
// Inject a prompt to get the model to take action
|
||||
if empty_response_count >= EMPTY_RESPONSE_WARNING_THRESHOLD {
|
||||
messages.push(ChatMessage::new(
|
||||
Role::User,
|
||||
format!(
|
||||
"[SYSTEM WARNING] You've returned {} responses without taking any action (only thinking/reasoning).\n\n\
|
||||
You MUST now do ONE of:\n\
|
||||
1. Call a tool to continue working on the task\n\
|
||||
2. Provide a complete final response summarizing your work\n\
|
||||
3. Call complete_mission with status='completed' if done, or status='blocked' if stuck\n\n\
|
||||
DO NOT respond with only thinking - take concrete action NOW.",
|
||||
empty_response_count
|
||||
)
|
||||
));
|
||||
}
|
||||
|
||||
continue; // Retry - let the model try again with the warning
|
||||
}
|
||||
|
||||
// If we reach here with content, it's the final response
|
||||
// (no need to reset empty_response_count since we're returning)
|
||||
|
||||
// No tool calls - final response
|
||||
if let Some(content) = response.content.filter(|c| !c.trim().is_empty()) {
|
||||
let signals = ExecutionSignals {
|
||||
@@ -1236,6 +1485,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: true,
|
||||
terminal_reason: None,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1262,6 +1512,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::LlmError),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1287,6 +1538,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
usage,
|
||||
signals,
|
||||
success: false,
|
||||
terminal_reason: Some(TerminalReason::MaxIterations),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1350,6 +1602,11 @@ impl Agent for TaskExecutor {
|
||||
AgentResult::failure(&result.output, result.cost_cents)
|
||||
};
|
||||
|
||||
// Propagate terminal reason from execution loop
|
||||
if let Some(reason) = result.terminal_reason {
|
||||
agent_result = agent_result.with_terminal_reason(reason);
|
||||
}
|
||||
|
||||
agent_result = agent_result
|
||||
.with_model(model)
|
||||
.with_data(json!({
|
||||
@@ -1415,6 +1672,11 @@ impl TaskExecutor {
|
||||
AgentResult::failure(&result.output, result.cost_cents)
|
||||
};
|
||||
|
||||
// Propagate terminal reason from execution loop
|
||||
if let Some(reason) = result.terminal_reason {
|
||||
agent_result = agent_result.with_terminal_reason(reason);
|
||||
}
|
||||
|
||||
agent_result = agent_result
|
||||
.with_model(model)
|
||||
.with_data(json!({
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
//! Leaf agents - specialized agents that do actual work.
|
||||
//!
|
||||
//! # Leaf Agent Types
|
||||
//! - `ComplexityEstimator`: Estimates task complexity (0-1 score)
|
||||
//! - `ModelSelector`: Selects optimal model for task/budget
|
||||
//! # Active Leaf Agent
|
||||
//! - `TaskExecutor`: Executes tasks using tools (main worker)
|
||||
//! - `Verifier`: Validates task completion
|
||||
//!
|
||||
//! # Removed Agents (superseded by SimpleAgent)
|
||||
//! - `ComplexityEstimator`: Was unreliable (LLM-based estimation)
|
||||
//! - `ModelSelector`: Was over-engineered (U-curve optimization)
|
||||
//! - `Verifier`: Was ineffective (rubber-stamped everything)
|
||||
|
||||
mod complexity;
|
||||
mod model_select;
|
||||
mod executor;
|
||||
mod verifier;
|
||||
|
||||
pub use complexity::{ComplexityEstimator, ComplexityPromptVariant};
|
||||
pub use model_select::ModelSelector;
|
||||
pub use executor::{TaskExecutor, ExecutionLoopResult};
|
||||
pub use verifier::Verifier;
|
||||
|
||||
|
||||
@@ -1,766 +0,0 @@
|
||||
//! Model selection agent with U-curve cost optimization.
|
||||
//!
|
||||
//! # U-Curve Optimization
|
||||
//! The total expected cost follows a U-shaped curve:
|
||||
//! - Cheap models: Low per-token cost, but may fail/retry, use more tokens
|
||||
//! - Expensive models: High per-token cost, but succeed more often
|
||||
//! - Optimal: Somewhere in the middle, minimizing total expected cost
|
||||
//!
|
||||
//! # Cost Model
|
||||
//! Expected Cost = base_cost * (1 + failure_rate * retry_multiplier) * token_efficiency
|
||||
//!
|
||||
//! # Benchmark Integration
|
||||
//! When benchmark data is available, uses actual benchmark scores (from llm-stats.com)
|
||||
//! for task-type-specific capability estimation instead of price-based heuristics.
|
||||
//!
|
||||
//! # Learning Integration
|
||||
//! When memory is available, uses historical model statistics (actual success rates,
|
||||
//! cost ratios) instead of pure heuristics.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::agents::{
|
||||
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability,
|
||||
};
|
||||
use crate::budget::{PricingInfo, TaskType};
|
||||
use crate::memory::ModelStats;
|
||||
use crate::task::Task;
|
||||
|
||||
/// Agent that selects the optimal model for a task.
///
/// # Algorithm
/// 1. Get task complexity and budget constraints
/// 2. Fetch available models and pricing
/// 3. For each model, calculate expected total cost
/// 4. Return model with minimum expected cost within budget
pub struct ModelSelector {
    /// Unique identity of this agent instance.
    id: AgentId,
    /// Multiplier applied to expected retry cost (kept >= 1.0 by `with_params`).
    retry_multiplier: f64,
    /// How strongly low capability inflates token usage (kept >= 0.0 by `with_params`).
    inefficiency_scale: f64,
    /// Upper bound on estimated failure probability (clamped to 0.0..=0.99 by `with_params`).
    max_failure_probability: f64,
}
|
||||
|
||||
/// Model recommendation from the selector.
///
/// Produced by the selection pipeline; carries the chosen model plus the
/// provenance flags (historical / benchmark data) behind the choice.
#[derive(Debug, Clone)]
pub struct ModelRecommendation {
    /// Recommended model ID
    pub model_id: String,

    /// Expected cost in cents
    pub expected_cost_cents: u64,

    /// Confidence in this recommendation (0-1); computed as 1 - failure probability.
    pub confidence: f64,

    /// Human-readable reasoning for the selection
    pub reasoning: String,

    /// Alternative models if primary fails (up to 2, next-cheapest by expected cost)
    pub fallbacks: Vec<String>,

    /// Whether historical data was used for this selection
    pub used_historical_data: bool,

    /// Whether benchmark data was used for capability estimation
    pub used_benchmark_data: bool,

    /// Inferred task type
    pub task_type: Option<TaskType>,
}
|
||||
|
||||
impl ModelSelector {
|
||||
/// Create a new model selector.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
id: AgentId::new(),
|
||||
retry_multiplier: 1.5,
|
||||
inefficiency_scale: 0.5,
|
||||
max_failure_probability: 0.9,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a selector with calibrated parameters.
|
||||
pub fn with_params(retry_multiplier: f64, inefficiency_scale: f64, max_failure_probability: f64) -> Self {
|
||||
Self {
|
||||
id: AgentId::new(),
|
||||
retry_multiplier: retry_multiplier.max(1.0),
|
||||
inefficiency_scale: inefficiency_scale.max(0.0),
|
||||
max_failure_probability: max_failure_probability.clamp(0.0, 0.99),
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate expected cost for a model given task complexity.
|
||||
///
|
||||
/// # Formula
|
||||
/// ```text
|
||||
/// expected_cost = base_cost * (1 + failure_prob * retry_cost) * inefficiency_factor
|
||||
/// ```
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `pricing`: Model pricing info
|
||||
/// - `complexity`: Task complexity (0-1)
|
||||
/// - `estimated_tokens`: Estimated tokens needed
|
||||
/// - `capability`: Model capability score (0-1), from benchmarks or price heuristic
|
||||
///
|
||||
/// # Returns
|
||||
/// Expected cost in cents
|
||||
///
|
||||
/// # Pure Function
|
||||
/// No side effects, deterministic output.
|
||||
fn calculate_expected_cost_with_capability(
|
||||
&self,
|
||||
pricing: &PricingInfo,
|
||||
complexity: f64,
|
||||
estimated_tokens: u64,
|
||||
capability: f64,
|
||||
from_benchmarks: bool,
|
||||
) -> ExpectedCost {
|
||||
// Failure probability: higher complexity + lower capability = more failures
|
||||
// Formula: P(fail) = complexity * (1 - capability)
|
||||
let failure_prob = (complexity * (1.0 - capability)).clamp(0.0, self.max_failure_probability);
|
||||
|
||||
// Token inefficiency: weaker models need more tokens
|
||||
// Formula: inefficiency = 1 + (1 - capability) * 0.5
|
||||
let inefficiency = 1.0 + (1.0 - capability) * self.inefficiency_scale;
|
||||
|
||||
// Retry cost: if it fails, we pay again (possibly with a better model)
|
||||
let retry_multiplier = self.retry_multiplier;
|
||||
|
||||
// Base cost for estimated tokens
|
||||
let input_tokens = estimated_tokens / 2;
|
||||
let output_tokens = estimated_tokens / 2;
|
||||
let base_cost = pricing.calculate_cost_cents(input_tokens, output_tokens);
|
||||
|
||||
// Adjusted for inefficiency (weak models use more tokens)
|
||||
let adjusted_tokens = ((estimated_tokens as f64) * inefficiency) as u64;
|
||||
let adjusted_cost = pricing.calculate_cost_cents(adjusted_tokens / 2, adjusted_tokens / 2);
|
||||
|
||||
// Expected cost including retry probability
|
||||
let expected_cost = (adjusted_cost as f64) * (1.0 + failure_prob * retry_multiplier);
|
||||
|
||||
ExpectedCost {
|
||||
model_id: pricing.model_id.clone(),
|
||||
base_cost_cents: base_cost,
|
||||
expected_cost_cents: expected_cost.ceil() as u64,
|
||||
failure_probability: failure_prob,
|
||||
capability,
|
||||
inefficiency,
|
||||
from_benchmarks,
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate expected cost using price-based capability (fallback).
|
||||
fn calculate_expected_cost(
|
||||
&self,
|
||||
pricing: &PricingInfo,
|
||||
complexity: f64,
|
||||
estimated_tokens: u64,
|
||||
) -> ExpectedCost {
|
||||
let avg_cost = pricing.average_cost_per_token();
|
||||
let capability = self.estimate_capability_from_price(avg_cost);
|
||||
self.calculate_expected_cost_with_capability(pricing, complexity, estimated_tokens, capability, false)
|
||||
}
|
||||
|
||||
/// Estimate model capability from its cost (fallback heuristic).
|
||||
///
|
||||
/// # Heuristic
|
||||
/// More expensive models are generally more capable.
|
||||
/// Uses log scale to normalize across price ranges.
|
||||
///
|
||||
/// # Returns
|
||||
/// Capability score 0-1
|
||||
fn estimate_capability_from_price(&self, avg_cost_per_token: f64) -> f64 {
|
||||
// Cost tiers (per token):
|
||||
// < 0.0001: weak (capability ~0.3)
|
||||
// 0.0001-0.001: moderate (capability ~0.6)
|
||||
// > 0.001: strong (capability ~0.9)
|
||||
|
||||
if avg_cost_per_token < 0.0000001 {
|
||||
return 0.3; // Free/very cheap
|
||||
}
|
||||
|
||||
// Log scale normalization
|
||||
let log_cost = avg_cost_per_token.log10();
|
||||
// Map from ~-7 (cheap) to ~-3 (expensive) => 0.3 to 0.95
|
||||
let normalized = ((log_cost + 7.0) / 4.0).clamp(0.0, 1.0);
|
||||
|
||||
0.3 + normalized * 0.65
|
||||
}
|
||||
|
||||
/// Get model capability from benchmarks (preferred) or fall back to price heuristic.
|
||||
///
|
||||
/// # Benchmark-Based Capability
|
||||
/// Uses actual benchmark scores from llm-stats.com when available.
|
||||
/// This provides task-type-specific capability estimation.
|
||||
async fn get_capability(
|
||||
&self,
|
||||
model_id: &str,
|
||||
task_type: TaskType,
|
||||
avg_cost_per_token: f64,
|
||||
ctx: &AgentContext,
|
||||
) -> (f64, bool) {
|
||||
// Try to get benchmark-based capability
|
||||
if let Some(benchmarks) = &ctx.benchmarks {
|
||||
let registry = benchmarks.read().await;
|
||||
if let Some(model) = registry.get(model_id) {
|
||||
if model.has_benchmarks() {
|
||||
let capability = model.capability(task_type);
|
||||
tracing::info!(
|
||||
"Using benchmark capability for {}: {:.3} (task_type: {:?})",
|
||||
model_id, capability, task_type
|
||||
);
|
||||
return (capability, true); // (capability, from_benchmarks)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to price-based heuristic
|
||||
let capability = self.estimate_capability_from_price(avg_cost_per_token);
|
||||
tracing::debug!(
|
||||
"Using price-based capability for {}: {:.3} (avg_cost: {:.10})",
|
||||
model_id, capability, avg_cost_per_token
|
||||
);
|
||||
(capability, false)
|
||||
}
|
||||
|
||||
/// Select optimal model from available options.
///
/// # Algorithm
/// 1. Calculate expected cost for each model using benchmark capabilities when available
/// 2. If user requested a specific model, use it as minimum capability floor
/// 3. Filter models exceeding budget
/// 4. Select model with minimum expected cost
/// 5. Include fallbacks in case of failure
///
/// # Preconditions
/// - `models` is non-empty (returns `None` otherwise)
/// - `budget_cents > 0`
///
/// # Returns
/// `Some(ModelRecommendation)` unless `models` is empty or no cost could be
/// computed at all. If nothing fits the budget, the cheapest candidate is
/// still returned (over budget) rather than failing.
async fn select_optimal(
    &self,
    models: &[PricingInfo],
    complexity: f64,
    estimated_tokens: u64,
    budget_cents: u64,
    task_type: TaskType,
    historical_stats: Option<&HashMap<String, ModelStats>>,
    requested_model: Option<&str>,
    ctx: &AgentContext,
) -> Option<ModelRecommendation> {
    if models.is_empty() {
        return None;
    }

    // Calculate expected cost for all models. Data-source priority per model:
    // historical stats > benchmark capability > price heuristic.
    let mut costs: Vec<ExpectedCost> = Vec::with_capacity(models.len());
    // NOTE(review): `any_from_benchmarks` is assigned below but never read
    // afterwards — looks like dead state; confirm and consider removing.
    let mut any_from_benchmarks = false;

    for m in models {
        let cost = if let Some(stats) = historical_stats.and_then(|h| h.get(&m.model_id)) {
            // Use historical data if available (highest priority)
            self.calculate_expected_cost_with_history(m, complexity, estimated_tokens, stats)
        } else {
            // Use benchmark data for capability (falls back to price heuristic)
            let (capability, from_benchmarks) = self.get_capability(
                &m.model_id,
                task_type,
                m.average_cost_per_token(),
                ctx,
            ).await;

            if from_benchmarks {
                any_from_benchmarks = true;
            }

            self.calculate_expected_cost_with_capability(
                m, complexity, estimated_tokens, capability, from_benchmarks
            )
        };
        costs.push(cost);
    }

    // Sort by expected cost (ascending) — "first" below means "cheapest".
    costs.sort_by(|a, b| {
        a.expected_cost_cents
            .cmp(&b.expected_cost_cents)
    });

    // If user requested a specific model, use its capability as a minimum floor
    // so we never silently downgrade below what the user asked for.
    let min_capability = if let Some(req_model) = requested_model {
        // Find the requested model's capability among the computed costs
        if let Some(req_cost) = costs.iter().find(|c| c.model_id == req_model) {
            tracing::info!(
                "Using requested model {} as capability floor: {:.3}",
                req_model,
                req_cost.capability
            );
            req_cost.capability
        } else {
            // Requested model not in the cost list — fall back to its price
            if let Some(req_pricing) = models.iter().find(|m| m.model_id == req_model) {
                let cap = self.estimate_capability_from_price(req_pricing.average_cost_per_token());
                tracing::info!(
                    "Requested model {} not in costs list, using price-based capability: {:.3}",
                    req_model,
                    cap
                );
                cap
            } else {
                // Model not found at all, use a reasonable floor (0.7 = mid-tier)
                tracing::warn!(
                    "Requested model {} not found, using default capability floor 0.7",
                    req_model
                );
                0.7
            }
        }
    } else {
        0.0 // No minimum
    };

    // Filter to models meeting minimum capability (5% tolerance so the
    // requested model itself always passes its own floor).
    let filtered_costs: Vec<_> = if min_capability > 0.0 {
        costs.iter()
            .filter(|c| c.capability >= min_capability * 0.95) // Allow 5% tolerance
            .cloned()
            .collect()
    } else {
        costs.clone()
    };

    // If the floor eliminated everything, fall back to the full list rather
    // than returning nothing.
    let costs_to_use = if filtered_costs.is_empty() {
        tracing::warn!("No models meet minimum capability {:.2}, using all models", min_capability);
        &costs
    } else {
        &filtered_costs
    };

    // Find cheapest model within budget
    let within_budget: Vec<_> = costs_to_use
        .iter()
        .filter(|c| c.expected_cost_cents <= budget_cents)
        .cloned()
        .collect();

    // Prefer the cheapest within budget; otherwise take the cheapest overall
    // (lists are sorted ascending, so `.first()` is the minimum).
    let selected = within_budget.first().cloned().or_else(|| costs_to_use.first().cloned())?;

    // Get fallback models (next best options from the unfiltered list)
    let fallbacks: Vec<String> = costs
        .iter()
        .filter(|c| c.model_id != selected.model_id)
        .take(2)
        .map(|c| c.model_id.clone())
        .collect();

    // Provenance flag: did the *selected* model's estimate use history?
    let used_history = historical_stats.and_then(|h| h.get(&selected.model_id)).is_some();

    let recommendation = ModelRecommendation {
        model_id: selected.model_id.clone(),
        expected_cost_cents: selected.expected_cost_cents,
        confidence: 1.0 - selected.failure_probability,
        reasoning: format!(
            "Selected {} for {:?} task with expected cost {} cents (capability: {:.2}, failure prob: {:.2}){}{}",
            selected.model_id,
            task_type,
            selected.expected_cost_cents,
            selected.capability,
            selected.failure_probability,
            if used_history { " [historical]" } else { "" },
            if selected.from_benchmarks { " [benchmark]" } else { "" }
        ),
        fallbacks,
        used_historical_data: used_history,
        used_benchmark_data: selected.from_benchmarks,
        task_type: Some(task_type),
    };

    tracing::info!(
        "Model selected: {} (task: {:?}, cost: {} cents, benchmark_data: {}, history: {})",
        recommendation.model_id,
        task_type,
        recommendation.expected_cost_cents,
        recommendation.used_benchmark_data,
        recommendation.used_historical_data
    );

    Some(recommendation)
}
|
||||
|
||||
/// Calculate expected cost using actual historical statistics.
|
||||
///
|
||||
/// This uses real success rates and cost ratios from past executions
|
||||
/// instead of heuristic estimates.
|
||||
fn calculate_expected_cost_with_history(
|
||||
&self,
|
||||
pricing: &PricingInfo,
|
||||
_complexity: f64,
|
||||
estimated_tokens: u64,
|
||||
stats: &ModelStats,
|
||||
) -> ExpectedCost {
|
||||
// Use actual failure rate from history (inverted success rate)
|
||||
let failure_prob = (1.0 - stats.success_rate).clamp(0.0, self.max_failure_probability);
|
||||
|
||||
// Use actual token ratio from history for inefficiency
|
||||
let inefficiency = stats.avg_token_ratio.clamp(0.5, 3.0);
|
||||
|
||||
// Base cost for estimated tokens
|
||||
let input_tokens = estimated_tokens / 2;
|
||||
let output_tokens = estimated_tokens / 2;
|
||||
let base_cost = pricing.calculate_cost_cents(input_tokens, output_tokens);
|
||||
|
||||
// Adjust for actual inefficiency
|
||||
let adjusted_tokens = ((estimated_tokens as f64) * inefficiency) as u64;
|
||||
let adjusted_cost = pricing.calculate_cost_cents(adjusted_tokens / 2, adjusted_tokens / 2);
|
||||
|
||||
// Apply actual cost ratio (how much more/less than predicted)
|
||||
let cost_with_ratio = (adjusted_cost as f64) * stats.avg_cost_ratio.clamp(0.5, 3.0);
|
||||
|
||||
// Expected cost including retry probability
|
||||
let expected_cost = cost_with_ratio * (1.0 + failure_prob * self.retry_multiplier);
|
||||
|
||||
// Capability estimated from success rate rather than price
|
||||
let capability = stats.success_rate.clamp(0.3, 0.95);
|
||||
|
||||
ExpectedCost {
|
||||
model_id: pricing.model_id.clone(),
|
||||
base_cost_cents: base_cost,
|
||||
expected_cost_cents: expected_cost.ceil() as u64,
|
||||
failure_probability: failure_prob,
|
||||
capability,
|
||||
inefficiency,
|
||||
from_benchmarks: false, // Historical data is not benchmark data
|
||||
}
|
||||
}
|
||||
|
||||
/// Query historical model stats from memory.
|
||||
async fn get_historical_model_stats(
|
||||
&self,
|
||||
complexity: f64,
|
||||
ctx: &AgentContext,
|
||||
) -> Option<HashMap<String, ModelStats>> {
|
||||
let memory = ctx.memory.as_ref()?;
|
||||
|
||||
// Query stats for models at similar complexity levels (+/- 0.2)
|
||||
match memory.retriever.get_model_stats(complexity, 0.2).await {
|
||||
Ok(stats) if !stats.is_empty() => {
|
||||
tracing::debug!(
|
||||
"Found historical stats for {} models at complexity ~{:.2}",
|
||||
stats.len(),
|
||||
complexity
|
||||
);
|
||||
|
||||
// Convert to HashMap for easy lookup
|
||||
Some(stats.into_iter()
|
||||
.map(|s| (s.model_id.clone(), s))
|
||||
.collect())
|
||||
}
|
||||
Ok(_) => {
|
||||
tracing::debug!("No historical stats found for complexity ~{:.2}", complexity);
|
||||
None
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to fetch model stats: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Intermediate calculation result for a model.
///
/// One entry is produced per candidate model; candidates are ranked by
/// `expected_cost_cents` during selection.
#[derive(Debug, Clone)]
struct ExpectedCost {
    /// Model this estimate applies to.
    model_id: String,
    /// Nominal cost before inefficiency/retry adjustments (kept for debugging).
    #[allow(dead_code)]
    base_cost_cents: u64,
    /// Final expected cost including inefficiency and retry risk.
    expected_cost_cents: u64,
    /// Estimated probability the model fails the task.
    failure_probability: f64,
    /// Capability score used for this estimate (0-1).
    capability: f64,
    /// Token-usage inflation factor (kept for debugging).
    #[allow(dead_code)]
    inefficiency: f64,
    /// Whether capability was derived from benchmark data
    from_benchmarks: bool,
}
|
||||
|
||||
impl Default for ModelSelector {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Agent for ModelSelector {
|
||||
/// Stable identifier of this agent instance.
fn id(&self) -> &AgentId {
    &self.id
}
|
||||
|
||||
/// This agent is the model-selection leaf agent.
fn agent_type(&self) -> AgentType {
    AgentType::ModelSelector
}
|
||||
|
||||
/// One-line human-readable summary (shown in agent listings).
fn description(&self) -> &str {
    "Selects optimal model for task based on complexity and budget (U-curve optimization)"
}
|
||||
|
||||
/// Select the optimal model for a task.
|
||||
///
|
||||
/// # Expected Input
|
||||
/// Task should have complexity data in its context (from ComplexityEstimator).
|
||||
///
|
||||
/// # Returns
|
||||
/// AgentResult with ModelRecommendation in the `data` field.
|
||||
///
|
||||
/// # Benchmark Integration
|
||||
/// When benchmark data is available, uses actual benchmark scores for
|
||||
/// task-type-specific capability estimation.
|
||||
///
|
||||
/// # Learning Integration
|
||||
/// When memory is available, queries historical model statistics and uses
|
||||
/// actual success rates/cost ratios instead of heuristics.
|
||||
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
|
||||
// Get complexity + estimated tokens from task analysis (populated by ComplexityEstimator).
|
||||
let complexity = task
|
||||
.analysis()
|
||||
.complexity_score
|
||||
.unwrap_or(0.5)
|
||||
.clamp(0.0, 1.0);
|
||||
let estimated_tokens = task.analysis().estimated_total_tokens.unwrap_or(2000_u64);
|
||||
|
||||
// Infer task type from description for benchmark-based selection
|
||||
let task_type = TaskType::infer_from_description(task.description());
|
||||
|
||||
// Get available budget
|
||||
let budget_cents = task.budget().remaining_cents();
|
||||
|
||||
// Query historical model stats (if memory available)
|
||||
let historical_stats = self.get_historical_model_stats(complexity, ctx).await;
|
||||
|
||||
// Fetch pricing for tool-supporting models only
|
||||
let models = ctx.pricing.models_by_cost_filtered(true).await;
|
||||
|
||||
if models.is_empty() {
|
||||
// Fall back to configured default model (after resolving to latest)
|
||||
let default_model = if let Some(resolver) = &ctx.resolver {
|
||||
let resolver = resolver.read().await;
|
||||
let resolved = resolver.resolve(&ctx.config.default_model);
|
||||
if resolved.upgraded {
|
||||
tracing::info!(
|
||||
"Default model auto-upgraded: {} → {}",
|
||||
resolved.original, resolved.resolved
|
||||
);
|
||||
}
|
||||
resolved.resolved
|
||||
} else {
|
||||
ctx.config.default_model.clone()
|
||||
};
|
||||
|
||||
// Record on task analysis
|
||||
{
|
||||
let a = task.analysis_mut();
|
||||
a.selected_model = Some(default_model.clone());
|
||||
}
|
||||
|
||||
return AgentResult::success(
|
||||
"Using configured default model (no other models available)",
|
||||
0,
|
||||
)
|
||||
.with_data(json!({
|
||||
"model_id": default_model,
|
||||
"expected_cost_cents": 50,
|
||||
"confidence": 0.8,
|
||||
"reasoning": "Fallback to configured default model",
|
||||
"fallbacks": [],
|
||||
"used_historical_data": false,
|
||||
"used_benchmark_data": false,
|
||||
"task_type": format!("{:?}", task_type),
|
||||
}));
|
||||
}
|
||||
|
||||
// Get user-requested model - if specified, resolve to latest version and use it
|
||||
let requested_model = task.analysis().requested_model.clone();
|
||||
|
||||
// Auto-upgrade outdated model names using the resolver
|
||||
let (resolved_model, was_upgraded) = if let Some(ref req_model) = requested_model {
|
||||
if let Some(resolver) = &ctx.resolver {
|
||||
let resolver = resolver.read().await;
|
||||
let resolved = resolver.resolve(req_model);
|
||||
if resolved.upgraded {
|
||||
tracing::info!(
|
||||
"Model auto-upgraded: {} → {} ({})",
|
||||
resolved.original,
|
||||
resolved.resolved,
|
||||
resolved.reason.as_deref().unwrap_or("family upgrade")
|
||||
);
|
||||
}
|
||||
(Some(resolved.resolved), resolved.upgraded)
|
||||
} else {
|
||||
(Some(req_model.clone()), false)
|
||||
}
|
||||
} else {
|
||||
(None, false)
|
||||
};
|
||||
|
||||
// If user explicitly requested a model (possibly upgraded), use it directly
|
||||
// This bypasses the allowlist check - user knows what they want
|
||||
if let Some(ref req_model) = resolved_model {
|
||||
let in_allowlist = models.iter().any(|m| &m.model_id == req_model);
|
||||
let upgrade_note = if was_upgraded {
|
||||
format!(" (auto-upgraded from {})", requested_model.as_deref().unwrap_or("unknown"))
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
if !in_allowlist {
|
||||
tracing::warn!(
|
||||
"Requested model {} not in allowlist, but using it anyway (user override){}",
|
||||
req_model,
|
||||
upgrade_note
|
||||
);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Using requested model directly: {}{}",
|
||||
req_model,
|
||||
upgrade_note
|
||||
);
|
||||
|
||||
// Record selection in analysis
|
||||
{
|
||||
let a = task.analysis_mut();
|
||||
a.selected_model = Some(req_model.clone());
|
||||
a.estimated_cost_cents = Some(50); // Default estimate
|
||||
}
|
||||
|
||||
return AgentResult::success(
|
||||
&format!("Using requested model: {}{}", req_model, upgrade_note),
|
||||
1,
|
||||
)
|
||||
.with_data(json!({
|
||||
"model_id": req_model,
|
||||
"expected_cost_cents": 50,
|
||||
"confidence": 1.0,
|
||||
"reasoning": format!("User requested model: {}{}", req_model, upgrade_note),
|
||||
"fallbacks": [],
|
||||
"used_historical_data": false,
|
||||
"used_benchmark_data": false,
|
||||
"was_upgraded": was_upgraded,
|
||||
"original_model": requested_model,
|
||||
"task_type": format!("{:?}", task_type),
|
||||
"in_allowlist": in_allowlist,
|
||||
}));
|
||||
}
|
||||
|
||||
match self.select_optimal(
|
||||
&models,
|
||||
complexity,
|
||||
estimated_tokens,
|
||||
budget_cents,
|
||||
task_type,
|
||||
historical_stats.as_ref(),
|
||||
resolved_model.as_deref(),
|
||||
ctx,
|
||||
).await {
|
||||
Some(rec) => {
|
||||
// Record selection in analysis
|
||||
{
|
||||
let a = task.analysis_mut();
|
||||
a.selected_model = Some(rec.model_id.clone());
|
||||
a.estimated_cost_cents = Some(rec.expected_cost_cents);
|
||||
}
|
||||
|
||||
AgentResult::success(
|
||||
&rec.reasoning,
|
||||
1, // Minimal cost for selection itself
|
||||
)
|
||||
.with_data(json!({
|
||||
"model_id": rec.model_id,
|
||||
"expected_cost_cents": rec.expected_cost_cents,
|
||||
"confidence": rec.confidence,
|
||||
"reasoning": rec.reasoning,
|
||||
"fallbacks": rec.fallbacks,
|
||||
"used_historical_data": rec.used_historical_data,
|
||||
"used_benchmark_data": rec.used_benchmark_data,
|
||||
"task_type": format!("{:?}", task_type),
|
||||
"historical_stats_available": historical_stats.as_ref().map(|h| h.len()),
|
||||
"inputs": {
|
||||
"complexity": complexity,
|
||||
"estimated_tokens": estimated_tokens,
|
||||
"budget_cents": budget_cents
|
||||
}
|
||||
}))
|
||||
}
|
||||
None => {
|
||||
AgentResult::failure(
|
||||
"No suitable model found within budget",
|
||||
0,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LeafAgent for ModelSelector {
|
||||
fn capability(&self) -> LeafCapability {
|
||||
LeafCapability::ModelSelection
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `PricingInfo` fixture with the given per-million-token rates.
    ///
    /// All fixtures share a 100k context window, support tools, and have no
    /// explicit output-token cap, so tests vary only along the cost axis.
    fn make_pricing(id: &str, prompt: f64, completion: f64) -> PricingInfo {
        PricingInfo {
            model_id: id.to_string(),
            prompt_cost_per_million: prompt,
            completion_cost_per_million: completion,
            context_length: 100000,
            max_output_tokens: None,
            supports_tools: true,
        }
    }

    /// The expected-cost model should penalize cheap models with a higher
    /// failure probability (the left side of the U-curve).
    #[test]
    fn test_expected_cost_u_curve() {
        let selector = ModelSelector::new();
        let complexity = 0.7;
        let tokens = 2000;

        // Three price points spanning two orders of magnitude.
        let cheap = make_pricing("cheap", 0.1, 0.2);
        let medium = make_pricing("medium", 1.0, 2.0);
        let expensive = make_pricing("expensive", 10.0, 20.0);

        let cheap_cost = selector.calculate_expected_cost(&cheap, complexity, tokens);
        let medium_cost = selector.calculate_expected_cost(&medium, complexity, tokens);
        let expensive_cost = selector.calculate_expected_cost(&expensive, complexity, tokens);

        // For complex tasks, medium should be optimal (U-curve): the cheap
        // model carries a high failure rate, the expensive one a high base cost.
        println!("Cheap: {} (fail: {})", cheap_cost.expected_cost_cents, cheap_cost.failure_probability);
        println!("Medium: {} (fail: {})", medium_cost.expected_cost_cents, medium_cost.failure_probability);
        println!("Expensive: {} (fail: {})", expensive_cost.expected_cost_cents, expensive_cost.failure_probability);

        // Basic sanity check: cheap model should have higher failure rate.
        assert!(cheap_cost.failure_probability > medium_cost.failure_probability);
    }

    /// Keyword inference should map descriptions onto the expected task types.
    #[test]
    fn test_task_type_inference() {
        assert_eq!(TaskType::infer_from_description("Implement a function to sort arrays"), TaskType::Code);
        assert_eq!(TaskType::infer_from_description("Calculate the integral of x^2"), TaskType::Math);
        assert_eq!(TaskType::infer_from_description("Explain quantum mechanics"), TaskType::Reasoning);
    }
}
|
||||
|
||||
@@ -1,361 +0,0 @@
|
||||
//! Verification agent - validates task completion.
|
||||
//!
|
||||
//! # Verification Strategy (Hybrid)
|
||||
//! 1. Try programmatic verification first (fast, deterministic)
|
||||
//! 2. Fall back to LLM verification if needed
|
||||
//!
|
||||
//! # Programmatic Checks
|
||||
//! - File exists
|
||||
//! - Command succeeds
|
||||
//! - Output matches pattern
|
||||
|
||||
use async_trait::async_trait;
|
||||
use std::path::Path;
|
||||
use std::process::Stdio;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::agents::{
|
||||
Agent, AgentContext, AgentId, AgentResult, AgentType, LeafAgent, LeafCapability,
|
||||
};
|
||||
use crate::llm::{ChatMessage, Role};
|
||||
use crate::task::{ProgrammaticCheck, Task, VerificationCriteria, VerificationMethod, VerificationResult};
|
||||
|
||||
/// Agent that verifies task completion.
///
/// # Hybrid Verification
/// - Programmatic: Fast, deterministic, no cost
/// - LLM: Flexible, for subjective criteria
///
/// Stateless apart from its identity; all verification inputs come from the
/// task and the agent context at call time.
pub struct Verifier {
    // Unique identity of this agent instance (reported via `Agent::id`).
    id: AgentId,
}
|
||||
|
||||
impl Verifier {
    /// Create a new verifier.
    pub fn new() -> Self {
        Self { id: AgentId::new() }
    }

    /// Execute a programmatic check.
    ///
    /// # Returns
    /// `Ok(true)` if check passes, `Ok(false)` if fails, `Err` on error.
    ///
    /// # Note
    /// Paths in checks can be absolute or relative to working_dir.
    /// Missing/unreadable files are treated as a failed check (`Ok(false)`),
    /// not an error; only command-spawn and bad-regex problems return `Err`.
    async fn run_programmatic_check(
        &self,
        check: &ProgrammaticCheck,
        working_dir: &Path,
    ) -> Result<bool, String> {
        match check {
            // Presence test on the resolved path.
            ProgrammaticCheck::FileExists { path } => {
                let full_path = Self::resolve_path(path, working_dir);
                Ok(full_path.exists())
            }

            // Plain substring search over the file's text contents.
            ProgrammaticCheck::FileContains { path, content } => {
                let full_path = Self::resolve_path(path, working_dir);
                match tokio::fs::read_to_string(&full_path).await {
                    Ok(file_content) => Ok(file_content.contains(content)),
                    Err(_) => Ok(false),
                }
            }

            // Run the command via `sh -c` (POSIX shell) and report its exit
            // status; stdout/stderr are discarded since only success matters.
            ProgrammaticCheck::CommandSucceeds { command } => {
                let output = Command::new("sh")
                    .arg("-c")
                    .arg(command)
                    .current_dir(working_dir)
                    .stdout(Stdio::null())
                    .stderr(Stdio::null())
                    .status()
                    .await
                    .map_err(|e| e.to_string())?;

                Ok(output.success())
            }

            // Run the command, capture output, and regex-match stdout only
            // (stderr is ignored; command exit status is not consulted).
            ProgrammaticCheck::CommandOutputMatches { command, pattern } => {
                let output = Command::new("sh")
                    .arg("-c")
                    .arg(command)
                    .current_dir(working_dir)
                    .output()
                    .await
                    .map_err(|e| e.to_string())?;

                let stdout = String::from_utf8_lossy(&output.stdout);
                let regex = regex::Regex::new(pattern).map_err(|e| e.to_string())?;
                Ok(regex.is_match(&stdout))
            }

            // Directory presence test (false for files and missing paths).
            ProgrammaticCheck::DirectoryExists { path } => {
                let full_path = Self::resolve_path(path, working_dir);
                Ok(full_path.is_dir())
            }

            // Regex over the file's full text contents.
            ProgrammaticCheck::FileMatchesRegex { path, pattern } => {
                let full_path = Self::resolve_path(path, working_dir);
                match tokio::fs::read_to_string(&full_path).await {
                    Ok(content) => {
                        let regex = regex::Regex::new(pattern).map_err(|e| e.to_string())?;
                        Ok(regex.is_match(&content))
                    }
                    Err(_) => Ok(false),
                }
            }

            // Conjunction: short-circuits on the first failing child.
            // Box::pin is required because async recursion would otherwise
            // produce an infinitely-sized future.
            ProgrammaticCheck::All(checks) => {
                for c in checks {
                    if !Box::pin(self.run_programmatic_check(c, working_dir)).await? {
                        return Ok(false);
                    }
                }
                Ok(true)
            }

            // Disjunction: short-circuits on the first passing child.
            ProgrammaticCheck::Any(checks) => {
                for c in checks {
                    if Box::pin(self.run_programmatic_check(c, working_dir)).await? {
                        return Ok(true);
                    }
                }
                Ok(false)
            }
        }
    }

    /// Resolve a path - if absolute, use as-is; if relative, join with working_dir.
    fn resolve_path(path_str: &str, working_dir: &Path) -> std::path::PathBuf {
        let path = Path::new(path_str);
        if path.is_absolute() {
            path.to_path_buf()
        } else {
            working_dir.join(path)
        }
    }

    /// Verify using LLM.
    ///
    /// # Parameters
    /// - `task`: The task that was executed
    /// - `success_criteria`: What success looks like
    /// - `task_output`: The actual output produced by the executor
    /// - `ctx`: Agent context
    ///
    /// # Returns
    /// VerificationResult with LLM's assessment
    async fn verify_with_llm(
        &self,
        task: &Task,
        success_criteria: &str,
        task_output: Option<&str>,
        ctx: &AgentContext,
    ) -> VerificationResult {
        // Truncate executor output to 3000 chars to bound prompt size.
        let output_section = task_output
            .map(|o| format!("\n\nActual Output/Result:\n{}\n", o.chars().take(3000).collect::<String>()))
            .unwrap_or_default();

        let prompt = format!(
            r#"You are verifying if a task was completed correctly.

Task: {}

Success Criteria: {}
{output_section}
Based on what was actually accomplished (shown in the output above), respond with a JSON object:
{{
    "passed": true/false,
    "reasoning": "explanation of why the task passed or failed based on the actual output"
}}

Be lenient - if the core goal was achieved even if the format isn't perfect, pass it.
Respond ONLY with the JSON object."#,
            task.description(),
            success_criteria
        );

        let messages = vec![
            ChatMessage::new(Role::System, "You are a precise task verifier. Respond only with JSON."),
            ChatMessage::new(Role::User, prompt),
        ];

        // NOTE(review): the verification model is hard-coded rather than taken
        // from config or the task's selected model — confirm intentional.
        let model = "openai/gpt-4.1-mini";

        match ctx.llm.chat_completion(model, &messages, None).await {
            Ok(response) => {
                let content = response.content.unwrap_or_default();
                self.parse_llm_verification(&content, model)
            }
            Err(e) => {
                // Transport/API errors are reported as a failed verification
                // (cost 0 since no tokens were consumed).
                VerificationResult::fail(
                    format!("LLM verification failed: {}", e),
                    VerificationMethod::Llm { model: model.to_string() },
                    0,
                )
            }
        }
    }

    /// Parse LLM verification response.
    ///
    /// Prefers strict JSON of the form `{"passed": bool, "reasoning": str}`;
    /// if the response is not valid JSON, falls back to a keyword heuristic
    /// over the raw text.
    fn parse_llm_verification(&self, response: &str, model: &str) -> VerificationResult {
        if let Ok(json) = serde_json::from_str::<serde_json::Value>(response) {
            // Absent or non-boolean "passed" is conservatively treated as a fail.
            let passed = json["passed"].as_bool().unwrap_or(false);
            let reasoning = json["reasoning"]
                .as_str()
                .unwrap_or("No reasoning provided")
                .to_string();

            if passed {
                VerificationResult::pass(
                    reasoning,
                    VerificationMethod::Llm { model: model.to_string() },
                    1, // Minimal cost
                )
            } else {
                VerificationResult::fail(
                    reasoning,
                    VerificationMethod::Llm { model: model.to_string() },
                    1,
                )
            }
        } else {
            // Try to infer from text
            // NOTE(review): substring heuristic — "pass" also matches phrases
            // like "did not pass", so negated statements can be misread as a
            // pass; confirm this leniency is acceptable for the fallback path.
            let passed = response.to_lowercase().contains("pass")
                || response.to_lowercase().contains("success")
                || response.to_lowercase().contains("completed");

            if passed {
                VerificationResult::pass(
                    response.to_string(),
                    VerificationMethod::Llm { model: model.to_string() },
                    1,
                )
            } else {
                VerificationResult::fail(
                    response.to_string(),
                    VerificationMethod::Llm { model: model.to_string() },
                    1,
                )
            }
        }
    }

    /// Run verification according to criteria.
    ///
    /// Dispatches on the task's `VerificationCriteria`:
    /// - `None`: auto-pass.
    /// - `Programmatic`: deterministic checks only; check errors are fails.
    /// - `LlmBased`: LLM judgment against the success criteria.
    /// - `Hybrid`: programmatic first; on failure OR error, fall back to LLM.
    async fn verify(
        &self,
        task: &Task,
        ctx: &AgentContext,
    ) -> VerificationResult {
        match task.verification() {
            VerificationCriteria::None => {
                VerificationResult::pass(
                    "No verification required",
                    VerificationMethod::None,
                    0,
                )
            }

            VerificationCriteria::Programmatic(check) => {
                match self.run_programmatic_check(check, &ctx.working_dir).await {
                    Ok(true) => VerificationResult::pass(
                        "Programmatic check passed",
                        VerificationMethod::Programmatic,
                        0,
                    ),
                    Ok(false) => VerificationResult::fail(
                        "Programmatic check failed",
                        VerificationMethod::Programmatic,
                        0,
                    ),
                    Err(e) => VerificationResult::fail(
                        format!("Programmatic check error: {}", e),
                        VerificationMethod::Programmatic,
                        0,
                    ),
                }
            }

            VerificationCriteria::LlmBased { success_criteria } => {
                // Get last output from task analysis if available
                let last_output = task.last_output();
                self.verify_with_llm(task, success_criteria, last_output, ctx).await
            }

            VerificationCriteria::Hybrid { programmatic, llm_fallback } => {
                // Try programmatic first
                match self.run_programmatic_check(programmatic, &ctx.working_dir).await {
                    Ok(true) => VerificationResult::pass(
                        "Programmatic check passed",
                        VerificationMethod::Programmatic,
                        0,
                    ),
                    Ok(false) => {
                        // Fall back to LLM
                        let last_output = task.last_output();
                        self.verify_with_llm(task, llm_fallback, last_output, ctx).await
                    }
                    Err(_) => {
                        // Error in programmatic, fall back to LLM
                        let last_output = task.last_output();
                        self.verify_with_llm(task, llm_fallback, last_output, ctx).await
                    }
                }
            }
        }
    }
}
|
||||
|
||||
impl Default for Verifier {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Agent for Verifier {
|
||||
fn id(&self) -> &AgentId {
|
||||
&self.id
|
||||
}
|
||||
|
||||
fn agent_type(&self) -> AgentType {
|
||||
AgentType::Verifier
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Verifies task completion using programmatic checks and LLM fallback"
|
||||
}
|
||||
|
||||
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
|
||||
let result = self.verify(task, ctx).await;
|
||||
|
||||
if result.passed() {
|
||||
AgentResult::success(
|
||||
result.reasoning(),
|
||||
result.cost_cents(),
|
||||
)
|
||||
.with_data(serde_json::json!({
|
||||
"passed": true,
|
||||
"method": format!("{:?}", result.method()),
|
||||
"reasoning": result.reasoning(),
|
||||
}))
|
||||
} else {
|
||||
AgentResult::failure(
|
||||
result.reasoning(),
|
||||
result.cost_cents(),
|
||||
)
|
||||
.with_data(serde_json::json!({
|
||||
"passed": false,
|
||||
"method": format!("{:?}", result.method()),
|
||||
"reasoning": result.reasoning(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LeafAgent for Verifier {
|
||||
fn capability(&self) -> LeafCapability {
|
||||
LeafCapability::Verification
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,29 +1,33 @@
|
||||
//! Agents module - the hierarchical agent tree.
|
||||
//! Agents module - task execution system.
|
||||
//!
|
||||
//! # Agent Types
|
||||
//! - **RootAgent**: Top-level orchestrator, receives tasks from API
|
||||
//! - **NodeAgent**: Intermediate orchestrator, delegates to children
|
||||
//! - **LeafAgent**: Specialized agents that do actual work
|
||||
//! - **SimpleAgent**: Unified agent that directly executes tasks
|
||||
//! - **TaskExecutor**: Core execution loop with tools (used by SimpleAgent)
|
||||
//!
|
||||
//! # Leaf Agent Specializations
|
||||
//! - `ComplexityEstimator`: Estimates task difficulty
|
||||
//! - `ModelSelector`: Chooses optimal model for cost/capability
|
||||
//! - `TaskExecutor`: Executes tasks using tools
|
||||
//! - `Verifier`: Validates task completion
|
||||
//! # Legacy Types (deprecated, will be removed)
|
||||
//! - **RootAgent**: Complex orchestrator (replaced by SimpleAgent)
|
||||
//! - **NodeAgent**: Recursive splitter (removed - lost context)
|
||||
//! - **ComplexityEstimator**: LLM-based estimation (unreliable)
|
||||
//! - **ModelSelector**: U-curve optimization (over-engineered)
|
||||
//! - **Verifier**: LLM self-verification (rubber-stamped everything)
|
||||
//!
|
||||
//! # Design Principles
|
||||
//! - Agents communicate synchronously (parent calls child, child returns)
|
||||
//! - Designed for future async message passing migration
|
||||
//! - All operations return `Result` with meaningful errors
|
||||
//! - Direct execution without orchestration overhead
|
||||
//! - User controls task granularity (no auto-splitting)
|
||||
//! - Blocker detection via system prompt rules
|
||||
//! - Mission completion via complete_mission tool
|
||||
|
||||
mod types;
|
||||
mod context;
|
||||
mod tree;
|
||||
pub mod tuning;
|
||||
pub mod orchestrator;
|
||||
pub mod orchestrator; // TODO: Remove after migration
|
||||
pub mod leaf;
|
||||
mod simple;
|
||||
|
||||
pub use types::{AgentId, AgentType, AgentResult, AgentError, Complexity};
|
||||
pub use simple::SimpleAgent;
|
||||
|
||||
pub use types::{AgentId, AgentType, AgentResult, AgentError, Complexity, TerminalReason};
|
||||
pub use context::AgentContext;
|
||||
pub use tree::{AgentTree, AgentRef};
|
||||
pub use tuning::TuningParams;
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
//! Orchestrator agents - Root and Node agents that manage the tree.
|
||||
|
||||
mod root;
|
||||
mod node;
|
||||
|
||||
pub use root::RootAgent;
|
||||
pub use node::NodeAgent;
|
||||
|
||||
//! Orchestrator agents (legacy - removed).
|
||||
//!
|
||||
//! The orchestrator agents (RootAgent, NodeAgent) have been superseded by SimpleAgent.
|
||||
//! This module is kept empty for now; will be removed in a future cleanup.
|
||||
|
||||
@@ -1,889 +0,0 @@
|
||||
//! Node agent - intermediate orchestrator in the agent tree.
|
||||
//!
|
||||
//! Node agents are like mini-root agents that can:
|
||||
//! - Receive delegated tasks from parent
|
||||
//! - Estimate complexity and split complex subtasks further (recursive)
|
||||
//! - Delegate to their own children
|
||||
//! - Aggregate results for parent
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::agents::{
|
||||
leaf::{ComplexityEstimator, ModelSelector, TaskExecutor, Verifier},
|
||||
Agent, AgentContext, AgentId, AgentRef, AgentResult, AgentType, Complexity, OrchestratorAgent,
|
||||
};
|
||||
use crate::budget::Budget;
|
||||
use crate::llm::{ChatMessage, Role};
|
||||
use crate::task::{Subtask, SubtaskPlan, Task, VerificationCriteria};
|
||||
|
||||
/// Node agent - intermediate orchestrator.
///
/// # Purpose
/// Handles subtasks that may still be complex enough
/// to warrant further splitting. Now with full recursive
/// splitting capabilities like RootAgent.
///
/// # Recursive Splitting
/// NodeAgent can estimate complexity of its subtasks and
/// recursively split them if they're still too complex,
/// respecting the `max_split_depth` in context.
pub struct NodeAgent {
    // Unique identity of this agent instance.
    id: AgentId,

    /// Name for identification in logs
    name: String,

    // Child agents - full pipeline for recursive splitting.
    // Arc-wrapped so the same leaf instances can be shared across nodes.
    complexity_estimator: Arc<ComplexityEstimator>,
    model_selector: Arc<ModelSelector>,
    task_executor: Arc<TaskExecutor>,
    verifier: Arc<Verifier>,

    // Child node agents (for further splitting)
    child_nodes: Vec<Arc<NodeAgent>>,
}
|
||||
|
||||
impl NodeAgent {
|
||||
/// Create a new node agent with full recursive capabilities.
|
||||
pub fn new(name: impl Into<String>) -> Self {
|
||||
Self {
|
||||
id: AgentId::new(),
|
||||
name: name.into(),
|
||||
complexity_estimator: Arc::new(ComplexityEstimator::new()),
|
||||
model_selector: Arc::new(ModelSelector::new()),
|
||||
task_executor: Arc::new(TaskExecutor::new()),
|
||||
verifier: Arc::new(Verifier::new()),
|
||||
child_nodes: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a node with custom executor.
|
||||
pub fn with_executor(mut self, executor: Arc<TaskExecutor>) -> Self {
|
||||
self.task_executor = executor;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a child node for hierarchical delegation.
|
||||
pub fn add_child_node(&mut self, child: Arc<NodeAgent>) {
|
||||
self.child_nodes.push(child);
|
||||
}
|
||||
|
||||
/// Get the node's name.
|
||||
pub fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
|
||||
/// Estimate complexity of a task.
|
||||
async fn estimate_complexity(&self, task: &mut Task, ctx: &AgentContext) -> Complexity {
|
||||
let result = self.complexity_estimator.execute(task, ctx).await;
|
||||
|
||||
if let Some(data) = result.data {
|
||||
let score = data["score"].as_f64().unwrap_or(0.5);
|
||||
let reasoning = data["reasoning"].as_str().unwrap_or("").to_string();
|
||||
let estimated_tokens = data["estimated_tokens"].as_u64().unwrap_or(2000);
|
||||
let should_split = data["should_split"].as_bool().unwrap_or(false);
|
||||
|
||||
Complexity::new(score, reasoning, estimated_tokens).with_split(should_split)
|
||||
} else {
|
||||
Complexity::moderate("Could not estimate complexity")
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a complex task into subtasks.
|
||||
async fn split_task(
|
||||
&self,
|
||||
task: &Task,
|
||||
ctx: &AgentContext,
|
||||
) -> Result<SubtaskPlan, AgentResult> {
|
||||
let prompt = format!(
|
||||
r#"You are a task planner. Break down this task into smaller, manageable subtasks.
|
||||
|
||||
Task: {}
|
||||
|
||||
Respond with a JSON object:
|
||||
{{
|
||||
"subtasks": [
|
||||
{{
|
||||
"description": "What to do",
|
||||
"verification": "How to verify it's done",
|
||||
"weight": 1.0,
|
||||
"dependencies": []
|
||||
}}
|
||||
],
|
||||
"reasoning": "Why this breakdown makes sense"
|
||||
}}
|
||||
|
||||
Guidelines:
|
||||
- Each subtask should be independently executable once its dependencies are complete
|
||||
- The "dependencies" array contains indices (0-based) of subtasks that MUST complete before this one can start
|
||||
- For example, if subtask 2 needs subtask 0's output, set "dependencies": [0]
|
||||
- Include verification for each subtask
|
||||
- Weight indicates relative effort (higher = more work)
|
||||
- Keep subtasks focused and specific
|
||||
- Aim for 2-4 subtasks typically
|
||||
- IMPORTANT: If subtasks have a logical order (e.g., download before analyze), specify dependencies!
|
||||
|
||||
PREFER COMMAND-LINE APPROACHES:
|
||||
- For downloading files: use curl/wget, NOT browser automation
|
||||
- For Chrome extensions: download CRX directly via URL pattern, then unzip
|
||||
- For file analysis: use grep/find/ripgrep, NOT GUI tools
|
||||
- For web APIs: use curl/fetch_url, NOT browser clicks
|
||||
- Desktop automation is a LAST RESORT only when no CLI option exists
|
||||
|
||||
Respond ONLY with the JSON object."#,
|
||||
task.description()
|
||||
);
|
||||
|
||||
let messages = vec![
|
||||
ChatMessage::new(
|
||||
Role::System,
|
||||
"You are a precise task planner. Respond only with JSON.",
|
||||
),
|
||||
ChatMessage::new(Role::User, prompt),
|
||||
];
|
||||
|
||||
let response = ctx
|
||||
.llm
|
||||
.chat_completion("openai/gpt-4.1-mini", &messages, None)
|
||||
.await
|
||||
.map_err(|e| AgentResult::failure(format!("LLM error: {}", e), 1))?;
|
||||
|
||||
let content = response.content.unwrap_or_default();
|
||||
self.parse_subtask_plan(&content, task.id())
|
||||
}
|
||||
|
||||
/// Extract JSON from LLM response (handles markdown code blocks).
|
||||
fn extract_json(response: &str) -> String {
|
||||
let trimmed = response.trim();
|
||||
|
||||
// Check for markdown code block
|
||||
if trimmed.starts_with("```") {
|
||||
// Find the end of the opening fence
|
||||
if let Some(start_idx) = trimmed.find('\n') {
|
||||
let after_fence = &trimmed[start_idx + 1..];
|
||||
// Find the closing fence
|
||||
if let Some(end_idx) = after_fence.rfind("```") {
|
||||
return after_fence[..end_idx].trim().to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to find JSON object in the response
|
||||
if let Some(start) = trimmed.find('{') {
|
||||
if let Some(end) = trimmed.rfind('}') {
|
||||
if end > start {
|
||||
return trimmed[start..=end].to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return as-is if no extraction needed
|
||||
trimmed.to_string()
|
||||
}
|
||||
|
||||
/// Parse LLM response into SubtaskPlan.
|
||||
fn parse_subtask_plan(
|
||||
&self,
|
||||
response: &str,
|
||||
parent_id: crate::task::TaskId,
|
||||
) -> Result<SubtaskPlan, AgentResult> {
|
||||
let extracted = Self::extract_json(response);
|
||||
let json: serde_json::Value = serde_json::from_str(&extracted).map_err(|e| {
|
||||
AgentResult::failure(
|
||||
format!(
|
||||
"Failed to parse subtasks: {} (raw: {}...)",
|
||||
e,
|
||||
response.chars().take(100).collect::<String>()
|
||||
),
|
||||
0,
|
||||
)
|
||||
})?;
|
||||
|
||||
let reasoning = json["reasoning"]
|
||||
.as_str()
|
||||
.unwrap_or("No reasoning provided")
|
||||
.to_string();
|
||||
|
||||
let subtasks: Vec<Subtask> = json["subtasks"]
|
||||
.as_array()
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.map(|s| {
|
||||
let desc = s["description"].as_str().unwrap_or("").to_string();
|
||||
let verification = s["verification"].as_str().unwrap_or("");
|
||||
let weight = s["weight"].as_f64().unwrap_or(1.0);
|
||||
|
||||
// Parse dependencies array
|
||||
let dependencies: Vec<usize> = s["dependencies"]
|
||||
.as_array()
|
||||
.map(|deps| {
|
||||
deps.iter()
|
||||
.filter_map(|d| d.as_u64().map(|n| n as usize))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
Subtask::new(desc, VerificationCriteria::llm_based(verification), weight)
|
||||
.with_dependencies(dependencies)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
if subtasks.is_empty() {
|
||||
return Err(AgentResult::failure("No subtasks generated", 1));
|
||||
}
|
||||
|
||||
SubtaskPlan::new(parent_id, subtasks, reasoning)
|
||||
.map_err(|e| AgentResult::failure(format!("Invalid subtask plan: {}", e), 0))
|
||||
}
|
||||
|
||||
/// Execute subtasks recursively, potentially splitting further.
|
||||
async fn execute_subtasks(
|
||||
&self,
|
||||
subtask_plan: SubtaskPlan,
|
||||
parent_budget: &Budget,
|
||||
ctx: &AgentContext,
|
||||
requested_model: Option<&str>,
|
||||
) -> AgentResult {
|
||||
// Convert plan to tasks
|
||||
let mut tasks = match subtask_plan.into_tasks(parent_budget) {
|
||||
Ok(t) => t,
|
||||
Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
|
||||
};
|
||||
|
||||
// Propagate requested_model to all subtasks
|
||||
if let Some(model) = requested_model {
|
||||
for task in &mut tasks {
|
||||
task.analysis_mut().requested_model = Some(model.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let mut results = Vec::new();
|
||||
let mut total_cost = 0u64;
|
||||
|
||||
// Create a child context with reduced split depth
|
||||
let child_ctx = ctx.child_context();
|
||||
|
||||
// Execute each subtask recursively
|
||||
for task in &mut tasks {
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' processing subtask: {}",
|
||||
self.name,
|
||||
task.description().chars().take(80).collect::<String>()
|
||||
);
|
||||
|
||||
// Create a child NodeAgent for this subtask (recursive)
|
||||
let child_node = NodeAgent::new(format!("{}-sub", self.name));
|
||||
|
||||
// Execute through the child node (which may split further)
|
||||
let result = child_node.execute(task, &child_ctx).await;
|
||||
total_cost += result.cost_cents;
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
// Aggregate results
|
||||
let successes = results.iter().filter(|r| r.success).count();
|
||||
let total = results.len();
|
||||
|
||||
// Concatenate successful outputs for meaningful aggregation
|
||||
let combined_output = Self::concatenate_outputs(&results);
|
||||
|
||||
if successes == total {
|
||||
AgentResult::success(combined_output, total_cost).with_data(json!({
|
||||
"subtasks_total": total,
|
||||
"subtasks_succeeded": successes,
|
||||
"results": results.iter().map(|r| &r.output).collect::<Vec<_>>(),
|
||||
}))
|
||||
} else {
|
||||
AgentResult::failure(
|
||||
format!(
|
||||
"{}/{} subtasks succeeded\n\n{}",
|
||||
successes, total, combined_output
|
||||
),
|
||||
total_cost,
|
||||
)
|
||||
.with_data(json!({
|
||||
"subtasks_total": total,
|
||||
"subtasks_succeeded": successes,
|
||||
"results": results.iter().map(|r| json!({
|
||||
"success": r.success,
|
||||
"output": &r.output,
|
||||
})).collect::<Vec<_>>(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Concatenate subtask outputs into a single string.
|
||||
/// Used for intermediate aggregation (RootAgent handles final synthesis).
|
||||
fn concatenate_outputs(results: &[AgentResult]) -> String {
|
||||
let outputs: Vec<String> = results
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, r)| r.success && !r.output.is_empty())
|
||||
.map(|(i, r)| {
|
||||
if results.len() == 1 {
|
||||
r.output.clone()
|
||||
} else {
|
||||
format!("### Part {}\n{}", i + 1, r.output)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
if outputs.is_empty() {
|
||||
"No output generated.".to_string()
|
||||
} else if outputs.len() == 1 {
|
||||
outputs.into_iter().next().unwrap()
|
||||
} else {
|
||||
outputs.join("\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute with tree updates for visualization.
|
||||
/// This method updates the parent's tree structure as this node executes.
|
||||
pub async fn execute_with_tree(
|
||||
&self,
|
||||
task: &mut Task,
|
||||
ctx: &AgentContext,
|
||||
node_id: &str,
|
||||
root_tree: &mut crate::api::control::AgentTreeNode,
|
||||
emit_ctx: &AgentContext,
|
||||
) -> AgentResult {
|
||||
use crate::api::control::AgentTreeNode;
|
||||
|
||||
let mut total_cost = 0u64;
|
||||
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' executing task (depth remaining: {}): {}",
|
||||
self.name,
|
||||
ctx.max_split_depth,
|
||||
task.description().chars().take(80).collect::<String>()
|
||||
);
|
||||
|
||||
// Step 1: Estimate complexity
|
||||
ctx.emit_phase(
|
||||
"estimating_complexity",
|
||||
Some("Analyzing subtask..."),
|
||||
Some(&self.name),
|
||||
);
|
||||
let complexity = self.estimate_complexity(task, ctx).await;
|
||||
total_cost += 1;
|
||||
|
||||
// Update node with complexity
|
||||
if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
|
||||
node.complexity = Some(complexity.score());
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' complexity: {:.2} (should_split: {}, can_split: {})",
|
||||
self.name,
|
||||
complexity.score(),
|
||||
complexity.should_split(),
|
||||
ctx.can_split()
|
||||
);
|
||||
|
||||
// Step 2: Decide execution strategy
|
||||
if complexity.should_split() && ctx.can_split() {
|
||||
ctx.emit_phase(
|
||||
"splitting_task",
|
||||
Some("Decomposing subtask..."),
|
||||
Some(&self.name),
|
||||
);
|
||||
tracing::info!("NodeAgent '{}' splitting task into sub-subtasks", self.name);
|
||||
|
||||
match self.split_task(task, ctx).await {
|
||||
Ok(plan) => {
|
||||
total_cost += 2;
|
||||
|
||||
// Add child nodes to this node in the tree
|
||||
if let Some(parent_node) =
|
||||
root_tree.children.iter_mut().find(|n| n.id == node_id)
|
||||
{
|
||||
for (i, subtask) in plan.subtasks().iter().enumerate() {
|
||||
let child_node = AgentTreeNode::new(
|
||||
&format!("{}-sub-{}", node_id, i + 1),
|
||||
"Node",
|
||||
&format!("Sub-subtask {}", i + 1),
|
||||
&subtask.description.chars().take(40).collect::<String>(),
|
||||
)
|
||||
.with_status("pending");
|
||||
parent_node.children.push(child_node);
|
||||
}
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
let subtask_count = plan.subtasks().len();
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' created {} sub-subtasks",
|
||||
self.name,
|
||||
subtask_count
|
||||
);
|
||||
|
||||
// Execute subtasks recursively with tree updates
|
||||
let child_ctx = ctx.child_context();
|
||||
let requested_model = task.analysis().requested_model.as_deref();
|
||||
let result = self
|
||||
.execute_subtasks_with_tree(
|
||||
plan,
|
||||
task.budget(),
|
||||
&child_ctx,
|
||||
node_id,
|
||||
root_tree,
|
||||
emit_ctx,
|
||||
requested_model,
|
||||
)
|
||||
.await;
|
||||
|
||||
return AgentResult {
|
||||
success: result.success,
|
||||
output: result.output,
|
||||
cost_cents: total_cost + result.cost_cents,
|
||||
model_used: result.model_used,
|
||||
data: result.data,
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"NodeAgent '{}' couldn't split, executing directly: {}",
|
||||
self.name,
|
||||
e.output
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Simple task: add child nodes for executor and verifier
|
||||
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
|
||||
parent_node.children.push(
|
||||
AgentTreeNode::new(
|
||||
&format!("{}-executor", node_id),
|
||||
"TaskExecutor",
|
||||
"Task Executor",
|
||||
"Execute subtask",
|
||||
)
|
||||
.with_status("running"),
|
||||
);
|
||||
parent_node.children.push(
|
||||
AgentTreeNode::new(
|
||||
&format!("{}-verifier", node_id),
|
||||
"Verifier",
|
||||
"Verifier",
|
||||
"Verify result",
|
||||
)
|
||||
.with_status("pending"),
|
||||
);
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
// Select model
|
||||
ctx.emit_phase(
|
||||
"selecting_model",
|
||||
Some("Choosing model..."),
|
||||
Some(&self.name),
|
||||
);
|
||||
let sel_result = self.model_selector.execute(task, ctx).await;
|
||||
total_cost += sel_result.cost_cents;
|
||||
|
||||
// Execute
|
||||
ctx.emit_phase("executing", Some("Running subtask..."), Some(&self.name));
|
||||
let result = self.task_executor.execute(task, ctx).await;
|
||||
total_cost += result.cost_cents;
|
||||
|
||||
// Update executor status
|
||||
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
|
||||
if let Some(exec_node) = parent_node
|
||||
.children
|
||||
.iter_mut()
|
||||
.find(|n| n.id == format!("{}-executor", node_id))
|
||||
{
|
||||
exec_node.status = if result.success {
|
||||
"completed".to_string()
|
||||
} else {
|
||||
"failed".to_string()
|
||||
};
|
||||
exec_node.budget_spent = result.cost_cents;
|
||||
}
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
// Store the executor output for verification
|
||||
task.set_last_output(result.output.clone());
|
||||
|
||||
if !result.success {
|
||||
return AgentResult::failure(result.output, total_cost).with_data(json!({
|
||||
"node_name": self.name,
|
||||
"complexity": complexity.score(),
|
||||
"was_split": false,
|
||||
"execution": result.data,
|
||||
}));
|
||||
}
|
||||
|
||||
// Verify
|
||||
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
|
||||
if let Some(ver_node) = parent_node
|
||||
.children
|
||||
.iter_mut()
|
||||
.find(|n| n.id == format!("{}-verifier", node_id))
|
||||
{
|
||||
ver_node.status = "running".to_string();
|
||||
}
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
ctx.emit_phase("verifying", Some("Checking results..."), Some(&self.name));
|
||||
let verification = self.verifier.execute(task, ctx).await;
|
||||
total_cost += verification.cost_cents;
|
||||
|
||||
// Update verifier status
|
||||
if let Some(parent_node) = root_tree.children.iter_mut().find(|n| n.id == node_id) {
|
||||
if let Some(ver_node) = parent_node
|
||||
.children
|
||||
.iter_mut()
|
||||
.find(|n| n.id == format!("{}-verifier", node_id))
|
||||
{
|
||||
ver_node.status = if verification.success {
|
||||
"completed".to_string()
|
||||
} else {
|
||||
"failed".to_string()
|
||||
};
|
||||
ver_node.budget_spent = verification.cost_cents;
|
||||
}
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
if verification.success {
|
||||
AgentResult::success(result.output, total_cost)
|
||||
.with_model(result.model_used.unwrap_or_default())
|
||||
.with_data(json!({
|
||||
"node_name": self.name,
|
||||
"complexity": complexity.score(),
|
||||
"was_split": false,
|
||||
"execution": result.data,
|
||||
"verification": verification.data,
|
||||
}))
|
||||
} else {
|
||||
AgentResult::failure(
|
||||
format!(
|
||||
"Task completed but verification failed: {}",
|
||||
verification.output
|
||||
),
|
||||
total_cost,
|
||||
)
|
||||
.with_data(json!({
|
||||
"node_name": self.name,
|
||||
"complexity": complexity.score(),
|
||||
"was_split": false,
|
||||
"execution": result.data,
|
||||
"verification": verification.data,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute subtasks with tree updates for visualization.
|
||||
async fn execute_subtasks_with_tree(
|
||||
&self,
|
||||
subtask_plan: SubtaskPlan,
|
||||
parent_budget: &Budget,
|
||||
ctx: &AgentContext,
|
||||
parent_node_id: &str,
|
||||
root_tree: &mut crate::api::control::AgentTreeNode,
|
||||
emit_ctx: &AgentContext,
|
||||
requested_model: Option<&str>,
|
||||
) -> AgentResult {
|
||||
let mut tasks = match subtask_plan.into_tasks(parent_budget) {
|
||||
Ok(t) => t,
|
||||
Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
|
||||
};
|
||||
|
||||
// Propagate requested_model to all subtasks
|
||||
if let Some(model) = requested_model {
|
||||
for task in &mut tasks {
|
||||
task.analysis_mut().requested_model = Some(model.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let mut results = Vec::new();
|
||||
let mut total_cost = 0u64;
|
||||
let child_ctx = ctx.child_context();
|
||||
|
||||
for (i, task) in tasks.iter_mut().enumerate() {
|
||||
let subtask_id = format!("{}-sub-{}", parent_node_id, i + 1);
|
||||
|
||||
// Update subtask status to running
|
||||
if let Some(parent_node) = root_tree
|
||||
.children
|
||||
.iter_mut()
|
||||
.find(|n| n.id == parent_node_id)
|
||||
{
|
||||
if let Some(child_node) =
|
||||
parent_node.children.iter_mut().find(|n| n.id == subtask_id)
|
||||
{
|
||||
child_node.status = "running".to_string();
|
||||
}
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' processing sub-subtask: {}",
|
||||
self.name,
|
||||
task.description().chars().take(80).collect::<String>()
|
||||
);
|
||||
|
||||
// Create and execute a child NodeAgent
|
||||
let child_node_agent = NodeAgent::new(subtask_id.clone());
|
||||
let result = child_node_agent.execute(task, &child_ctx).await;
|
||||
total_cost += result.cost_cents;
|
||||
|
||||
// Update subtask status
|
||||
if let Some(parent_node) = root_tree
|
||||
.children
|
||||
.iter_mut()
|
||||
.find(|n| n.id == parent_node_id)
|
||||
{
|
||||
if let Some(child_node) =
|
||||
parent_node.children.iter_mut().find(|n| n.id == subtask_id)
|
||||
{
|
||||
child_node.status = if result.success {
|
||||
"completed".to_string()
|
||||
} else {
|
||||
"failed".to_string()
|
||||
};
|
||||
child_node.budget_spent = result.cost_cents;
|
||||
}
|
||||
}
|
||||
emit_ctx.emit_tree(root_tree.clone());
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
let successes = results.iter().filter(|r| r.success).count();
|
||||
let total = results.len();
|
||||
|
||||
// Concatenate successful outputs for meaningful aggregation
|
||||
let combined_output = Self::concatenate_outputs(&results);
|
||||
|
||||
if successes == total {
|
||||
AgentResult::success(combined_output, total_cost).with_data(json!({
|
||||
"subtasks_total": total,
|
||||
"subtasks_succeeded": successes,
|
||||
}))
|
||||
} else {
|
||||
AgentResult::failure(
|
||||
format!(
|
||||
"{}/{} sub-subtasks succeeded\n\n{}",
|
||||
successes, total, combined_output
|
||||
),
|
||||
total_cost,
|
||||
)
|
||||
.with_data(json!({
|
||||
"subtasks_total": total,
|
||||
"subtasks_succeeded": successes,
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for NodeAgent {
|
||||
fn default() -> Self {
|
||||
Self::new("node")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Agent for NodeAgent {
|
||||
fn id(&self) -> &AgentId {
|
||||
&self.id
|
||||
}
|
||||
|
||||
fn agent_type(&self) -> AgentType {
|
||||
AgentType::Node
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Intermediate orchestrator with recursive splitting capabilities"
|
||||
}
|
||||
|
||||
async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
|
||||
let mut total_cost = 0u64;
|
||||
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' executing task (depth remaining: {}): {}",
|
||||
self.name,
|
||||
ctx.max_split_depth,
|
||||
task.description().chars().take(80).collect::<String>()
|
||||
);
|
||||
|
||||
// Step 1: Estimate complexity
|
||||
ctx.emit_phase(
|
||||
"estimating_complexity",
|
||||
Some("Analyzing subtask..."),
|
||||
Some(&self.name),
|
||||
);
|
||||
let complexity = self.estimate_complexity(task, ctx).await;
|
||||
total_cost += 1;
|
||||
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' complexity: {:.2} (should_split: {}, can_split: {})",
|
||||
self.name,
|
||||
complexity.score(),
|
||||
complexity.should_split(),
|
||||
ctx.can_split()
|
||||
);
|
||||
|
||||
// Step 2: Decide execution strategy
|
||||
if complexity.should_split() && ctx.can_split() {
|
||||
// Complex subtask: split further recursively
|
||||
ctx.emit_phase(
|
||||
"splitting_task",
|
||||
Some("Decomposing subtask..."),
|
||||
Some(&self.name),
|
||||
);
|
||||
tracing::info!("NodeAgent '{}' splitting task into sub-subtasks", self.name);
|
||||
|
||||
match self.split_task(task, ctx).await {
|
||||
Ok(plan) => {
|
||||
total_cost += 2; // Splitting cost
|
||||
|
||||
let subtask_count = plan.subtasks().len();
|
||||
tracing::info!(
|
||||
"NodeAgent '{}' created {} sub-subtasks",
|
||||
self.name,
|
||||
subtask_count
|
||||
);
|
||||
|
||||
// Execute subtasks recursively
|
||||
let requested_model = task.analysis().requested_model.as_deref();
|
||||
let result = self
|
||||
.execute_subtasks(plan, task.budget(), ctx, requested_model)
|
||||
.await;
|
||||
|
||||
return AgentResult {
|
||||
success: result.success,
|
||||
output: result.output,
|
||||
cost_cents: total_cost + result.cost_cents,
|
||||
model_used: result.model_used,
|
||||
data: result.data,
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"NodeAgent '{}' couldn't split, executing directly: {}",
|
||||
self.name,
|
||||
e.output
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Simple task or failed to split: execute directly
|
||||
// Select model
|
||||
ctx.emit_phase(
|
||||
"selecting_model",
|
||||
Some("Choosing model..."),
|
||||
Some(&self.name),
|
||||
);
|
||||
let sel_result = self.model_selector.execute(task, ctx).await;
|
||||
total_cost += sel_result.cost_cents;
|
||||
|
||||
// Execute
|
||||
ctx.emit_phase("executing", Some("Running subtask..."), Some(&self.name));
|
||||
let result = self.task_executor.execute(task, ctx).await;
|
||||
total_cost += result.cost_cents;
|
||||
|
||||
// Store the executor output for verification
|
||||
task.set_last_output(result.output.clone());
|
||||
|
||||
if !result.success {
|
||||
return AgentResult::failure(result.output, total_cost).with_data(json!({
|
||||
"node_name": self.name,
|
||||
"complexity": complexity.score(),
|
||||
"was_split": false,
|
||||
"execution": result.data,
|
||||
}));
|
||||
}
|
||||
|
||||
// Verify
|
||||
ctx.emit_phase("verifying", Some("Checking results..."), Some(&self.name));
|
||||
let verification = self.verifier.execute(task, ctx).await;
|
||||
total_cost += verification.cost_cents;
|
||||
|
||||
if verification.success {
|
||||
AgentResult::success(result.output, total_cost)
|
||||
.with_model(result.model_used.unwrap_or_default())
|
||||
.with_data(json!({
|
||||
"node_name": self.name,
|
||||
"complexity": complexity.score(),
|
||||
"was_split": false,
|
||||
"execution": result.data,
|
||||
"verification": verification.data,
|
||||
}))
|
||||
} else {
|
||||
AgentResult::failure(
|
||||
format!(
|
||||
"Task completed but verification failed: {}",
|
||||
verification.output
|
||||
),
|
||||
total_cost,
|
||||
)
|
||||
.with_data(json!({
|
||||
"node_name": self.name,
|
||||
"complexity": complexity.score(),
|
||||
"was_split": false,
|
||||
"execution": result.data,
|
||||
"verification": verification.data,
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl OrchestratorAgent for NodeAgent {
|
||||
fn children(&self) -> Vec<AgentRef> {
|
||||
let mut children: Vec<AgentRef> = vec![
|
||||
Arc::clone(&self.complexity_estimator) as AgentRef,
|
||||
Arc::clone(&self.model_selector) as AgentRef,
|
||||
Arc::clone(&self.task_executor) as AgentRef,
|
||||
Arc::clone(&self.verifier) as AgentRef,
|
||||
];
|
||||
|
||||
for node in &self.child_nodes {
|
||||
children.push(Arc::clone(node) as AgentRef);
|
||||
}
|
||||
|
||||
children
|
||||
}
|
||||
|
||||
fn find_child(&self, agent_type: AgentType) -> Option<AgentRef> {
|
||||
match agent_type {
|
||||
AgentType::ComplexityEstimator => {
|
||||
Some(Arc::clone(&self.complexity_estimator) as AgentRef)
|
||||
}
|
||||
AgentType::ModelSelector => Some(Arc::clone(&self.model_selector) as AgentRef),
|
||||
AgentType::TaskExecutor => Some(Arc::clone(&self.task_executor) as AgentRef),
|
||||
AgentType::Verifier => Some(Arc::clone(&self.verifier) as AgentRef),
|
||||
AgentType::Node => self.child_nodes.first().map(|n| Arc::clone(n) as AgentRef),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn delegate(&self, task: &mut Task, child: AgentRef, ctx: &AgentContext) -> AgentResult {
|
||||
child.execute(task, ctx).await
|
||||
}
|
||||
|
||||
async fn delegate_all(&self, tasks: &mut [Task], ctx: &AgentContext) -> Vec<AgentResult> {
|
||||
let mut results = Vec::with_capacity(tasks.len());
|
||||
|
||||
for task in tasks {
|
||||
// Use recursive execution for each task
|
||||
let result = self.execute(task, ctx).await;
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
}
|
||||
@@ -1,903 +0,0 @@
|
||||
//! Root agent - top-level orchestrator of the agent tree.
|
||||
//!
|
||||
//! # Responsibilities
|
||||
//! 1. Receive tasks from the API
|
||||
//! 2. Estimate complexity
|
||||
//! 3. Decide: execute directly or split into subtasks
|
||||
//! 4. Delegate to appropriate children
|
||||
//! 5. Aggregate results
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::agents::{
|
||||
Agent, AgentContext, AgentId, AgentRef, AgentResult, AgentType, Complexity,
|
||||
OrchestratorAgent,
|
||||
leaf::{ComplexityEstimator, ModelSelector, TaskExecutor, Verifier},
|
||||
};
|
||||
use crate::agents::tuning::TuningParams;
|
||||
use crate::budget::Budget;
|
||||
use crate::task::{Task, Subtask, SubtaskPlan, VerificationCriteria};
|
||||
|
||||
/// Root agent - the top of the agent tree.
|
||||
///
|
||||
/// # Task Processing Flow
|
||||
/// ```text
|
||||
/// 1. Estimate complexity (ComplexityEstimator)
|
||||
/// 2. If simple: execute directly (TaskExecutor)
|
||||
/// 3. If complex:
|
||||
/// a. Split into subtasks (LLM-based)
|
||||
/// b. Select model for each subtask (ModelSelector)
|
||||
/// c. Execute subtasks (TaskExecutor)
|
||||
/// d. Verify results (Verifier)
|
||||
/// 4. Return aggregated result
|
||||
/// ```
|
||||
pub struct RootAgent {
|
||||
id: AgentId,
|
||||
|
||||
// Child agents
|
||||
complexity_estimator: Arc<ComplexityEstimator>,
|
||||
model_selector: Arc<ModelSelector>,
|
||||
task_executor: Arc<TaskExecutor>,
|
||||
verifier: Arc<Verifier>,
|
||||
}
|
||||
|
||||
impl RootAgent {
|
||||
/// Create a new root agent with default children.
|
||||
pub fn new() -> Self {
|
||||
Self::new_with_tuning(&TuningParams::default())
|
||||
}
|
||||
|
||||
/// Create a new root agent using empirically tuned parameters.
|
||||
pub fn new_with_tuning(tuning: &TuningParams) -> Self {
|
||||
Self {
|
||||
id: AgentId::new(),
|
||||
complexity_estimator: Arc::new(ComplexityEstimator::with_params(
|
||||
tuning.complexity.prompt_variant,
|
||||
tuning.complexity.split_threshold,
|
||||
tuning.complexity.token_multiplier,
|
||||
)),
|
||||
model_selector: Arc::new(ModelSelector::with_params(
|
||||
tuning.model_selector.retry_multiplier,
|
||||
tuning.model_selector.inefficiency_scale,
|
||||
tuning.model_selector.max_failure_probability,
|
||||
)),
|
||||
task_executor: Arc::new(TaskExecutor::new()),
|
||||
verifier: Arc::new(Verifier::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Estimate complexity of a task.
|
||||
async fn estimate_complexity(&self, task: &mut Task, ctx: &AgentContext) -> Complexity {
|
||||
let result = self.complexity_estimator.execute(task, ctx).await;
|
||||
|
||||
if let Some(data) = result.data {
|
||||
let score = data["score"].as_f64().unwrap_or(0.5);
|
||||
let reasoning = data["reasoning"].as_str().unwrap_or("").to_string();
|
||||
let estimated_tokens = data["estimated_tokens"].as_u64().unwrap_or(2000);
|
||||
let should_split = data["should_split"].as_bool().unwrap_or(false);
|
||||
|
||||
Complexity::new(score, reasoning, estimated_tokens)
|
||||
.with_split(should_split)
|
||||
} else {
|
||||
Complexity::moderate("Could not estimate complexity")
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a complex task into subtasks.
|
||||
///
|
||||
/// Uses LLM to analyze the task and propose subtasks.
|
||||
async fn split_task(&self, task: &Task, ctx: &AgentContext) -> Result<SubtaskPlan, AgentResult> {
|
||||
let prompt = format!(
|
||||
r#"You are a task planner. Break down this task into smaller, manageable subtasks.
|
||||
|
||||
Task: {}
|
||||
|
||||
Respond with a JSON object:
|
||||
{{
|
||||
"subtasks": [
|
||||
{{
|
||||
"description": "What to do",
|
||||
"verification": "How to verify it's done",
|
||||
"weight": 1.0,
|
||||
"dependencies": []
|
||||
}}
|
||||
],
|
||||
"reasoning": "Why this breakdown makes sense"
|
||||
}}
|
||||
|
||||
Guidelines:
|
||||
- Each subtask should be independently executable once its dependencies are complete
|
||||
- The "dependencies" array contains indices (0-based) of subtasks that MUST complete before this one can start
|
||||
- For example, if subtask 2 needs subtask 0's output, set "dependencies": [0]
|
||||
- Include verification for each subtask
|
||||
- Weight indicates relative effort (higher = more work)
|
||||
- Keep subtasks focused and specific
|
||||
- IMPORTANT: If subtasks have a logical order (e.g., download before analyze), specify dependencies!
|
||||
|
||||
PREFER COMMAND-LINE APPROACHES:
|
||||
- For downloading files: use curl/wget, NOT browser automation
|
||||
- For Chrome extensions: download CRX directly via URL pattern, then unzip
|
||||
- For file analysis: use grep/find/ripgrep, NOT GUI tools
|
||||
- For web APIs: use curl/fetch_url, NOT browser clicks
|
||||
- Desktop automation is a LAST RESORT only when no CLI option exists
|
||||
|
||||
Respond ONLY with the JSON object."#,
|
||||
task.description()
|
||||
);
|
||||
|
||||
let messages = vec![
|
||||
crate::llm::ChatMessage::new(crate::llm::Role::System, "You are a precise task planner. Respond only with JSON."),
|
||||
crate::llm::ChatMessage::new(crate::llm::Role::User, prompt),
|
||||
];
|
||||
|
||||
let response = ctx.llm
|
||||
.chat_completion("openai/gpt-4.1-mini", &messages, None)
|
||||
.await
|
||||
.map_err(|e| AgentResult::failure(format!("LLM error: {}", e), 1))?;
|
||||
|
||||
let content = response.content.unwrap_or_default();
|
||||
self.parse_subtask_plan(&content, task.id())
|
||||
}
|
||||
|
||||
/// Synthesize a final output from subtask results.
|
||||
///
|
||||
/// # Purpose
|
||||
/// When a task is split into subtasks, this method produces a coherent final
|
||||
/// response by asking the LLM to synthesize all subtask outputs into a single
|
||||
/// answer that addresses the original request.
|
||||
///
|
||||
/// # Fallback
|
||||
/// If LLM synthesis fails, falls back to concatenating subtask outputs.
|
||||
async fn synthesize_final_output(
|
||||
&self,
|
||||
original_task: &str,
|
||||
results: &[AgentResult],
|
||||
ctx: &AgentContext,
|
||||
) -> String {
|
||||
// Collect successful outputs
|
||||
let subtask_outputs: Vec<String> = results
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, r)| r.success)
|
||||
.map(|(i, r)| format!("## Subtask {} Output\n{}", i + 1, r.output))
|
||||
.collect();
|
||||
|
||||
if subtask_outputs.is_empty() {
|
||||
return "All subtasks failed - no output to synthesize.".to_string();
|
||||
}
|
||||
|
||||
// If only one subtask, just return its output directly
|
||||
if subtask_outputs.len() == 1 {
|
||||
return results
|
||||
.iter()
|
||||
.find(|r| r.success)
|
||||
.map(|r| r.output.clone())
|
||||
.unwrap_or_default();
|
||||
}
|
||||
|
||||
let combined_outputs = subtask_outputs.join("\n\n---\n\n");
|
||||
|
||||
let prompt = format!(
|
||||
r#"You have completed a multi-step task. Below are the outputs from each step.
|
||||
|
||||
## Original Request
|
||||
{original_task}
|
||||
|
||||
## Subtask Outputs
|
||||
{combined_outputs}
|
||||
|
||||
## Your Task
|
||||
Synthesize these outputs into a single, coherent response that directly answers the original request.
|
||||
|
||||
Guidelines:
|
||||
- Combine findings into a unified narrative or report
|
||||
- Remove redundancy between subtask outputs
|
||||
- Maintain the format the user requested (e.g., if they asked for a markdown report, provide one)
|
||||
- If subtasks produced code or files, list them clearly
|
||||
- Be comprehensive but concise
|
||||
- Do NOT mention "subtasks" or the internal execution structure - respond as if you did the work yourself"#
|
||||
);
|
||||
|
||||
let messages = vec![
|
||||
crate::llm::ChatMessage::new(
|
||||
crate::llm::Role::System,
|
||||
"You are a helpful assistant that synthesizes work outputs into coherent responses.",
|
||||
),
|
||||
crate::llm::ChatMessage::new(crate::llm::Role::User, prompt),
|
||||
];
|
||||
|
||||
// Use a fast model for synthesis to minimize cost
|
||||
match ctx
|
||||
.llm
|
||||
.chat_completion("openai/gpt-4.1-mini", &messages, None)
|
||||
.await
|
||||
{
|
||||
Ok(response) => response.content.unwrap_or_else(|| {
|
||||
// Fallback: concatenate outputs if synthesis returned empty
|
||||
self.fallback_concatenate_outputs(results)
|
||||
}),
|
||||
Err(e) => {
|
||||
tracing::warn!("Synthesis LLM call failed, using fallback: {}", e);
|
||||
self.fallback_concatenate_outputs(results)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallback method: concatenate subtask outputs with headers.
|
||||
fn fallback_concatenate_outputs(&self, results: &[AgentResult]) -> String {
|
||||
let outputs: Vec<String> = results
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, r)| r.success && !r.output.is_empty())
|
||||
.map(|(i, r)| format!("## Part {}\n\n{}", i + 1, r.output))
|
||||
.collect();
|
||||
|
||||
if outputs.is_empty() {
|
||||
"Task completed but no output was generated.".to_string()
|
||||
} else {
|
||||
outputs.join("\n\n---\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the JSON payload from an LLM response.
///
/// Handles three shapes, in order of preference:
/// 1. a markdown code fence (```json ... ```): returns the fenced body;
/// 2. a `{...}` object embedded in surrounding prose: returns the span
///    from the first `{` to the last `}`;
/// 3. anything else: returns the trimmed response unchanged.
fn extract_json(response: &str) -> String {
    let text = response.trim();

    // Case 1: fenced code block — take what's between the fences.
    if text.starts_with("```") {
        let fenced = text
            .find('\n')
            .map(|nl| &text[nl + 1..])
            .and_then(|after| after.rfind("```").map(|end| after[..end].trim()));
        if let Some(inner) = fenced {
            return inner.to_string();
        }
    }

    // Case 2: braces embedded in prose — take the outermost span.
    if let (Some(open), Some(close)) = (text.find('{'), text.rfind('}')) {
        if close > open {
            return text[open..=close].to_string();
        }
    }

    // Case 3: already bare (or no JSON found) — return as-is.
    text.to_string()
}
|
||||
|
||||
/// Parse LLM response into SubtaskPlan.
|
||||
fn parse_subtask_plan(
|
||||
&self,
|
||||
response: &str,
|
||||
parent_id: crate::task::TaskId,
|
||||
) -> Result<SubtaskPlan, AgentResult> {
|
||||
let extracted = Self::extract_json(response);
|
||||
let json: serde_json::Value = serde_json::from_str(&extracted)
|
||||
.map_err(|e| AgentResult::failure(format!("Failed to parse subtasks: {} (raw: {}...)", e, response.chars().take(100).collect::<String>()), 0))?;
|
||||
|
||||
let reasoning = json["reasoning"]
|
||||
.as_str()
|
||||
.unwrap_or("No reasoning provided")
|
||||
.to_string();
|
||||
|
||||
let subtasks: Vec<Subtask> = json["subtasks"]
|
||||
.as_array()
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.map(|s| {
|
||||
let desc = s["description"].as_str().unwrap_or("").to_string();
|
||||
let verification = s["verification"].as_str().unwrap_or("");
|
||||
let weight = s["weight"].as_f64().unwrap_or(1.0);
|
||||
|
||||
// Parse dependencies array
|
||||
let dependencies: Vec<usize> = s["dependencies"]
|
||||
.as_array()
|
||||
.map(|deps| {
|
||||
deps.iter()
|
||||
.filter_map(|d| d.as_u64().map(|n| n as usize))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
Subtask::new(
|
||||
desc,
|
||||
VerificationCriteria::llm_based(verification),
|
||||
weight,
|
||||
).with_dependencies(dependencies)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
if subtasks.is_empty() {
|
||||
return Err(AgentResult::failure("No subtasks generated", 1));
|
||||
}
|
||||
|
||||
SubtaskPlan::new(parent_id, subtasks, reasoning)
|
||||
.map_err(|e| AgentResult::failure(format!("Invalid subtask plan: {}", e), 0))
|
||||
}
|
||||
|
||||
/// Execute subtasks using NodeAgents for recursive processing.
|
||||
///
|
||||
/// Each subtask is handled by a NodeAgent which can:
|
||||
/// - Estimate complexity of the subtask
|
||||
/// - Recursively split if the subtask is still too complex
|
||||
/// - Execute directly if simple enough
|
||||
async fn execute_subtasks(
|
||||
&self,
|
||||
subtask_plan: SubtaskPlan,
|
||||
parent_budget: &Budget,
|
||||
ctx: &AgentContext,
|
||||
) -> AgentResult {
|
||||
use super::NodeAgent;
|
||||
|
||||
// Convert plan to tasks
|
||||
let mut tasks = match subtask_plan.into_tasks(parent_budget) {
|
||||
Ok(t) => t,
|
||||
Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
|
||||
};
|
||||
|
||||
let mut results = Vec::new();
|
||||
let mut total_cost = 0u64;
|
||||
|
||||
// Create a child context with reduced split depth for subtasks
|
||||
let child_ctx = ctx.child_context();
|
||||
|
||||
let total_subtasks = tasks.len();
|
||||
|
||||
tracing::info!(
|
||||
"RootAgent executing {} subtasks (child depth: {})",
|
||||
total_subtasks,
|
||||
child_ctx.max_split_depth
|
||||
);
|
||||
|
||||
// Execute each subtask through a NodeAgent (which can recursively split)
|
||||
for (i, task) in tasks.iter_mut().enumerate() {
|
||||
tracing::info!(
|
||||
"RootAgent delegating subtask {}/{}: {}",
|
||||
i + 1,
|
||||
total_subtasks,
|
||||
task.description().chars().take(80).collect::<String>()
|
||||
);
|
||||
|
||||
// Create a NodeAgent for this subtask
|
||||
let node_agent = NodeAgent::new(format!("subtask-{}", i + 1));
|
||||
|
||||
// Execute through the NodeAgent (which may split further if complex)
|
||||
let result = node_agent.execute(task, &child_ctx).await;
|
||||
total_cost += result.cost_cents;
|
||||
|
||||
tracing::info!(
|
||||
"Subtask {}/{} {}: {}",
|
||||
i + 1,
|
||||
total_subtasks,
|
||||
if result.success { "succeeded" } else { "failed" },
|
||||
result.output.chars().take(100).collect::<String>()
|
||||
);
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
// Aggregate results
|
||||
let successes = results.iter().filter(|r| r.success).count();
|
||||
let total = results.len();
|
||||
|
||||
// Concatenate outputs (fallback aggregation for non-tree path)
|
||||
let combined_output = self.fallback_concatenate_outputs(&results);
|
||||
|
||||
if successes == total {
|
||||
AgentResult::success(combined_output, total_cost)
|
||||
.with_data(json!({
|
||||
"subtasks_total": total,
|
||||
"subtasks_succeeded": successes,
|
||||
"recursive_execution": true,
|
||||
"results": results.iter().map(|r| json!({
|
||||
"success": r.success,
|
||||
"output": &r.output,
|
||||
"data": &r.data,
|
||||
})).collect::<Vec<_>>(),
|
||||
}))
|
||||
} else {
|
||||
AgentResult::failure(
|
||||
format!("{}/{} subtasks succeeded\n\n{}", successes, total, combined_output),
|
||||
total_cost,
|
||||
)
|
||||
.with_data(json!({
|
||||
"subtasks_total": total,
|
||||
"subtasks_succeeded": successes,
|
||||
"recursive_execution": true,
|
||||
"results": results.iter().map(|r| json!({
|
||||
"success": r.success,
|
||||
"output": &r.output,
|
||||
"data": &r.data,
|
||||
})).collect::<Vec<_>>(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute subtasks with tree updates for visualization.
/// Uses wave-based parallel execution for independent tasks.
///
/// # Parameters
/// - `subtask_plan`: the decomposition produced by `split_task`; consumed to build concrete tasks
/// - `parent_budget`: budget that `into_tasks` divides among the subtasks
/// - `child_ctx`: context handed to each `NodeAgent` (one split level deeper)
/// - `root_tree`: visualization tree; mutated in place and re-emitted as statuses change
/// - `ctx`: the parent context, used only for event emission (`emit_tree` / `emit_progress`)
/// - `requested_model`: optional model override propagated to every subtask
/// - `original_task_description`: The user's original request, used for synthesizing the final output
///
/// # Returns
/// A synthesized `AgentResult`: success only if every subtask succeeded; `data` carries
/// per-subtask outcomes plus wave metadata.
async fn execute_subtasks_with_tree(
    &self,
    subtask_plan: SubtaskPlan,
    parent_budget: &Budget,
    child_ctx: &AgentContext,
    root_tree: &mut crate::api::control::AgentTreeNode,
    ctx: &AgentContext,
    requested_model: Option<&str>,
    original_task_description: &str,
) -> AgentResult {
    use super::NodeAgent;
    use std::sync::Arc;
    use tokio::sync::Mutex;

    // Get execution waves for parallel processing.
    // A "wave" is a set of subtask indices with no unmet dependencies; waves run
    // sequentially, tasks inside a wave run concurrently.
    let waves = match subtask_plan.execution_waves() {
        Ok(w) => w,
        Err(e) => return AgentResult::failure(format!("Invalid subtask dependencies: {}", e), 0),
    };

    let mut tasks = match subtask_plan.into_tasks(parent_budget) {
        Ok(t) => t,
        Err(e) => return AgentResult::failure(format!("Failed to create subtasks: {}", e), 0),
    };

    // Propagate requested_model to all subtasks
    if let Some(model) = requested_model {
        for task in &mut tasks {
            task.analysis_mut().requested_model = Some(model.to_string());
        }
    }

    let total_subtasks = tasks.len();
    let num_waves = waves.len();

    tracing::info!(
        "RootAgent executing {} subtasks in {} wave(s) (child depth: {})",
        total_subtasks,
        num_waves,
        child_ctx.max_split_depth
    );

    // Wrap tree in Arc<Mutex> so the concurrently running subtask futures can all
    // update node statuses; we copy back into `root_tree` after all waves finish.
    let tree = Arc::new(Mutex::new(root_tree.clone()));
    // Results indexed by subtask position (filled in out of order within a wave).
    let mut all_results = Vec::new();
    let mut total_cost = 0u64;

    // Execute each wave in parallel
    for (wave_idx, wave) in waves.iter().enumerate() {
        let is_parallel = wave.len() > 1;

        tracing::info!(
            "RootAgent wave {}/{}: {} task(s) {}",
            wave_idx + 1,
            num_waves,
            wave.len(),
            if is_parallel { "(parallel)" } else { "(sequential)" }
        );

        // Mark all tasks in this wave as running (single lock scope, one emit).
        {
            let mut tree_guard = tree.lock().await;
            for &idx in wave {
                let subtask_id = format!("subtask-{}", idx + 1);
                if let Some(node) = tree_guard.children.iter_mut().find(|n| n.id == subtask_id) {
                    node.status = "running".to_string();
                }
            }
            ctx.emit_tree(tree_guard.clone());
        }

        // Execute tasks in this wave in parallel. Each future clones what it needs
        // (task, contexts, tree handle) so it is 'static and independently pollable.
        let wave_futures: Vec<_> = wave.iter().map(|&idx| {
            let subtask_id = format!("subtask-{}", idx + 1);
            let task = tasks[idx].clone();
            let child_ctx = child_ctx.clone();
            let tree = Arc::clone(&tree);
            let ctx = ctx.clone();

            async move {
                let mut task = task;
                let node_agent = NodeAgent::new(subtask_id.clone());

                tracing::info!(
                    "RootAgent delegating subtask {}: {}",
                    subtask_id,
                    task.description().chars().take(80).collect::<String>()
                );

                // Execute (without tree updates for parallel - update after)
                let result = node_agent.execute(&mut task, &child_ctx).await;

                // Update tree with result (lock held only for the mutation + emit).
                {
                    let mut tree_guard = tree.lock().await;
                    if let Some(node) = tree_guard.children.iter_mut().find(|n| n.id == subtask_id) {
                        node.status = if result.success { "completed".to_string() } else { "failed".to_string() };
                        node.budget_spent = result.cost_cents;
                    }
                    ctx.emit_tree(tree_guard.clone());
                }

                tracing::info!(
                    "Subtask {} {}: {}",
                    subtask_id,
                    if result.success { "succeeded" } else { "failed" },
                    result.output.chars().take(100).collect::<String>()
                );

                // Return the index so results can be re-ordered after join_all.
                (idx, result)
            }
        }).collect();

        // Wait for all tasks in wave to complete
        let wave_results = futures::future::join_all(wave_futures).await;

        for (idx, result) in wave_results {
            total_cost += result.cost_cents;
            // Store result at correct index (grow with None padding as needed,
            // since wave indices are not necessarily contiguous).
            while all_results.len() <= idx {
                all_results.push(None);
            }
            all_results[idx] = Some(result);
        }

        // Emit progress update after each wave
        let completed = all_results.iter().filter(|r| r.is_some()).count();
        let current_subtask = if wave_idx + 1 < num_waves {
            // Next wave's first task description
            waves.get(wave_idx + 1).and_then(|w| w.first()).map(|&idx| {
                tasks.get(idx).map(|t| t.description().chars().take(50).collect::<String>())
            }).flatten()
        } else {
            None
        };
        ctx.emit_progress(total_subtasks, completed, current_subtask, 1);
    }

    // Collect results in order (drops any index never filled; with valid waves every
    // subtask index should have run exactly once)
    let results: Vec<AgentResult> = all_results.into_iter().filter_map(|r| r).collect();

    // Update the original tree from our Arc<Mutex> version
    *root_tree = tree.lock().await.clone();

    // Update verifier to running (repurposed as "synthesizer" for complex tasks)
    if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
        node.status = "running".to_string();
        node.description = "Synthesizing final output...".to_string();
    }
    ctx.emit_tree(root_tree.clone());

    // Aggregate results
    let successes = results.iter().filter(|r| r.success).count();
    let total = results.len();

    // Synthesize final output from all subtask results; if nothing succeeded,
    // fall back to a plain summary line instead of spending on synthesis.
    let synthesized_output = if successes > 0 {
        self.synthesize_final_output(original_task_description, &results, ctx).await
    } else {
        format!("{}/{} subtasks succeeded ({} waves)", successes, total, num_waves)
    };

    // Update verifier/synthesizer to completed
    if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
        node.status = if successes == total { "completed".to_string() } else { "failed".to_string() };
        // NOTE(review): flat 5-cent charge for synthesis — confirm this matches actual cost accounting.
        node.budget_spent = 5;
    }
    ctx.emit_tree(root_tree.clone());

    if successes == total {
        AgentResult::success(synthesized_output, total_cost)
            .with_data(json!({
                "subtasks_total": total,
                "subtasks_succeeded": successes,
                "recursive_execution": true,
                "parallel_waves": num_waves,
                "results": results.iter().map(|r| json!({
                    "success": r.success,
                    "output": &r.output,
                    "data": &r.data,
                })).collect::<Vec<_>>(),
            }))
    } else {
        AgentResult::failure(
            format!("{}/{} subtasks succeeded ({} waves)\n\n{}", successes, total, num_waves, synthesized_output),
            total_cost,
        )
        .with_data(json!({
            "subtasks_total": total,
            "subtasks_succeeded": successes,
            "recursive_execution": true,
            "parallel_waves": num_waves,
            "results": results.iter().map(|r| json!({
                "success": r.success,
                "output": &r.output,
                "data": &r.data,
            })).collect::<Vec<_>>(),
        }))
    }
}
|
||||
}
|
||||
|
||||
impl Default for RootAgent {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl Agent for RootAgent {
    fn id(&self) -> &AgentId {
        &self.id
    }

    fn agent_type(&self) -> AgentType {
        AgentType::Root
    }

    fn description(&self) -> &str {
        "Root orchestrator: estimates complexity, splits tasks, delegates execution"
    }

    /// Orchestrate a task end-to-end:
    /// 1. estimate complexity;
    /// 2. if complex and splitting is allowed, decompose and delegate to `execute_subtasks_with_tree`;
    /// 3. otherwise select a model, execute via `TaskExecutor`, then verify via `Verifier`.
    ///
    /// Emits tree and phase events throughout so the dashboard can render live progress.
    async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
        use crate::api::control::AgentTreeNode;

        // Accumulates orchestration overhead (estimation, splitting, verification);
        // executor costs are added from the sub-results at the end.
        let mut total_cost = 0u64;
        let task_desc = task.description().chars().take(60).collect::<String>();
        let budget_cents = task.budget().total_cents();

        // Build initial tree structure
        let mut root_tree = AgentTreeNode::new("root", "Root", "Root Agent", &task_desc)
            .with_budget(budget_cents, 0)
            .with_status("running");

        // Add child agent nodes
        root_tree.add_child(
            AgentTreeNode::new("complexity", "ComplexityEstimator", "Complexity Estimator", "Analyzing task difficulty")
                .with_budget(10, 0)
                .with_status("running")
        );
        ctx.emit_tree(root_tree.clone());

        // Step 1: Estimate complexity
        ctx.emit_phase("estimating_complexity", Some("Analyzing task difficulty..."), Some("RootAgent"));
        let complexity = self.estimate_complexity(task, ctx).await;
        // NOTE(review): flat 1-cent charge for estimation — confirm against real cost tracking.
        total_cost += 1;

        // Update complexity node
        if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "complexity") {
            node.status = "completed".to_string();
            node.complexity = Some(complexity.score());
            node.budget_spent = 5;
        }
        ctx.emit_tree(root_tree.clone());

        tracing::info!(
            "Task complexity: {:.2} (should_split: {})",
            complexity.score(),
            complexity.should_split()
        );

        // Step 2: Decide execution strategy.
        // Split path — only when the estimator recommends it AND the context still
        // permits splitting (depth budget not exhausted).
        if complexity.should_split() && ctx.can_split() {
            ctx.emit_phase("splitting_task", Some("Decomposing into subtasks..."), Some("RootAgent"));
            match self.split_task(task, ctx).await {
                Ok(plan) => {
                    // NOTE(review): flat 2-cent charge for planning — confirm.
                    total_cost += 2;

                    // Add subtask nodes to tree (even split of the total budget per subtask,
                    // for display purposes only)
                    for (i, subtask) in plan.subtasks().iter().enumerate() {
                        let subtask_node = AgentTreeNode::new(
                            &format!("subtask-{}", i + 1),
                            "Node",
                            &format!("Subtask {}", i + 1),
                            &subtask.description.chars().take(50).collect::<String>(),
                        )
                        .with_budget(budget_cents / plan.subtasks().len() as u64, 0)
                        .with_status("pending");
                        root_tree.add_child(subtask_node);
                    }

                    // Add verifier node
                    root_tree.add_child(
                        AgentTreeNode::new("verifier", "Verifier", "Verifier", "Verify task completion")
                            .with_budget(80, 0)
                            .with_status("pending")
                    );
                    ctx.emit_tree(root_tree.clone());

                    // Execute subtasks with tree updates
                    let child_ctx = ctx.child_context();
                    let requested_model = task.analysis().requested_model.as_deref();
                    let original_task_desc = task.description();
                    let result = self.execute_subtasks_with_tree(plan, task.budget(), &child_ctx, &mut root_tree, ctx, requested_model, original_task_desc).await;

                    // Update root status
                    root_tree.status = if result.success { "completed".to_string() } else { "failed".to_string() };
                    root_tree.budget_spent = total_cost + result.cost_cents;
                    ctx.emit_tree(root_tree);

                    // NOTE(review): this struct literal does not set `terminal_reason`, which the
                    // current AgentResult definition appears to include — confirm this compiles
                    // against the latest struct (or use `..` / a constructor).
                    return AgentResult {
                        success: result.success,
                        output: result.output,
                        cost_cents: total_cost + result.cost_cents,
                        model_used: result.model_used,
                        data: result.data,
                    };
                }
                Err(e) => {
                    // Split failure is non-fatal: fall through to the direct-execution path.
                    tracing::warn!("Couldn't split task, executing directly: {}", e.output);
                }
            }
        }

        // Simple task: add remaining nodes
        // Check if there's a model override - show it immediately on the node
        let requested_model = task.analysis().requested_model.clone();
        let model_selector_node = if let Some(ref model) = requested_model {
            AgentTreeNode::new("model-selector", "ModelSelector", "Model Selector", &format!("Using: {}", model))
                .with_budget(10, 0)
                .with_status("running")
                .with_model(model)
        } else {
            AgentTreeNode::new("model-selector", "ModelSelector", "Model Selector", "Selecting optimal model")
                .with_budget(10, 0)
                .with_status("running")
        };
        root_tree.add_child(model_selector_node);
        ctx.emit_tree(root_tree.clone());

        ctx.emit_phase("selecting_model", Some("Choosing optimal model..."), Some("RootAgent"));

        // Benchmark-driven model selection only makes sense when benchmarks exist.
        let has_benchmarks = if let Some(b) = &ctx.benchmarks {
            let registry = b.read().await;
            registry.benchmark_count() > 0
        } else {
            false
        };

        let selected_model = if has_benchmarks {
            let sel_result = self.model_selector.execute(task, ctx).await;
            total_cost += sel_result.cost_cents;
            // Model already resolved by ModelSelector
            task.analysis().selected_model.clone().unwrap_or_else(|| ctx.config.default_model.clone())
        } else {
            // No benchmarks - resolve default model to latest version
            let default_model = if let Some(resolver) = &ctx.resolver {
                let resolver = resolver.read().await;
                let resolved = resolver.resolve(&ctx.config.default_model);
                if resolved.upgraded {
                    tracing::info!(
                        "RootAgent: default model auto-upgraded: {} → {}",
                        resolved.original, resolved.resolved
                    );
                }
                resolved.resolved
            } else {
                ctx.config.default_model.clone()
            };
            let a = task.analysis_mut();
            a.selected_model = Some(default_model.clone());
            default_model
        };

        // Update model selector node with final selected model
        if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "model-selector") {
            node.status = "completed".to_string();
            node.selected_model = Some(selected_model.clone());
            node.description = format!("Using: {}", selected_model);
            node.budget_spent = 3;
        }

        // Add executor and verifier nodes.
        // NOTE(review): `budget_cents - 100` underflows (u64) when the total budget is
        // below 100 cents — confirm budgets are always >= 100 or saturate here.
        root_tree.add_child(
            AgentTreeNode::new("executor", "TaskExecutor", "Task Executor", "Executing task")
                .with_budget(budget_cents - 100, 0)
                .with_status("running")
        );
        root_tree.add_child(
            AgentTreeNode::new("verifier", "Verifier", "Verifier", "Verify task completion")
                .with_budget(80, 0)
                .with_status("pending")
        );
        ctx.emit_tree(root_tree.clone());

        ctx.emit_phase("executing", Some("Running task..."), Some("RootAgent"));
        let result = self.task_executor.execute(task, ctx).await;

        // Update executor node
        if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "executor") {
            node.status = if result.success { "completed".to_string() } else { "failed".to_string() };
            node.budget_spent = result.cost_cents;
        }
        ctx.emit_tree(root_tree.clone());

        // Store the executor output for verification
        task.set_last_output(result.output.clone());

        // Step 3: Verify
        if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
            node.status = "running".to_string();
        }
        ctx.emit_tree(root_tree.clone());

        ctx.emit_phase("verifying", Some("Checking results..."), Some("RootAgent"));
        let verification = self.verifier.execute(task, ctx).await;
        total_cost += verification.cost_cents;

        // Update verifier node
        if let Some(node) = root_tree.children.iter_mut().find(|n| n.id == "verifier") {
            node.status = if verification.success { "completed".to_string() } else { "failed".to_string() };
            node.budget_spent = verification.cost_cents;
        }

        // Update root status — success requires both execution and verification to pass.
        root_tree.status = if result.success && verification.success { "completed".to_string() } else { "failed".to_string() };
        root_tree.budget_spent = total_cost + result.cost_cents;
        ctx.emit_tree(root_tree);

        // NOTE(review): as above, `terminal_reason` is not set in this literal — confirm
        // against the current AgentResult definition.
        AgentResult {
            success: result.success && verification.success,
            output: if verification.success {
                result.output
            } else {
                format!("{}\n\nVerification failed: {}", result.output, verification.output)
            },
            cost_cents: total_cost + result.cost_cents,
            model_used: result.model_used,
            data: json!({
                "complexity": complexity.score(),
                "was_split": false,
                "verification": verification.data,
                "execution": result.data,
            }).into(),
        }
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl OrchestratorAgent for RootAgent {
|
||||
fn children(&self) -> Vec<AgentRef> {
|
||||
vec![
|
||||
Arc::clone(&self.complexity_estimator) as AgentRef,
|
||||
Arc::clone(&self.model_selector) as AgentRef,
|
||||
Arc::clone(&self.task_executor) as AgentRef,
|
||||
Arc::clone(&self.verifier) as AgentRef,
|
||||
]
|
||||
}
|
||||
|
||||
fn find_child(&self, agent_type: AgentType) -> Option<AgentRef> {
|
||||
match agent_type {
|
||||
AgentType::ComplexityEstimator => Some(Arc::clone(&self.complexity_estimator) as AgentRef),
|
||||
AgentType::ModelSelector => Some(Arc::clone(&self.model_selector) as AgentRef),
|
||||
AgentType::TaskExecutor => Some(Arc::clone(&self.task_executor) as AgentRef),
|
||||
AgentType::Verifier => Some(Arc::clone(&self.verifier) as AgentRef),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn delegate(&self, task: &mut Task, child: AgentRef, ctx: &AgentContext) -> AgentResult {
|
||||
child.execute(task, ctx).await
|
||||
}
|
||||
|
||||
async fn delegate_all(&self, tasks: &mut [Task], ctx: &AgentContext) -> Vec<AgentResult> {
|
||||
let mut results = Vec::with_capacity(tasks.len());
|
||||
|
||||
for task in tasks {
|
||||
let result = self.task_executor.execute(task, ctx).await;
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
177
src/agents/simple.rs
Normal file
177
src/agents/simple.rs
Normal file
@@ -0,0 +1,177 @@
|
||||
//! Simple agent - streamlined single-agent executor.
|
||||
//!
|
||||
//! Replaces the complex RootAgent → NodeAgent → ComplexityEstimator → ModelSelector → TaskExecutor → Verifier
|
||||
//! hierarchy with a single agent that directly executes tasks.
|
||||
//!
|
||||
//! # Why SimpleAgent?
|
||||
//! The multi-agent hierarchy added overhead without reliable benefits:
|
||||
//! - ComplexityEstimator: LLM-based estimation was unreliable
|
||||
//! - ModelSelector: U-curve optimization rarely matched simple "use default" strategy
|
||||
//! - NodeAgent: Recursive splitting lost context and produced worse results
|
||||
//! - Verifier: Rubber-stamped everything (LLMs are bad at self-verification)
|
||||
//!
|
||||
//! # Design
|
||||
//! - Direct model selection: mission override > config default
|
||||
//! - No automatic task splitting (user controls granularity)
|
||||
//! - Built-in blocker detection via system prompt
|
||||
//! - Mission completion via complete_mission tool
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::agents::{
|
||||
Agent, AgentContext, AgentId, AgentResult, AgentType,
|
||||
leaf::TaskExecutor,
|
||||
};
|
||||
use crate::api::control::AgentTreeNode;
|
||||
use crate::task::Task;
|
||||
|
||||
/// Simple agent - unified executor without orchestration overhead.
///
/// # Execution Flow
/// 1. Resolve model (mission override or config default)
/// 2. Build tree for visualization
/// 3. Execute task via TaskExecutor
/// 4. Return result (no verification layer)
pub struct SimpleAgent {
    // Unique identity for this agent instance.
    id: AgentId,
    // The single leaf executor that performs all real work.
    task_executor: Arc<TaskExecutor>,
}
|
||||
|
||||
impl SimpleAgent {
|
||||
/// Create a new simple agent.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
id: AgentId::new(),
|
||||
task_executor: Arc::new(TaskExecutor::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the model to use for execution.
|
||||
///
|
||||
/// Priority:
|
||||
/// 1. Task's requested model (from mission override)
|
||||
/// 2. Config default model (auto-upgraded via resolver)
|
||||
async fn resolve_model(&self, task: &Task, ctx: &AgentContext) -> String {
|
||||
// Check for explicit model request (from mission override)
|
||||
if let Some(requested) = &task.analysis().requested_model {
|
||||
// Resolve to latest version if using resolver
|
||||
if let Some(resolver) = &ctx.resolver {
|
||||
let resolver = resolver.read().await;
|
||||
let resolved = resolver.resolve(requested);
|
||||
if resolved.upgraded {
|
||||
tracing::info!(
|
||||
"SimpleAgent: requested model auto-upgraded: {} → {}",
|
||||
resolved.original, resolved.resolved
|
||||
);
|
||||
}
|
||||
return resolved.resolved;
|
||||
}
|
||||
return requested.clone();
|
||||
}
|
||||
|
||||
// Fall back to config default, resolved to latest version
|
||||
if let Some(resolver) = &ctx.resolver {
|
||||
let resolver = resolver.read().await;
|
||||
let resolved = resolver.resolve(&ctx.config.default_model);
|
||||
if resolved.upgraded {
|
||||
tracing::info!(
|
||||
"SimpleAgent: default model auto-upgraded: {} → {}",
|
||||
resolved.original, resolved.resolved
|
||||
);
|
||||
}
|
||||
resolved.resolved
|
||||
} else {
|
||||
ctx.config.default_model.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a simple agent tree for visualization.
|
||||
fn build_tree(&self, task_desc: &str, budget_cents: u64, model: &str) -> AgentTreeNode {
|
||||
let mut root = AgentTreeNode::new("root", "Simple", "Simple Agent", task_desc)
|
||||
.with_budget(budget_cents, 0)
|
||||
.with_status("running");
|
||||
|
||||
// Add executor node
|
||||
root.add_child(
|
||||
AgentTreeNode::new("executor", "TaskExecutor", "Task Executor", "Executing task")
|
||||
.with_budget(budget_cents, 0)
|
||||
.with_status("running")
|
||||
.with_model(model)
|
||||
);
|
||||
|
||||
root
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SimpleAgent {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl Agent for SimpleAgent {
    fn id(&self) -> &AgentId {
        &self.id
    }

    fn agent_type(&self) -> AgentType {
        AgentType::Root // Presents as Root for compatibility with tree visualization
    }

    fn description(&self) -> &str {
        "Simple agent: direct task execution without orchestration overhead"
    }

    /// Execute the task directly through the single `TaskExecutor` — no complexity
    /// estimation, no splitting, no verification pass. Emits one tree snapshot
    /// before execution and one after, plus a single "executing" phase event.
    async fn execute(&self, task: &mut Task, ctx: &AgentContext) -> AgentResult {
        // Truncated description used only for tree display / logging.
        let task_desc = task.description().chars().take(60).collect::<String>();
        let budget_cents = task.budget().total_cents();

        // Step 1: Resolve model (mission override > config default, version-resolved)
        let model = self.resolve_model(task, ctx).await;

        // Update task analysis with selected model so downstream code sees the choice
        task.analysis_mut().selected_model = Some(model.clone());

        tracing::info!(
            "SimpleAgent executing task with model '{}': {}...",
            model,
            task_desc
        );

        // Step 2: Build and emit tree
        let mut tree = self.build_tree(&task_desc, budget_cents, &model);
        ctx.emit_tree(tree.clone());

        // Step 3: Emit phase (for frontend progress indicator)
        ctx.emit_phase("executing", Some("Running task..."), Some("SimpleAgent"));

        // Step 4: Execute via TaskExecutor
        let result = self.task_executor.execute(task, ctx).await;

        // Step 5: Update tree with result — both the executor child and the root
        // mirror the execution outcome and actual spend.
        if let Some(node) = tree.children.iter_mut().find(|n| n.id == "executor") {
            node.status = if result.success { "completed".to_string() } else { "failed".to_string() };
            node.budget_spent = result.cost_cents;
        }
        tree.status = if result.success { "completed".to_string() } else { "failed".to_string() };
        tree.budget_spent = result.cost_cents;
        ctx.emit_tree(tree);

        // Step 6: Return result with metadata. `model_used` falls back to the
        // resolved model name when the executor did not report one; the structured
        // `terminal_reason` is passed through for the auto-complete logic.
        AgentResult {
            success: result.success,
            output: result.output,
            cost_cents: result.cost_cents,
            model_used: result.model_used.or(Some(model)),
            data: Some(json!({
                "agent": "SimpleAgent",
                "execution": result.data,
            })),
            terminal_reason: result.terminal_reason,
        }
    }
}
|
||||
@@ -1,49 +1,24 @@
|
||||
//! Empirical tuning parameters for agent heuristics.
|
||||
//! Tuning parameters (legacy).
|
||||
//!
|
||||
//! This module exists to support **trial-and-error calibration**:
|
||||
//! we run tasks, compare predicted vs actual usage/cost, and update parameters.
|
||||
//!
|
||||
//! The core agent logic should remain correct even if tuning values are absent
|
||||
//! (defaults apply).
|
||||
//! This module is kept for backwards compatibility but is largely unused
|
||||
//! since SimpleAgent doesn't require tuning.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::agents::leaf::ComplexityPromptVariant;
|
||||
|
||||
/// Top-level tuning parameters.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
/// Top-level tuning parameters (legacy).
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct TuningParams {
|
||||
pub complexity: ComplexityTuning,
|
||||
pub model_selector: ModelSelectorTuning,
|
||||
}
|
||||
|
||||
impl Default for TuningParams {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
complexity: ComplexityTuning::default(),
|
||||
model_selector: ModelSelectorTuning::default(),
|
||||
}
|
||||
}
|
||||
// Empty - SimpleAgent doesn't use tuning
|
||||
}
|
||||
|
||||
impl TuningParams {
|
||||
/// Load tuning parameters from the working directory, if present.
|
||||
///
|
||||
/// # Path
|
||||
/// `{working_dir}/.open_agent/tuning.json`
|
||||
pub async fn load_from_working_dir(working_dir: &Path) -> Self {
|
||||
let path = working_dir.join(".open_agent").join("tuning.json");
|
||||
match tokio::fs::read_to_string(&path).await {
|
||||
Ok(s) => serde_json::from_str::<TuningParams>(&s).unwrap_or_default(),
|
||||
Err(_) => TuningParams::default(),
|
||||
}
|
||||
pub async fn load_from_working_dir(_working_dir: &Path) -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Save tuning parameters to the working directory.
|
||||
///
|
||||
/// # Postcondition
|
||||
/// If successful, subsequent `load_from_working_dir` returns an equivalent value.
|
||||
pub async fn save_to_working_dir(&self, working_dir: &Path) -> anyhow::Result<PathBuf> {
|
||||
let dir = working_dir.join(".open_agent");
|
||||
tokio::fs::create_dir_all(&dir).await?;
|
||||
@@ -53,44 +28,3 @@ impl TuningParams {
|
||||
Ok(path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Tuning parameters for ComplexityEstimator.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityTuning {
    /// Which prompt template the estimator uses.
    pub prompt_variant: ComplexityPromptVariant,
    /// Threshold on the complexity score used for the split decision —
    /// presumably tasks scoring at or above this are split; confirm in the estimator.
    pub split_threshold: f64,
    /// Multiplier applied to token estimates — assumed to scale predicted usage;
    /// TODO confirm where it is consumed.
    pub token_multiplier: f64,
}
|
||||
|
||||
impl Default for ComplexityTuning {
    /// Calibration baseline: CalibratedV2 prompt, 0.60 split threshold,
    /// identity (1.00) token multiplier.
    fn default() -> Self {
        Self {
            prompt_variant: ComplexityPromptVariant::CalibratedV2,
            split_threshold: 0.60,
            token_multiplier: 1.00,
        }
    }
}
|
||||
|
||||
/// Tuning parameters for ModelSelector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelSelectorTuning {
    /// Retry multiplier cost penalty for failures.
    pub retry_multiplier: f64,
    /// Token inefficiency scaling for weaker models.
    pub inefficiency_scale: f64,
    /// Cap for failure probability (upper bound applied to estimates).
    pub max_failure_probability: f64,
}
|
||||
|
||||
impl Default for ModelSelectorTuning {
    /// Calibration baseline: 1.5x retry penalty, 0.5 inefficiency scale,
    /// failure probability capped at 0.9.
    fn default() -> Self {
        Self {
            retry_multiplier: 1.5,
            inefficiency_scale: 0.5,
            max_failure_probability: 0.9,
        }
    }
}
|
||||
|
||||
|
||||
|
||||
@@ -60,6 +60,25 @@ impl AgentType {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reason why agent execution terminated (for non-successful completions).
///
/// Used to determine whether auto-complete should trigger, avoiding substring matching.
/// Carried in `AgentResult.terminal_reason`; absent (`None`) for normal completion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TerminalReason {
    /// Agent hit the maximum iteration limit
    MaxIterations,
    /// Agent was cancelled by user
    Cancelled,
    /// Budget was exhausted
    BudgetExhausted,
    /// Agent stalled (no progress, timeouts)
    Stalled,
    /// Agent got stuck in an infinite loop
    InfiniteLoop,
    /// LLM API error
    LlmError,
}
|
||||
|
||||
/// Result of an agent executing a task.
|
||||
///
|
||||
/// # Invariants
|
||||
@@ -81,6 +100,11 @@ pub struct AgentResult {
|
||||
|
||||
/// Detailed result data (type-specific)
|
||||
pub data: Option<serde_json::Value>,
|
||||
|
||||
/// If execution ended due to a terminal condition (not normal completion),
|
||||
/// this indicates why. Used by auto-complete logic to avoid substring matching.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub terminal_reason: Option<TerminalReason>,
|
||||
}
|
||||
|
||||
impl AgentResult {
|
||||
@@ -92,6 +116,7 @@ impl AgentResult {
|
||||
cost_cents,
|
||||
model_used: None,
|
||||
data: None,
|
||||
terminal_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,6 +128,7 @@ impl AgentResult {
|
||||
cost_cents,
|
||||
model_used: None,
|
||||
data: None,
|
||||
terminal_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,6 +143,12 @@ impl AgentResult {
|
||||
self.data = Some(data);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the terminal reason (why execution ended abnormally).
|
||||
pub fn with_terminal_reason(mut self, reason: TerminalReason) -> Self {
|
||||
self.terminal_reason = Some(reason);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Complexity estimation for a task.
|
||||
|
||||
@@ -1740,7 +1740,7 @@ async fn control_actor_loop(
|
||||
.map(|e| (e.role.clone(), e.content.clone()))
|
||||
.collect();
|
||||
*current_mission.write().await = Some(mission_id);
|
||||
|
||||
|
||||
// Update mission status back to active
|
||||
if let Some(mem) = &memory {
|
||||
let _ = mem.supabase.update_mission_status(mission_id, "active").await;
|
||||
@@ -1947,6 +1947,56 @@ async fn control_actor_loop(
|
||||
// Persist to mission
|
||||
persist_mission_history(&memory, ¤t_mission, &history).await;
|
||||
|
||||
// P1 FIX: Auto-complete mission if agent execution ended in a terminal state
|
||||
// without an explicit complete_mission call.
|
||||
// This prevents missions from staying "active" forever after max iterations, stalls, etc.
|
||||
//
|
||||
// We use terminal_reason (structured enum) instead of substring matching to avoid
|
||||
// false positives when agent output legitimately contains words like "infinite loop".
|
||||
// We also check the current mission status from DB to handle:
|
||||
// - Explicit complete_mission calls (which update DB status)
|
||||
// - Parallel missions (each has its own DB status)
|
||||
if agent_result.terminal_reason.is_some() {
|
||||
if let Some(mem) = &memory {
|
||||
if let Some(mission_id) = current_mission.read().await.clone() {
|
||||
// Check current mission status from DB - only auto-complete if still "active"
|
||||
let current_status = mem.supabase.get_mission(mission_id).await
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|m| m.status);
|
||||
|
||||
if current_status.as_deref() == Some("active") {
|
||||
let status = if agent_result.success { "completed" } else { "failed" };
|
||||
tracing::info!(
|
||||
"Auto-completing mission {} with status '{}' (terminal_reason: {:?})",
|
||||
mission_id, status, agent_result.terminal_reason
|
||||
);
|
||||
if let Err(e) = mem.supabase.update_mission_status(mission_id, status).await {
|
||||
tracing::warn!("Failed to auto-complete mission: {}", e);
|
||||
} else {
|
||||
// Emit status change event
|
||||
let new_status = if agent_result.success {
|
||||
MissionStatus::Completed
|
||||
} else {
|
||||
MissionStatus::Failed
|
||||
};
|
||||
let _ = events_tx.send(AgentEvent::MissionStatusChanged {
|
||||
mission_id,
|
||||
status: new_status,
|
||||
summary: Some(format!("Auto-completed: {}",
|
||||
agent_result.output.chars().take(100).collect::<String>())),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
tracing::debug!(
|
||||
"Skipping auto-complete: mission {} already has status {:?}",
|
||||
mission_id, current_status
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let _ = events_tx.send(AgentEvent::AssistantMessage {
|
||||
id: Uuid::new_v4(),
|
||||
content: agent_result.output.clone(),
|
||||
|
||||
@@ -220,6 +220,16 @@ pub async fn upload(
|
||||
format!("{}/{}", q.path, file_name)
|
||||
};
|
||||
|
||||
// Ensure the target directory exists (mkdir -p is idempotent)
|
||||
let target_dir = if q.path.ends_with('/') {
|
||||
q.path.trim_end_matches('/').to_string()
|
||||
} else {
|
||||
q.path.clone()
|
||||
};
|
||||
ssh_exec(&cfg, key_file.path(), "mkdir", &["-p".into(), target_dir])
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to create directory: {}", e)))?;
|
||||
|
||||
let batch = format!("put -p \"{}\" \"{}\"\n", tmp.to_string_lossy(), remote_path);
|
||||
sftp_batch(&cfg, key_file.path(), &batch)
|
||||
.await
|
||||
|
||||
@@ -21,8 +21,8 @@ use tower_http::cors::CorsLayer;
|
||||
use tower_http::trace::TraceLayer;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::agents::orchestrator::RootAgent;
|
||||
use crate::agents::{AgentContext, AgentRef, TuningParams};
|
||||
use crate::agents::SimpleAgent;
|
||||
use crate::agents::{AgentContext, AgentRef};
|
||||
use crate::budget::ModelPricing;
|
||||
use crate::config::Config;
|
||||
use crate::llm::OpenRouterClient;
|
||||
@@ -41,7 +41,7 @@ use super::types::*;
|
||||
pub struct AppState {
|
||||
pub config: Config,
|
||||
pub tasks: RwLock<HashMap<Uuid, TaskState>>,
|
||||
/// The hierarchical root agent
|
||||
/// The agent used for task execution
|
||||
pub root_agent: AgentRef,
|
||||
/// Memory system (optional)
|
||||
pub memory: Option<MemorySystem>,
|
||||
@@ -57,11 +57,8 @@ pub struct AppState {
|
||||
|
||||
/// Start the HTTP server.
|
||||
pub async fn serve(config: Config) -> anyhow::Result<()> {
|
||||
// Load empirically tuned parameters (if present in working directory)
|
||||
let tuning = TuningParams::load_from_working_dir(&config.working_dir).await;
|
||||
|
||||
// Create the root agent (hierarchical)
|
||||
let root_agent: AgentRef = Arc::new(RootAgent::new_with_tuning(&tuning));
|
||||
// Create the simple agent (replaces complex RootAgent hierarchy)
|
||||
let root_agent: AgentRef = Arc::new(SimpleAgent::new());
|
||||
|
||||
// Initialize memory system (optional - needs Supabase config)
|
||||
let memory = memory::init_memory(&config.memory, &config.api_key).await;
|
||||
|
||||
@@ -1,244 +0,0 @@
|
||||
//! Calibration harness for Open Agent estimators.
|
||||
//!
|
||||
//! This binary runs trial tasks in a temporary directory and measures:
|
||||
//! - ComplexityEstimator: predicted tokens vs actual tokens used by TaskExecutor
|
||||
//! - Split decision quality (against a small labeled set)
|
||||
//!
|
||||
//! The goal is *empirical tuning* by trial-and-error, while keeping the core
|
||||
//! agent code maintainable and (eventually) provable.
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
//! ```bash
|
||||
//! export OPENROUTER_API_KEY="..."
|
||||
//! cargo run --release --bin calibrate -- --working-dir /tmp/open_agent_calibration --model openai/gpt-4.1-mini
|
||||
//! ```
|
||||
//!
|
||||
//! Notes:
|
||||
//! - This will create and delete files under the given directory.
|
||||
//! - Costs real money. Keep the task set small.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use open_agent::agents::leaf::{ComplexityEstimator, ComplexityPromptVariant, TaskExecutor};
|
||||
use open_agent::agents::{Agent, AgentContext};
|
||||
use open_agent::budget::ModelPricing;
|
||||
use open_agent::config::Config;
|
||||
use open_agent::llm::OpenRouterClient;
|
||||
use open_agent::task::{Task, VerificationCriteria};
|
||||
use open_agent::tools::ToolRegistry;
|
||||
use open_agent::agents::tuning::{TuningParams, ComplexityTuning};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CalibTask {
|
||||
name: &'static str,
|
||||
prompt: &'static str,
|
||||
expected_should_split: bool,
|
||||
}
|
||||
|
||||
fn parse_args() -> (PathBuf, String, bool) {
|
||||
let mut working_dir = None::<PathBuf>;
|
||||
let mut model = None::<String>;
|
||||
let mut write_tuning = false;
|
||||
|
||||
let mut args = std::env::args().skip(1);
|
||||
while let Some(a) = args.next() {
|
||||
match a.as_str() {
|
||||
"--working-dir" | "--workspace" => working_dir = args.next().map(PathBuf::from),
|
||||
"--model" => model = args.next(),
|
||||
"--write-tuning" => write_tuning = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let working_dir = working_dir.unwrap_or_else(|| PathBuf::from("./.open_agent_calibration"));
|
||||
let model = model.unwrap_or_else(|| "openai/gpt-4.1-mini".to_string());
|
||||
(working_dir, model, write_tuning)
|
||||
}
|
||||
|
||||
fn task_set() -> Vec<CalibTask> {
|
||||
vec![
|
||||
CalibTask {
|
||||
name: "hello_world",
|
||||
prompt: "Create a Python script called hello.py that prints 'Hello World'.",
|
||||
expected_should_split: false,
|
||||
},
|
||||
CalibTask {
|
||||
name: "calculator",
|
||||
prompt: "Create a Python script called calculator.py with add/subtract/multiply/divide functions and a small CLI menu.",
|
||||
expected_should_split: false,
|
||||
},
|
||||
CalibTask {
|
||||
name: "mini_project",
|
||||
prompt: "Create a tiny Python project with: (1) src/app.py that reads a name from argv and prints a greeting, (2) tests/test_app.py using pytest, (3) a pyproject.toml. Ensure 'python -m pytest' passes.",
|
||||
expected_should_split: true,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct Score {
|
||||
mean_token_rel_error: f64,
|
||||
split_accuracy: f64,
|
||||
}
|
||||
|
||||
impl Score {
|
||||
fn objective(&self) -> f64 {
|
||||
// Lower is better. Penalize wrong split decisions.
|
||||
self.mean_token_rel_error + (1.0 - self.split_accuracy) * 0.50
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_clean_dir(dir: &Path) -> anyhow::Result<()> {
|
||||
if dir.exists() {
|
||||
tokio::fs::remove_dir_all(dir).await?;
|
||||
}
|
||||
tokio::fs::create_dir_all(dir).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (working_dir_root, exec_model, write_tuning) = parse_args();
|
||||
|
||||
let api_key = std::env::var("OPENROUTER_API_KEY")
|
||||
.map_err(|_| anyhow::anyhow!("OPENROUTER_API_KEY must be set for calibration"))?;
|
||||
|
||||
let tasks = task_set();
|
||||
|
||||
// Grid to try.
|
||||
let variants = [
|
||||
ComplexityPromptVariant::RubricV1,
|
||||
ComplexityPromptVariant::CalibratedV2,
|
||||
];
|
||||
let split_thresholds = [0.55, 0.60, 0.65];
|
||||
let token_multipliers = [0.9, 1.0, 1.1, 1.2, 1.3];
|
||||
|
||||
let llm: Arc<dyn open_agent::llm::LlmClient> = Arc::new(OpenRouterClient::new(api_key));
|
||||
let pricing = Arc::new(ModelPricing::new());
|
||||
|
||||
let mut best = None::<(ComplexityPromptVariant, f64, f64, Score)>;
|
||||
|
||||
for &variant in &variants {
|
||||
for &split_threshold in &split_thresholds {
|
||||
for &token_mult in &token_multipliers {
|
||||
let mut rel_errors = Vec::new();
|
||||
let mut correct_split = 0usize;
|
||||
|
||||
for t in &tasks {
|
||||
let wd = working_dir_root.join(format!(
|
||||
"{}_st{}_tm{}",
|
||||
t.name,
|
||||
(split_threshold * 100.0) as u64,
|
||||
(token_mult * 100.0) as u64
|
||||
));
|
||||
ensure_clean_dir(&wd).await?;
|
||||
|
||||
// Minimal config for context.
|
||||
let cfg = Config::new("<redacted>".to_string(), exec_model.clone(), wd.clone());
|
||||
|
||||
let ctx = AgentContext::new(
|
||||
cfg,
|
||||
Arc::clone(&llm),
|
||||
ToolRegistry::new(),
|
||||
Arc::clone(&pricing),
|
||||
wd.clone(),
|
||||
);
|
||||
|
||||
// Build task with generous budget.
|
||||
let budget = open_agent::budget::Budget::new(10_000); // $100 in cents
|
||||
let mut task = Task::new(
|
||||
t.prompt.to_string(),
|
||||
VerificationCriteria::None,
|
||||
budget,
|
||||
)?;
|
||||
|
||||
// Run estimator (with candidate params).
|
||||
let estimator = ComplexityEstimator::with_params(variant, split_threshold, token_mult);
|
||||
let _ = estimator.execute(&mut task, &ctx).await;
|
||||
|
||||
let predicted_tokens = task.analysis().estimated_total_tokens.unwrap_or(2000);
|
||||
let predicted_split = task.analysis().should_split.unwrap_or(false);
|
||||
if predicted_split == t.expected_should_split {
|
||||
correct_split += 1;
|
||||
}
|
||||
|
||||
// Force execution model for comparability.
|
||||
task.analysis_mut().selected_model = Some(exec_model.clone());
|
||||
|
||||
let executor = TaskExecutor::new();
|
||||
let _exec_res = executor.execute(&mut task, &ctx).await;
|
||||
|
||||
let actual_tokens = task
|
||||
.analysis()
|
||||
.actual_usage
|
||||
.as_ref()
|
||||
.map(|u| u.total_tokens)
|
||||
.unwrap_or(predicted_tokens);
|
||||
|
||||
let denom = (actual_tokens as f64).max(1.0);
|
||||
let rel = ((predicted_tokens as f64) - (actual_tokens as f64)).abs() / denom;
|
||||
rel_errors.push(rel);
|
||||
}
|
||||
|
||||
let mean_token_rel_error = if rel_errors.is_empty() {
|
||||
1.0
|
||||
} else {
|
||||
rel_errors.iter().sum::<f64>() / (rel_errors.len() as f64)
|
||||
};
|
||||
|
||||
let split_accuracy = (correct_split as f64) / (tasks.len() as f64);
|
||||
let score = Score {
|
||||
mean_token_rel_error,
|
||||
split_accuracy,
|
||||
};
|
||||
|
||||
let candidate = (variant, split_threshold, token_mult, score.clone());
|
||||
let better = best
|
||||
.as_ref()
|
||||
.map(|(_, _, _, s)| score.objective() < s.objective())
|
||||
.unwrap_or(true);
|
||||
|
||||
if better {
|
||||
best = Some(candidate);
|
||||
eprintln!(
|
||||
"New best: variant={:?} split={:.2} mult={:.2} token_err={:.3} split_acc={:.2} obj={:.3}",
|
||||
variant,
|
||||
split_threshold,
|
||||
token_mult,
|
||||
score.mean_token_rel_error,
|
||||
score.split_accuracy,
|
||||
score.objective()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((variant, split_threshold, token_mult, score)) = best {
|
||||
println!("=== Recommended ComplexityEstimator Settings ===");
|
||||
println!("prompt_variant: {:?}", variant);
|
||||
println!("split_threshold: {:.2}", split_threshold);
|
||||
println!("token_multiplier: {:.2}", token_mult);
|
||||
println!("mean_token_rel_error: {:.3}", score.mean_token_rel_error);
|
||||
println!("split_accuracy: {:.2}", score.split_accuracy);
|
||||
|
||||
if write_tuning {
|
||||
let mut tuning = TuningParams::default();
|
||||
tuning.complexity = ComplexityTuning {
|
||||
prompt_variant: variant,
|
||||
split_threshold,
|
||||
token_multiplier: token_mult,
|
||||
};
|
||||
let path = tuning.save_to_working_dir(&working_dir_root).await?;
|
||||
println!("Wrote tuning file to {}", path.to_string_lossy());
|
||||
}
|
||||
} else {
|
||||
println!("No calibration result produced.");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -238,7 +238,10 @@ impl ModelPricing {
|
||||
// Flagship tier
|
||||
"openai/o1",
|
||||
"openai/o1-preview",
|
||||
"openai/gpt-5.2-pro",
|
||||
// Mid tier
|
||||
"openai/gpt-5.2",
|
||||
"openai/gpt-5.2-chat",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4o",
|
||||
"openai/gpt-4-turbo",
|
||||
|
||||
32
src/lib.rs
32
src/lib.rs
@@ -4,33 +4,33 @@
|
||||
//!
|
||||
//! This library provides:
|
||||
//! - An HTTP API for task submission and monitoring
|
||||
//! - A hierarchical agent tree for complex task handling
|
||||
//! - A simple agent architecture for direct task execution
|
||||
//! - Tool-based execution for autonomous code editing
|
||||
//! - Integration with OpenRouter for LLM access
|
||||
//!
|
||||
//! ## Architecture (v2: Hierarchical Agent Tree)
|
||||
//! ## Architecture (v3: SimpleAgent)
|
||||
//!
|
||||
//! ```text
|
||||
//! ┌─────────────┐
|
||||
//! │ RootAgent │
|
||||
//! └──────┬──────┘
|
||||
//! ┌─────────────────┼─────────────────┐
|
||||
//! ▼ ▼ ▼
|
||||
//! ┌───────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
//! │ Complexity │ │ Model │ │ Task │
|
||||
//! │ Estimator │ │ Selector │ │ Executor │
|
||||
//! └───────────────┘ └─────────────┘ └─────────────┘
|
||||
//! ┌──────────────────────────────────┐
|
||||
//! │ SimpleAgent │
|
||||
//! │ (direct execution, no overhead) │
|
||||
//! └────────────────┬─────────────────┘
|
||||
//! │
|
||||
//! ▼
|
||||
//! ┌─────────────────┐
|
||||
//! │ TaskExecutor │
|
||||
//! │ (tool loop) │
|
||||
//! └─────────────────┘
|
||||
//! ```
|
||||
//!
|
||||
//! ## Task Flow
|
||||
//! 1. Receive task via API
|
||||
//! 2. Estimate complexity (should we split?)
|
||||
//! 3. Select optimal model (U-curve cost optimization)
|
||||
//! 4. Execute (directly or via subtasks)
|
||||
//! 5. Verify completion (programmatic + LLM hybrid)
|
||||
//! 2. Resolve model (user override or config default)
|
||||
//! 3. Execute via TaskExecutor (tool loop)
|
||||
//! 4. Return result (mission completion via complete_mission tool)
|
||||
//!
|
||||
//! ## Modules
|
||||
//! - `agents`: Hierarchical agent tree (Root, Node, Leaf agents)
|
||||
//! - `agents`: SimpleAgent and TaskExecutor
|
||||
//! - `task`: Task, subtask, and verification types
|
||||
//! - `budget`: Cost tracking and model pricing
|
||||
|
||||
|
||||
@@ -129,3 +129,6 @@ struct EmbeddingUsage {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -58,7 +58,27 @@ impl Tool for ReadFile {
|
||||
return Err(anyhow::anyhow!("File not found: {} (resolved to: {})", path, resolution.resolved.display()));
|
||||
}
|
||||
|
||||
let content = tokio::fs::read_to_string(&resolution.resolved).await?;
|
||||
// Try to read as UTF-8 text, detect binary files
|
||||
let bytes = tokio::fs::read(&resolution.resolved).await?;
|
||||
let content = match String::from_utf8(bytes) {
|
||||
Ok(text) => text,
|
||||
Err(_) => {
|
||||
// Binary file detected - don't try to display content
|
||||
let ext = resolution.resolved.extension()
|
||||
.map(|e| e.to_string_lossy().to_lowercase())
|
||||
.unwrap_or_default();
|
||||
return Ok(format!(
|
||||
"Binary file detected: {} ({} bytes)\n\n\
|
||||
Cannot display binary content directly. For this file type:\n\
|
||||
- .jar/.zip: Use `run_command` with `unzip -l` to list contents, or `jar tf` for JAR files\n\
|
||||
- .class: Use `run_command` with a Java decompiler like `javap -c` or `cfr`\n\
|
||||
- Images: Use appropriate tools to process\n\
|
||||
- Executables: Use `file` command to identify, `strings` to extract text",
|
||||
resolution.resolved.display(),
|
||||
resolution.resolved.metadata().map(|m| m.len()).unwrap_or(0)
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Handle optional line range
|
||||
let start_line = args["start_line"].as_u64().map(|n| n as usize);
|
||||
|
||||
@@ -632,3 +632,6 @@ Note: GitHub code search requires authentication. Set GH_TOKEN env var."
|
||||
Ok(format!("## Search results for '{}'\n\n{}", query, stdout))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,34 @@ use tokio::process::Command;
|
||||
|
||||
use super::{resolve_path_simple as resolve_path, Tool};
|
||||
|
||||
/// Sanitize command output to be safe for LLM consumption.
|
||||
/// Removes binary garbage while preserving valid text.
|
||||
fn sanitize_output(bytes: &[u8]) -> String {
|
||||
// Check if output appears to be mostly binary
|
||||
let non_printable_count = bytes.iter()
|
||||
.filter(|&&b| b < 0x20 && b != b'\n' && b != b'\r' && b != b'\t')
|
||||
.count();
|
||||
|
||||
// If more than 10% is non-printable (excluding newlines/tabs), it's likely binary
|
||||
if bytes.len() > 100 && non_printable_count > bytes.len() / 10 {
|
||||
return format!(
|
||||
"[Binary output detected - {} bytes, {}% non-printable. \
|
||||
Use appropriate tools to process binary data.]",
|
||||
bytes.len(),
|
||||
non_printable_count * 100 / bytes.len()
|
||||
);
|
||||
}
|
||||
|
||||
// Convert to string, replacing invalid UTF-8
|
||||
let text = String::from_utf8_lossy(bytes);
|
||||
|
||||
// Remove null bytes and other problematic control characters
|
||||
// Keep: newlines, tabs, carriage returns
|
||||
text.chars()
|
||||
.filter(|&c| c == '\n' || c == '\r' || c == '\t' || (c >= ' ' && c != '\u{FFFD}'))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Dangerous command patterns that should be blocked.
|
||||
/// These patterns cause infinite loops or could damage the system.
|
||||
const DANGEROUS_PATTERNS: &[(&str, &str)] = &[
|
||||
@@ -146,8 +174,8 @@ impl Tool for RunCommand {
|
||||
}
|
||||
};
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let stdout = sanitize_output(&output.stdout);
|
||||
let stderr = sanitize_output(&output.stderr);
|
||||
let exit_code = output.status.code().unwrap_or(-1);
|
||||
|
||||
tracing::debug!("Command completed: exit={}, stdout_len={}, stderr_len={}",
|
||||
@@ -157,6 +185,11 @@ impl Tool for RunCommand {
|
||||
|
||||
result.push_str(&format!("Exit code: {}\n", exit_code));
|
||||
|
||||
// Add hint when non-zero exit but output exists (common with tools that warn but succeed)
|
||||
if exit_code != 0 && !stdout.is_empty() {
|
||||
result.push_str("Note: Non-zero exit code but output was produced. The command may have succeeded with warnings - verify output files exist.\n");
|
||||
}
|
||||
|
||||
if !stdout.is_empty() {
|
||||
result.push_str("\n--- stdout ---\n");
|
||||
result.push_str(&stdout);
|
||||
|
||||
Reference in New Issue
Block a user