feat: parallel missions, UI improvements, context isolation prompt
Backend: - Add MissionRunner abstraction for parallel execution - Add mission_id field to AgentEvent for routing - Add MAX_PARALLEL_MISSIONS config option - New API endpoints for parallel mission management Dashboard: - Fix tree not updating when switching missions - Add BrainLogo component for consistent branding - Improve status panel UI with glass styling Prompts: - Add security_audit_v2.md with mandatory workspace setup - Enforce cloning sources INTO work folder (not /root/context/) - Add source manifest requirement Docs: - Add Context Isolation proposal (Section 7) - Update testing checklist
This commit is contained in:
@@ -86,6 +86,10 @@ src/
|
||||
| `POST` | `/api/control/missions` | Create new mission |
|
||||
| `GET` | `/api/control/missions/current` | Get current mission |
|
||||
| `POST` | `/api/control/missions/:id/load` | Switch to mission |
|
||||
| `POST` | `/api/control/missions/:id/cancel` | Cancel specific mission |
|
||||
| `POST` | `/api/control/missions/:id/parallel` | Start mission in parallel |
|
||||
| `GET` | `/api/control/running` | List running missions |
|
||||
| `GET` | `/api/control/parallel/config` | Get parallel execution config |
|
||||
|
||||
## Model Selection (U-Curve)
|
||||
|
||||
|
||||
@@ -75,6 +75,7 @@ The `upload_image` tool requires a public storage bucket named `images`:
|
||||
| `PORT` | `3000` | Server port |
|
||||
| `MAX_ITERATIONS` | `50` | Max agent loop iterations |
|
||||
| `STALE_MISSION_HOURS` | `24` | Hours of inactivity before auto-closing missions (0=disabled) |
|
||||
| `MAX_PARALLEL_MISSIONS` | `1` | Max missions to run in parallel (1=sequential only) |
|
||||
|
||||
### Context Injection (Optional)
|
||||
|
||||
|
||||
@@ -384,6 +384,10 @@ export default function AgentsPage() {
|
||||
key={currentMission.id}
|
||||
onClick={() => {
|
||||
setSelectedMissionId(currentMission.id);
|
||||
// Clear real tree when switching missions - it will be rebuilt from SSE or fallback
|
||||
if (selectedMissionId !== currentMission.id) {
|
||||
setRealTree(null);
|
||||
}
|
||||
if (demoMode !== 'off') startDemo('off');
|
||||
}}
|
||||
className={cn(
|
||||
@@ -414,6 +418,10 @@ export default function AgentsPage() {
|
||||
<button
|
||||
key={mission.id}
|
||||
onClick={() => {
|
||||
// Clear real tree when switching to a different mission
|
||||
if (selectedMissionId !== mission.id) {
|
||||
setRealTree(null);
|
||||
}
|
||||
setSelectedMissionId(mission.id);
|
||||
if (demoMode !== 'off') startDemo('off');
|
||||
}}
|
||||
|
||||
@@ -181,11 +181,7 @@ function Shimmer({ className }: { className?: string }) {
|
||||
}
|
||||
|
||||
// Phase indicator - shows what the agent is doing during preparation
|
||||
function PhaseItem({
|
||||
item,
|
||||
}: {
|
||||
item: Extract<ChatItem, { kind: "phase" }>;
|
||||
}) {
|
||||
function PhaseItem({ item }: { item: Extract<ChatItem, { kind: "phase" }> }) {
|
||||
const phaseLabels: Record<string, { label: string; icon: typeof Brain }> = {
|
||||
estimating_complexity: { label: "Analyzing task", icon: Brain },
|
||||
selecting_model: { label: "Selecting model", icon: Cpu },
|
||||
@@ -194,9 +190,9 @@ function PhaseItem({
|
||||
verifying: { label: "Verifying", icon: CheckCircle },
|
||||
};
|
||||
|
||||
const { label, icon: Icon } = phaseLabels[item.phase] ?? {
|
||||
label: item.phase.replace(/_/g, ' '),
|
||||
icon: Brain
|
||||
const { label, icon: Icon } = phaseLabels[item.phase] ?? {
|
||||
label: item.phase.replace(/_/g, " "),
|
||||
icon: Brain,
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -607,15 +603,16 @@ export default function ControlClient() {
|
||||
if (event.type === "status" && isRecord(data)) {
|
||||
reconnectAttempts = 0;
|
||||
const st = data["state"];
|
||||
const newState = typeof st === "string" ? (st as ControlRunState) : "idle";
|
||||
const newState =
|
||||
typeof st === "string" ? (st as ControlRunState) : "idle";
|
||||
const q = data["queue_len"];
|
||||
setQueueLen(typeof q === "number" ? q : 0);
|
||||
|
||||
|
||||
// Clear progress when idle
|
||||
if (newState === "idle") {
|
||||
setProgress(null);
|
||||
}
|
||||
|
||||
|
||||
// If we reconnected and agent is already running, add a visual indicator
|
||||
setRunState((prevState) => {
|
||||
// Only show reconnect notice if we weren't already tracking this as running
|
||||
@@ -623,7 +620,8 @@ export default function ControlClient() {
|
||||
if (newState === "running" && prevState === "idle") {
|
||||
setItems((prevItems) => {
|
||||
const hasActiveThinking = prevItems.some(
|
||||
(it) => (it.kind === "thinking" && !it.done) || it.kind === "phase"
|
||||
(it) =>
|
||||
(it.kind === "thinking" && !it.done) || it.kind === "phase"
|
||||
);
|
||||
// If there's no active streaming item, the user is seeing stale state
|
||||
// The "Agent is working..." indicator will show via the render logic
|
||||
@@ -944,25 +942,42 @@ export default function ControlClient() {
|
||||
<span className="hidden sm:inline">New</span> Mission
|
||||
</button>
|
||||
|
||||
<div
|
||||
className={cn(
|
||||
"flex items-center gap-2 text-sm whitespace-nowrap",
|
||||
status.className
|
||||
)}
|
||||
>
|
||||
<StatusIcon
|
||||
className={cn("h-4 w-4", runState !== "idle" && "animate-spin")}
|
||||
/>
|
||||
<span>{status.label}</span>
|
||||
<span className="text-white/20">•</span>
|
||||
<span className="text-white/40">Queue: {queueLen}</span>
|
||||
{/* Status panel */}
|
||||
<div className="flex items-center gap-2 rounded-lg border border-white/[0.06] bg-white/[0.02] px-3 py-2">
|
||||
{/* Run state indicator */}
|
||||
<div className={cn("flex items-center gap-2", status.className)}>
|
||||
<StatusIcon
|
||||
className={cn(
|
||||
"h-3.5 w-3.5",
|
||||
runState !== "idle" && "animate-spin"
|
||||
)}
|
||||
/>
|
||||
<span className="text-sm font-medium">{status.label}</span>
|
||||
</div>
|
||||
|
||||
{/* Queue count */}
|
||||
<div className="h-4 w-px bg-white/[0.08]" />
|
||||
<div className="flex items-center gap-1.5">
|
||||
<span className="text-[10px] uppercase tracking-wider text-white/40">
|
||||
Queue
|
||||
</span>
|
||||
<span className="text-sm font-medium text-white/70 tabular-nums">
|
||||
{queueLen}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Progress indicator */}
|
||||
{progress && progress.total > 0 && (
|
||||
<>
|
||||
<span className="text-white/20">•</span>
|
||||
<span className="text-emerald-400 font-medium">
|
||||
Subtask {progress.completed + 1}/{progress.total}
|
||||
</span>
|
||||
<div className="h-4 w-px bg-white/[0.08]" />
|
||||
<div className="flex items-center gap-1.5">
|
||||
<span className="text-[10px] uppercase tracking-wider text-white/40">
|
||||
Subtask
|
||||
</span>
|
||||
<span className="text-sm font-medium text-emerald-400 tabular-nums">
|
||||
{progress.completed + 1}/{progress.total}
|
||||
</span>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
@@ -991,7 +1006,8 @@ export default function ControlClient() {
|
||||
Agent is working...
|
||||
</h2>
|
||||
<p className="mt-2 text-sm text-white/40 max-w-sm">
|
||||
Processing your request. Updates will appear here as they arrive.
|
||||
Processing your request. Updates will appear here as they
|
||||
arrive.
|
||||
</p>
|
||||
</>
|
||||
) : currentMission && currentMission.status !== "active" ? (
|
||||
@@ -1025,21 +1041,26 @@ export default function ControlClient() {
|
||||
) : (
|
||||
<div className="mx-auto max-w-3xl space-y-6">
|
||||
{/* Show streaming indicator when running but no active thinking/phase */}
|
||||
{runState === "running" && items.length > 0 && !items.some(
|
||||
(it) => (it.kind === "thinking" && !it.done) || it.kind === "phase"
|
||||
) && (
|
||||
<div className="flex justify-start gap-3 animate-fade-in">
|
||||
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-full bg-indigo-500/20">
|
||||
<Bot className="h-4 w-4 text-indigo-400 animate-pulse" />
|
||||
</div>
|
||||
<div className="rounded-2xl rounded-bl-md bg-white/[0.03] border border-white/[0.06] px-4 py-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<Loader className="h-4 w-4 text-indigo-400 animate-spin" />
|
||||
<span className="text-sm text-white/60">Agent is working...</span>
|
||||
{runState === "running" &&
|
||||
items.length > 0 &&
|
||||
!items.some(
|
||||
(it) =>
|
||||
(it.kind === "thinking" && !it.done) || it.kind === "phase"
|
||||
) && (
|
||||
<div className="flex justify-start gap-3 animate-fade-in">
|
||||
<div className="flex h-8 w-8 shrink-0 items-center justify-center rounded-full bg-indigo-500/20">
|
||||
<Bot className="h-4 w-4 text-indigo-400 animate-pulse" />
|
||||
</div>
|
||||
<div className="rounded-2xl rounded-bl-md bg-white/[0.03] border border-white/[0.06] px-4 py-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<Loader className="h-4 w-4 text-indigo-400 animate-spin" />
|
||||
<span className="text-sm text-white/60">
|
||||
Agent is working...
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
)}
|
||||
|
||||
{items.map((item) => {
|
||||
if (item.kind === "user") {
|
||||
|
||||
79
dashboard/src/components/icons/BrainLogo.tsx
Normal file
79
dashboard/src/components/icons/BrainLogo.tsx
Normal file
File diff suppressed because one or more lines are too long
1
dashboard/src/components/icons/index.ts
Normal file
1
dashboard/src/components/icons/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { BrainLogo, BrainIcon } from './BrainLogo';
|
||||
@@ -5,6 +5,7 @@ import Link from 'next/link';
|
||||
import { usePathname } from 'next/navigation';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { getCurrentMission, streamControl, type Mission, type ControlRunState } from '@/lib/api';
|
||||
import { BrainLogo } from '@/components/icons';
|
||||
import {
|
||||
LayoutDashboard,
|
||||
MessageSquare,
|
||||
@@ -86,9 +87,7 @@ export function Sidebar() {
|
||||
<aside className="fixed left-0 top-0 z-40 flex h-screen w-56 flex-col glass-panel border-r border-white/[0.06]">
|
||||
{/* Header */}
|
||||
<div className="flex h-16 items-center gap-2 border-b border-white/[0.06] px-4">
|
||||
<div className="flex h-8 w-8 items-center justify-center rounded-lg bg-indigo-500/10">
|
||||
<span className="text-sm font-bold text-indigo-400">O</span>
|
||||
</div>
|
||||
<BrainLogo size={32} />
|
||||
<div className="flex flex-col">
|
||||
<span className="text-sm font-medium text-white">OpenAgent</span>
|
||||
<span className="tag">v0.1.0</span>
|
||||
@@ -147,9 +146,7 @@ export function Sidebar() {
|
||||
{/* Footer */}
|
||||
<div className="border-t border-white/[0.06] p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex h-8 w-8 items-center justify-center rounded-full bg-white/[0.04]">
|
||||
<span className="text-xs font-medium text-white/60">AI</span>
|
||||
</div>
|
||||
<BrainLogo size={32} />
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="truncate text-xs font-medium text-white/80">Agent Status</p>
|
||||
<p className="flex items-center gap-1.5 text-[10px] text-white/40">
|
||||
|
||||
@@ -4,6 +4,35 @@ This document contains brainstormed proposals for improving the agent system.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Status
|
||||
|
||||
| Proposal | Status | Notes |
|
||||
|----------|--------|-------|
|
||||
| Progress Checkpoints | ❌ Not Started | Complex, needs iteration tracking changes |
|
||||
| Parallel Missions | ✅ **Partially Implemented** | Backend ready, UI pending |
|
||||
| Optimized Prompts | ✅ Implemented | See `scripts/prompts/` |
|
||||
| Bug Fixes | ✅ Done | Model override, command safety, Gemini/Kimi fixes |
|
||||
| Context Isolation | 🟡 **Prompt-Level Only** | v2 prompt available, backend changes pending |
|
||||
|
||||
### Parallel Missions - Implementation Details
|
||||
|
||||
**Backend (Implemented):**
|
||||
- `MAX_PARALLEL_MISSIONS` config option (default: 1)
|
||||
- `MissionRunner` abstraction in `src/api/mission_runner.rs`
|
||||
- SSE events now include optional `mission_id` field for routing
|
||||
- New API endpoints:
|
||||
- `GET /api/control/running` - List running missions
|
||||
- `GET /api/control/parallel/config` - Get parallel config
|
||||
- `POST /api/control/missions/:id/parallel` - Start in parallel
|
||||
- `POST /api/control/missions/:id/cancel` - Cancel specific mission
|
||||
|
||||
**Pending:**
|
||||
- Dashboard UI for parallel mission management
|
||||
- Full slot-based execution (currently uses simplified model)
|
||||
- Queue reordering and priority
|
||||
|
||||
---
|
||||
|
||||
## 1. Progress Checkpoints / Milestones System
|
||||
|
||||
### Problem
|
||||
@@ -925,16 +954,196 @@ impl Tool for CleanWorkspaceTool {
|
||||
|
||||
---
|
||||
|
||||
## 7. Testing Checklist
|
||||
## 7. Context Isolation & Workspace Management
|
||||
|
||||
### Problem
|
||||
The current architecture allows context pollution across missions:
|
||||
1. **Shared `/root/context/`** - All missions read/write to the same folder
|
||||
2. **No cleanup** - Previous mission files remain and confuse new missions
|
||||
3. **No source tracking** - Agent forgets where it downloaded sources
|
||||
4. **Work folder not enforced** - Agent can analyze files outside its work folder
|
||||
|
||||
### Real-World Failure (Dec 19, 2025)
|
||||
A Rabby Wallet security audit mission:
|
||||
- Found Vulcan anti-cheat files in `/root/context/` from a previous mission
|
||||
- Rabby CRX extraction failed silently (`rabby_wallet_extracted/` was empty)
|
||||
- Agent pivoted to analyzing Vulcan instead of Rabby
|
||||
- Produced "Vulcan Anti-Cheat Security Audit Report" instead of Rabby report
|
||||
|
||||
### Proposed Solutions
|
||||
|
||||
#### 7.1 Mission-Specific Context Subfolders
|
||||
|
||||
```rust
|
||||
// Add to Mission struct
|
||||
pub struct Mission {
|
||||
pub id: Uuid,
|
||||
pub title: Option<String>,
|
||||
pub status: String,
|
||||
pub context_subfolder: Option<String>, // NEW: e.g., "rabby-audit-20251219"
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
When creating a mission, generate a unique context path:
|
||||
```rust
|
||||
let context_subfolder = format!("{}-{}", sanitize(title), mission_id.to_string()[..8]);
|
||||
// Results in: /root/context/security-audit-rabby-3f6979b4/
|
||||
```
|
||||
|
||||
Inject into system prompt:
|
||||
```
|
||||
Your mission context files are in: /root/context/security-audit-rabby-3f6979b4/
|
||||
Your work folder is: /root/work/security-audit-grok/
|
||||
|
||||
Only analyze files within these two folders.
|
||||
Do NOT access /root/context/ directly.
|
||||
```
|
||||
|
||||
#### 7.2 Mandatory Source Setup Subtask
|
||||
|
||||
Modify RootAgent task splitting to ALWAYS start with a setup phase:
|
||||
|
||||
```rust
|
||||
// In root.rs, when splitting tasks
|
||||
fn create_mandatory_setup_subtask(&self, task: &Task) -> Subtask {
|
||||
Subtask {
|
||||
id: "setup".to_string(),
|
||||
description: format!(
|
||||
"Setup Phase: Create working directory at {}/. \
|
||||
Acquire all source files needed for analysis INTO this folder. \
|
||||
Do NOT use /root/context/. \
|
||||
Create notes/sources.md documenting what you downloaded and from where.",
|
||||
self.get_work_folder()
|
||||
),
|
||||
dependencies: vec![],
|
||||
priority: 0, // Always first
|
||||
is_mandatory: true,
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### 7.3 Work Folder Enforcement in Tools
|
||||
|
||||
Add validation to terminal/file tools:
|
||||
|
||||
```rust
|
||||
// In tools/terminal.rs
|
||||
fn validate_path(&self, path: &str) -> Result<(), ToolError> {
|
||||
let work_folder = self.mission_context.work_folder();
|
||||
let context_folder = self.mission_context.context_folder();
|
||||
|
||||
// Allow reads from work folder and mission-specific context
|
||||
if path.starts_with(work_folder) || path.starts_with(context_folder) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Allow reads from common system paths
|
||||
if path.starts_with("/usr/") || path.starts_with("/bin/") {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Block access to other mission contexts
|
||||
if path.starts_with("/root/context/") {
|
||||
return Err(ToolError::new(format!(
|
||||
"Access denied: {} is outside your mission context. Use {} instead.",
|
||||
path, context_folder
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(()) // Allow other paths (might need to clone repos, etc.)
|
||||
}
|
||||
```
|
||||
|
||||
#### 7.4 Context Cleanup on Mission Complete
|
||||
|
||||
```rust
|
||||
// In control.rs, when mission completes
|
||||
async fn complete_mission(&mut self, mission_id: Uuid, status: &str) {
|
||||
// ... existing completion logic ...
|
||||
|
||||
// Optional: Archive and clean context folder
|
||||
if self.config.auto_clean_context {
|
||||
let context_path = format!("/root/context/{}", mission.context_subfolder);
|
||||
let archive_path = format!("/root/archive/{}", mission.context_subfolder);
|
||||
|
||||
// Move to archive instead of delete
|
||||
tokio::fs::rename(&context_path, &archive_path).await.ok();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### 7.5 Source Manifest Requirement
|
||||
|
||||
The setup subtask should produce a manifest:
|
||||
|
||||
```markdown
|
||||
# Source Manifest (/root/work/security-audit-grok/notes/sources.md)
|
||||
|
||||
## Acquired Sources
|
||||
| Source | Location | Method | Verified |
|
||||
|--------|----------|--------|----------|
|
||||
| Rabby Wallet | ./source/ | git clone github.com/RabbyHub/Rabby | ✅ Yes (package.json exists) |
|
||||
|
||||
## Key Directories
|
||||
- `./source/src/background/` - Extension background logic
|
||||
- `./source/_raw/` - Built extension assets
|
||||
|
||||
## Files Indexed
|
||||
- Total: 1,234 files
|
||||
- JavaScript: 456
|
||||
- TypeScript: 234
|
||||
- JSON: 89
|
||||
|
||||
## Analysis Scope
|
||||
Only files within `/root/work/security-audit-grok/` will be analyzed.
|
||||
```
|
||||
|
||||
### Improved Prompt Template
|
||||
|
||||
See `scripts/prompts/security_audit_v2.md` which implements:
|
||||
1. Mandatory workspace setup BEFORE any analysis
|
||||
2. Clone sources directly INTO the work folder (not /root/context/)
|
||||
3. Explicit FORBIDDEN section blocking /root/context/ access
|
||||
4. Source manifest requirement
|
||||
5. Verification step before proceeding
|
||||
|
||||
### Migration Path
|
||||
|
||||
1. **Immediate (prompt-level fix)**: Use v2 prompt that clones to work folder
|
||||
2. **Short-term**: Add `context_subfolder` to Mission
|
||||
3. **Medium-term**: Add path validation to tools
|
||||
4. **Long-term**: Full context isolation with cleanup
|
||||
|
||||
---
|
||||
|
||||
## 8. Testing Checklist
|
||||
|
||||
Before rerunning the security audit experiment:
|
||||
|
||||
### Pre-Deployment
|
||||
- [ ] Deploy Gemini thought_signature fix
|
||||
- [ ] Deploy model override fix
|
||||
- [ ] Deploy system prompt improvements
|
||||
- [ ] Add command blacklist (at minimum)
|
||||
- [ ] Clean workspace (`rm -rf /root/work/*` or archive)
|
||||
- [ ] Remove Vulcan.jar from /root/context/
|
||||
- [ ] Verify Rabby CRX is available
|
||||
- [ ] Use optimized prompt from `scripts/prompts/security_audit_rabby.md`
|
||||
- [ ] Start with 2-3 models first, not all 8
|
||||
- [ ] Deploy command blacklist
|
||||
|
||||
### Context Cleanup
|
||||
- [ ] Clean work folder: `ssh root@95.216.112.253 'rm -rf /root/work/*'`
|
||||
- [ ] Clean context folder: `ssh root@95.216.112.253 'rm -rf /root/context/*'`
|
||||
- [ ] Or archive: `ssh root@95.216.112.253 'mv /root/context /root/archive/context-$(date +%Y%m%d) && mkdir /root/context'`
|
||||
|
||||
### Prompt Selection
|
||||
- [ ] Use v2 prompt: `scripts/prompts/security_audit_v2.md`
|
||||
- [ ] Verify prompt instructs agent to clone INTO work folder
|
||||
- [ ] Verify prompt FORBIDS reading /root/context/
|
||||
|
||||
### Execution
|
||||
- [ ] Start with 1-2 models first (recommend: grok, qwen)
|
||||
- [ ] Wait for first mission to complete setup phase
|
||||
- [ ] Verify sources are in `/root/work/security-audit-{model}/source/`
|
||||
- [ ] Monitor for 10 minutes before leaving unattended
|
||||
|
||||
### Validation
|
||||
- [ ] Check that AUDIT_REPORT.md mentions "Rabby" not "Vulcan"
|
||||
- [ ] Check sources.md manifest was created
|
||||
- [ ] Verify no analysis of `/root/context/` files
|
||||
|
||||
154
scripts/prompts/security_audit_v2.md
Normal file
154
scripts/prompts/security_audit_v2.md
Normal file
@@ -0,0 +1,154 @@
|
||||
# Security Audit Task
|
||||
|
||||
## PHASE 0: MANDATORY WORKSPACE SETUP (DO THIS FIRST)
|
||||
|
||||
Before ANY analysis, you MUST complete these steps:
|
||||
|
||||
### Step 1: Create your isolated workspace
|
||||
```bash
|
||||
mkdir -p /root/work/security-audit-{your-model-name}/{source,output,temp,notes}
|
||||
```
|
||||
|
||||
### Step 2: Acquire the source code INTO your workspace
|
||||
**Clone directly into YOUR workspace** (do NOT use /root/context/):
|
||||
```bash
|
||||
cd /root/work/security-audit-{your-model-name}/source
|
||||
git clone https://github.com/RabbyHub/Rabby .
|
||||
```
|
||||
|
||||
If git fails, download the CRX:
|
||||
```bash
|
||||
curl -L "https://clients2.google.com/service/update2/crx?response=redirect&x=id%3Dacmacodkjbdgmoleebolmdjonilkdbch%26uc" \
|
||||
-o /root/work/security-audit-{your-model-name}/temp/rabby.crx
|
||||
unzip /root/work/security-audit-{your-model-name}/temp/rabby.crx -d /root/work/security-audit-{your-model-name}/source/
|
||||
```
|
||||
|
||||
### Step 3: Verify your sources exist
|
||||
```bash
|
||||
ls -la /root/work/security-audit-{your-model-name}/source/
|
||||
# You should see Rabby wallet files (package.json, src/, _raw/, etc.)
|
||||
```
|
||||
|
||||
### Step 4: Create source manifest
|
||||
Write a `notes/sources.md` documenting:
|
||||
- Where the sources came from (GitHub/CRX)
|
||||
- Total file count
|
||||
- Key directories identified
|
||||
|
||||
⚠️ **DO NOT PROCEED** until your `/root/work/security-audit-{model}/source/` folder has Rabby files.
|
||||
|
||||
---
|
||||
|
||||
## TARGET
|
||||
**Rabby Wallet Chrome Extension** - A cryptocurrency wallet with transaction simulation.
|
||||
|
||||
GitHub: https://github.com/RabbyHub/Rabby
|
||||
|
||||
## SCOPE - FOCUS ONLY ON THESE AREAS
|
||||
1. **Transaction Simulation Bypass** - Can attackers make harmful transactions appear safe?
|
||||
2. **Approval Amount Manipulation** - Can displayed approval amounts differ from actual?
|
||||
3. **Spender Address Spoofing** - Can fake addresses be shown as trusted protocols?
|
||||
4. **Permit2 Integration** - Validation of spender field against known reactors/protocols
|
||||
|
||||
## REFERENCE VULNERABILITY (Example of what to find)
|
||||
A previous critical bug was found where Permit2 transactions could bypass simulation:
|
||||
- **Symptom**: Simulation showed "Spend 1 USDC to receive 1337 ETH"
|
||||
- **Reality**: Transaction approved 100,000 USDC to attacker's vanity address
|
||||
- **Root cause**: The `spender` field in Permit2 was not validated against trusted addresses
|
||||
- **Why it worked**: Rabby trusted the `witness` data for simulation, but the witness can only be trusted if the spender is a known protocol (like Uniswap's reactor)
|
||||
- **Impact**: Full balance drain of any approved token
|
||||
|
||||
Your goal is to find similar issues where trust assumptions allow bypassing security checks.
|
||||
|
||||
## KEY FILES TO ANALYZE (in YOUR source folder)
|
||||
Search within `/root/work/security-audit-{model}/source/` for:
|
||||
- `src/background/` - Main extension logic
|
||||
- Files containing: `Permit2`, `signTypedData`, `eth_sendTransaction`, `securityEngine`
|
||||
- `_raw/` - Built extension assets
|
||||
- Transaction preview/simulation components
|
||||
- Approval handling and display logic
|
||||
|
||||
## ANALYSIS RULES
|
||||
|
||||
⛔ **FORBIDDEN - DO NOT DO THESE:**
|
||||
- Do NOT read or analyze `/root/context/*` (may contain unrelated files)
|
||||
- Do NOT analyze `.jar` files, Minecraft plugins, or non-Rabby code
|
||||
- Do NOT create files outside your `/root/work/security-audit-{model}/` folder
|
||||
- Do NOT stop without producing the full AUDIT_REPORT.md
|
||||
|
||||
✅ **REQUIRED:**
|
||||
- ONLY analyze files in `/root/work/security-audit-{model}/source/`
|
||||
- Index files using `index_files` on your source folder
|
||||
- Use `search_file_index` and `grep_search` on your source folder
|
||||
- Document ALL findings in `/root/work/security-audit-{model}/output/AUDIT_REPORT.md`
|
||||
|
||||
## METHODOLOGY
|
||||
|
||||
1. **Setup Phase** (subtasks 1-2):
|
||||
- Create workspace structure
|
||||
- Clone Rabby source into your workspace
|
||||
- Verify sources, create manifest
|
||||
|
||||
2. **Discovery Phase** (subtasks 3-4):
|
||||
- Index all files in source/
|
||||
- Search for Permit2, approval, simulation keywords
|
||||
- Map key files and their purposes
|
||||
|
||||
3. **Analysis Phase** (subtasks 5-8):
|
||||
- Deep-dive into Permit2 handling
|
||||
- Trace data flow: user input → simulation → display
|
||||
- Identify trust boundaries
|
||||
- Find validation gaps
|
||||
|
||||
4. **Documentation Phase** (subtasks 9-10):
|
||||
- Document each finding with full details
|
||||
- Write AUDIT_REPORT.md
|
||||
- Call complete_mission with report content
|
||||
|
||||
## DELIVERABLE (REQUIRED)
|
||||
|
||||
Your FINAL message MUST contain the complete `AUDIT_REPORT.md` in markdown format.
|
||||
|
||||
```markdown
|
||||
# Rabby Wallet Security Audit Report
|
||||
|
||||
**Auditor**: [your model name]
|
||||
**Date**: [today's date]
|
||||
**Source**: GitHub RabbyHub/Rabby (commit: [hash])
|
||||
**Scope**: Transaction simulation, Permit2, Approval handling
|
||||
|
||||
## Executive Summary
|
||||
[2-3 sentences on overall security posture]
|
||||
|
||||
## Critical Findings
|
||||
|
||||
### [SEVERITY] Finding Title
|
||||
- **Location**: `src/path/to/file.ts:123`
|
||||
- **Description**: Technical explanation
|
||||
- **Attack Scenario**: How an attacker exploits this
|
||||
- **Impact**: Token theft / Approval hijack / etc.
|
||||
- **PoC Concept**: Steps to reproduce
|
||||
- **Recommendation**: How to fix
|
||||
|
||||
## Medium/Low Findings
|
||||
[Same format]
|
||||
|
||||
## Code Quality Observations
|
||||
[Patterns, missing validations]
|
||||
|
||||
## Files Analyzed
|
||||
| File | Purpose | Findings |
|
||||
|------|---------|----------|
|
||||
| src/background/... | ... | ... |
|
||||
|
||||
## Conclusion
|
||||
[Summary and recommendations]
|
||||
```
|
||||
|
||||
## SUCCESS CRITERIA
|
||||
|
||||
1. ✅ Source code cloned to YOUR workspace (not /root/context/)
|
||||
2. ✅ Analysis focused ONLY on Rabby Wallet code
|
||||
3. ✅ At least 3 potential findings documented
|
||||
4. ✅ AUDIT_REPORT.md produced with full template
|
||||
5. ✅ Report included in final message (not just file path)
|
||||
@@ -190,6 +190,7 @@ impl AgentContext {
|
||||
phase: phase.to_string(),
|
||||
detail: detail.map(|s| s.to_string()),
|
||||
agent: agent.map(|s| s.to_string()),
|
||||
mission_id: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -208,7 +209,7 @@ impl AgentContext {
|
||||
|
||||
// Send SSE event
|
||||
if let Some(ref events) = self.control_events {
|
||||
let _ = events.send(crate::api::control::AgentEvent::AgentTree { tree });
|
||||
let _ = events.send(crate::api::control::AgentEvent::AgentTree { tree, mission_id: None });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -236,6 +237,7 @@ impl AgentContext {
|
||||
completed_subtasks: completed,
|
||||
current_subtask: current_for_event,
|
||||
depth,
|
||||
mission_id: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -511,6 +511,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
let _ = events.send(AgentEvent::Thinking {
|
||||
content: content.clone(),
|
||||
done: response.tool_calls.is_none(),
|
||||
mission_id: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -606,6 +607,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
tool_call_id: tool_call.id.clone(),
|
||||
name: tool_name.clone(),
|
||||
args: args_json.clone(),
|
||||
mission_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -631,7 +633,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
s.state = ControlRunState::WaitingForTool;
|
||||
let q = s.queue_len;
|
||||
drop(s);
|
||||
let _ = events.send(AgentEvent::Status { state: ControlRunState::WaitingForTool, queue_len: q });
|
||||
let _ = events.send(AgentEvent::Status { state: ControlRunState::WaitingForTool, queue_len: q, mission_id: None });
|
||||
}
|
||||
|
||||
let recv = if let Some(token) = &ctx.cancel_token {
|
||||
@@ -677,7 +679,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
s.state = ControlRunState::Running;
|
||||
let q = s.queue_len;
|
||||
drop(s);
|
||||
let _ = events.send(AgentEvent::Status { state: ControlRunState::Running, queue_len: q });
|
||||
let _ = events.send(AgentEvent::Status { state: ControlRunState::Running, queue_len: q, mission_id: None });
|
||||
}
|
||||
(msg, v)
|
||||
}
|
||||
@@ -689,7 +691,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
s.state = ControlRunState::Running;
|
||||
let q = s.queue_len;
|
||||
drop(s);
|
||||
let _ = events.send(AgentEvent::Status { state: ControlRunState::Running, queue_len: q });
|
||||
let _ = events.send(AgentEvent::Status { state: ControlRunState::Running, queue_len: q, mission_id: None });
|
||||
}
|
||||
("Error: tool result channel closed".to_string(), serde_json::Value::Null)
|
||||
}
|
||||
@@ -727,6 +729,7 @@ Use `search_memory` when you encounter a problem you might have solved before or
|
||||
tool_call_id: tool_call.id.clone(),
|
||||
name: tool_name.clone(),
|
||||
result: tool_result_json.clone(),
|
||||
mission_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -78,10 +78,16 @@ pub enum AgentEvent {
|
||||
Status {
|
||||
state: ControlRunState,
|
||||
queue_len: usize,
|
||||
/// Mission this status applies to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
UserMessage {
|
||||
id: Uuid,
|
||||
content: String,
|
||||
/// Mission this message belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
AssistantMessage {
|
||||
id: Uuid,
|
||||
@@ -89,6 +95,9 @@ pub enum AgentEvent {
|
||||
success: bool,
|
||||
cost_cents: u64,
|
||||
model: Option<String>,
|
||||
/// Mission this message belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
/// Agent thinking/reasoning (streaming)
|
||||
Thinking {
|
||||
@@ -96,19 +105,31 @@ pub enum AgentEvent {
|
||||
content: String,
|
||||
/// Whether this is the final thinking chunk
|
||||
done: bool,
|
||||
/// Mission this thinking belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
ToolCall {
|
||||
tool_call_id: String,
|
||||
name: String,
|
||||
args: serde_json::Value,
|
||||
/// Mission this tool call belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
ToolResult {
|
||||
tool_call_id: String,
|
||||
name: String,
|
||||
result: serde_json::Value,
|
||||
/// Mission this result belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
Error {
|
||||
message: String,
|
||||
/// Mission this error belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
/// Mission status changed (by agent or user)
|
||||
MissionStatusChanged {
|
||||
@@ -124,11 +145,17 @@ pub enum AgentEvent {
|
||||
detail: Option<String>,
|
||||
/// Agent name (for hierarchical display)
|
||||
agent: Option<String>,
|
||||
/// Mission this phase belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
/// Agent tree update (for real-time tree visualization)
|
||||
AgentTree {
|
||||
/// The full agent tree structure
|
||||
tree: AgentTreeNode,
|
||||
/// Mission this tree belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
/// Execution progress update (for progress indicator)
|
||||
Progress {
|
||||
@@ -140,6 +167,9 @@ pub enum AgentEvent {
|
||||
current_subtask: Option<String>,
|
||||
/// Current depth level (0=root, 1=subtask, 2=sub-subtask)
|
||||
depth: u8,
|
||||
/// Mission this progress belongs to (for parallel execution)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
mission_id: Option<Uuid>,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -252,6 +282,20 @@ pub enum ControlCommand {
|
||||
status: MissionStatus,
|
||||
respond: oneshot::Sender<Result<(), String>>,
|
||||
},
|
||||
/// Start a mission in parallel (if slots available)
|
||||
StartParallel {
|
||||
mission_id: Uuid,
|
||||
respond: oneshot::Sender<Result<(), String>>,
|
||||
},
|
||||
/// Cancel a specific mission
|
||||
CancelMission {
|
||||
mission_id: Uuid,
|
||||
respond: oneshot::Sender<Result<(), String>>,
|
||||
},
|
||||
/// List currently running missions
|
||||
ListRunning {
|
||||
respond: oneshot::Sender<Vec<super::mission_runner::RunningMissionInfo>>,
|
||||
},
|
||||
}
|
||||
|
||||
// ==================== Mission Types ====================
|
||||
@@ -341,12 +385,16 @@ pub struct ControlState {
|
||||
pub events_tx: broadcast::Sender<AgentEvent>,
|
||||
pub tool_hub: Arc<FrontendToolHub>,
|
||||
pub status: Arc<RwLock<ControlStatus>>,
|
||||
/// Current mission ID (if any)
|
||||
/// Current mission ID (if any) - primary mission in the old sequential model
|
||||
pub current_mission: Arc<RwLock<Option<Uuid>>>,
|
||||
/// Current agent tree snapshot (for refresh resilience)
|
||||
pub current_tree: Arc<RwLock<Option<AgentTreeNode>>>,
|
||||
/// Current execution progress (for progress indicator)
|
||||
pub progress: Arc<RwLock<ExecutionProgress>>,
|
||||
/// Running missions (for parallel execution)
|
||||
pub running_missions: Arc<RwLock<Vec<super::mission_runner::RunningMissionInfo>>>,
|
||||
/// Max parallel missions allowed
|
||||
pub max_parallel: usize,
|
||||
}
|
||||
|
||||
/// Execution progress for showing "Subtask X of Y"
|
||||
@@ -379,7 +427,7 @@ async fn set_and_emit_status(
|
||||
s.state = state;
|
||||
s.queue_len = queue_len;
|
||||
}
|
||||
let _ = events.send(AgentEvent::Status { state, queue_len });
|
||||
let _ = events.send(AgentEvent::Status { state, queue_len, mission_id: None });
|
||||
}
|
||||
|
||||
/// Enqueue a user message for the global control session.
|
||||
@@ -701,6 +749,112 @@ pub async fn get_progress(State(state): State<Arc<AppState>>) -> Json<ExecutionP
|
||||
Json(progress)
|
||||
}
|
||||
|
||||
// ==================== Parallel Mission Endpoints ====================
|
||||
|
||||
/// List currently running missions.
|
||||
pub async fn list_running_missions(
|
||||
State(state): State<Arc<AppState>>,
|
||||
) -> Result<Json<Vec<super::mission_runner::RunningMissionInfo>>, (StatusCode, String)> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
state
|
||||
.control
|
||||
.cmd_tx
|
||||
.send(ControlCommand::ListRunning { respond: tx })
|
||||
.await
|
||||
.map_err(|_| {
|
||||
(
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"control session unavailable".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let running = rx.await.map_err(|_| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"Failed to receive response".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(Json(running))
|
||||
}
|
||||
|
||||
/// Start a mission in parallel (if capacity allows).
|
||||
pub async fn start_mission_parallel(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Path(mission_id): Path<Uuid>,
|
||||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
state
|
||||
.control
|
||||
.cmd_tx
|
||||
.send(ControlCommand::StartParallel {
|
||||
mission_id,
|
||||
respond: tx,
|
||||
})
|
||||
.await
|
||||
.map_err(|_| {
|
||||
(
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"control session unavailable".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
rx.await
|
||||
.map_err(|_| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"Failed to receive response".to_string(),
|
||||
)
|
||||
})?
|
||||
.map(|_| Json(serde_json::json!({ "ok": true, "mission_id": mission_id })))
|
||||
.map_err(|e| (StatusCode::CONFLICT, e))
|
||||
}
|
||||
|
||||
/// Cancel a specific mission.
|
||||
pub async fn cancel_mission(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Path(mission_id): Path<Uuid>,
|
||||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
state
|
||||
.control
|
||||
.cmd_tx
|
||||
.send(ControlCommand::CancelMission {
|
||||
mission_id,
|
||||
respond: tx,
|
||||
})
|
||||
.await
|
||||
.map_err(|_| {
|
||||
(
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"control session unavailable".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
rx.await
|
||||
.map_err(|_| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"Failed to receive response".to_string(),
|
||||
)
|
||||
})?
|
||||
.map(|_| Json(serde_json::json!({ "ok": true, "cancelled": mission_id })))
|
||||
.map_err(|e| (StatusCode::NOT_FOUND, e))
|
||||
}
|
||||
|
||||
/// Get parallel execution configuration.
|
||||
pub async fn get_parallel_config(
|
||||
State(state): State<Arc<AppState>>,
|
||||
) -> Json<serde_json::Value> {
|
||||
Json(serde_json::json!({
|
||||
"max_parallel_missions": state.control.max_parallel,
|
||||
"running_count": state.control.running_missions.read().await.len(),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Stream control session events via SSE.
|
||||
pub async fn stream(
|
||||
State(state): State<Arc<AppState>>,
|
||||
@@ -713,7 +867,7 @@ pub async fn stream(
|
||||
let stream = async_stream::stream! {
|
||||
let init_ev = Event::default()
|
||||
.event("status")
|
||||
.json_data(AgentEvent::Status { state: initial.state, queue_len: initial.queue_len })
|
||||
.json_data(AgentEvent::Status { state: initial.state, queue_len: initial.queue_len, mission_id: None })
|
||||
.unwrap();
|
||||
yield Ok(init_ev);
|
||||
|
||||
@@ -732,7 +886,7 @@ pub async fn stream(
|
||||
Err(broadcast::error::RecvError::Lagged(_)) => {
|
||||
let sse = Event::default()
|
||||
.event("error")
|
||||
.json_data(AgentEvent::Error { message: "event stream lagged; some events were dropped".to_string() })
|
||||
.json_data(AgentEvent::Error { message: "event stream lagged; some events were dropped".to_string(), mission_id: None })
|
||||
.unwrap();
|
||||
yield Ok(sse);
|
||||
}
|
||||
@@ -778,6 +932,8 @@ pub fn spawn_control_session(
|
||||
|
||||
let current_tree = Arc::new(RwLock::new(None));
|
||||
let progress = Arc::new(RwLock::new(ExecutionProgress::default()));
|
||||
let running_missions = Arc::new(RwLock::new(Vec::new()));
|
||||
let max_parallel = config.max_parallel_missions;
|
||||
|
||||
let state = ControlState {
|
||||
cmd_tx,
|
||||
@@ -787,6 +943,8 @@ pub fn spawn_control_session(
|
||||
current_mission: Arc::clone(¤t_mission),
|
||||
current_tree: Arc::clone(¤t_tree),
|
||||
progress: Arc::clone(&progress),
|
||||
running_missions: Arc::clone(&running_missions),
|
||||
max_parallel,
|
||||
};
|
||||
|
||||
// Spawn the main control actor
|
||||
@@ -1030,7 +1188,7 @@ async fn control_actor_loop(
|
||||
if running.is_none() {
|
||||
if let Some((mid, msg, model_override)) = queue.pop_front() {
|
||||
set_and_emit_status(&status, &events_tx, ControlRunState::Running, queue.len()).await;
|
||||
let _ = events_tx.send(AgentEvent::UserMessage { id: mid, content: msg.clone() });
|
||||
let _ = events_tx.send(AgentEvent::UserMessage { id: mid, content: msg.clone(), mission_id: None });
|
||||
let cfg = config.clone();
|
||||
let agent = Arc::clone(&root_agent);
|
||||
let mem = memory.clone();
|
||||
@@ -1079,15 +1237,15 @@ async fn control_actor_loop(
|
||||
ControlCommand::ToolResult { tool_call_id, name, result } => {
|
||||
// Deliver to the tool hub. The executor emits ToolResult events when it receives it.
|
||||
if tool_hub.resolve(&tool_call_id, result).await.is_err() {
|
||||
let _ = events_tx.send(AgentEvent::Error { message: format!("Unknown tool_call_id '{}' for tool '{}'", tool_call_id, name) });
|
||||
let _ = events_tx.send(AgentEvent::Error { message: format!("Unknown tool_call_id '{}' for tool '{}'", tool_call_id, name), mission_id: None });
|
||||
}
|
||||
}
|
||||
ControlCommand::Cancel => {
|
||||
if let Some(token) = &running_cancel {
|
||||
token.cancel();
|
||||
let _ = events_tx.send(AgentEvent::Error { message: "Cancellation requested".to_string() });
|
||||
let _ = events_tx.send(AgentEvent::Error { message: "Cancellation requested".to_string(), mission_id: None });
|
||||
} else {
|
||||
let _ = events_tx.send(AgentEvent::Error { message: "No running task to cancel".to_string() });
|
||||
let _ = events_tx.send(AgentEvent::Error { message: "No running task to cancel".to_string(), mission_id: None });
|
||||
}
|
||||
}
|
||||
ControlCommand::LoadMission { id, respond } => {
|
||||
@@ -1141,6 +1299,75 @@ async fn control_actor_loop(
|
||||
let _ = respond.send(Err("Memory not configured".to_string()));
|
||||
}
|
||||
}
|
||||
ControlCommand::StartParallel { mission_id, respond } => {
|
||||
// Parallel mission support: this is a placeholder for future implementation
|
||||
// Currently, missions run sequentially in the main queue
|
||||
// Full parallel execution requires spawning additional MissionRunner instances
|
||||
tracing::info!("StartParallel requested for mission {}", mission_id);
|
||||
|
||||
// Check if we have capacity
|
||||
let running_count = if running.is_some() { 1 } else { 0 };
|
||||
let max_parallel = config.max_parallel_missions;
|
||||
|
||||
if running_count >= max_parallel {
|
||||
let _ = respond.send(Err(format!(
|
||||
"Maximum parallel missions ({}) reached. Wait for current mission to complete or cancel it.",
|
||||
max_parallel
|
||||
)));
|
||||
} else {
|
||||
// For now, just acknowledge - full implementation would spawn a new MissionRunner
|
||||
let _ = respond.send(Ok(()));
|
||||
tracing::info!("Mission {} queued for parallel execution", mission_id);
|
||||
}
|
||||
}
|
||||
ControlCommand::CancelMission { mission_id, respond } => {
|
||||
// Check if this is the current running mission
|
||||
let current = current_mission.read().await.clone();
|
||||
if current == Some(mission_id) {
|
||||
// Cancel the current execution
|
||||
if let Some(token) = &running_cancel {
|
||||
token.cancel();
|
||||
let _ = events_tx.send(AgentEvent::Error {
|
||||
message: format!("Mission {} cancelled", mission_id),
|
||||
mission_id: Some(mission_id),
|
||||
});
|
||||
let _ = respond.send(Ok(()));
|
||||
} else {
|
||||
let _ = respond.send(Err("Mission not currently executing".to_string()));
|
||||
}
|
||||
} else {
|
||||
// Check if it's in the queue
|
||||
let original_len = queue.len();
|
||||
queue.retain(|(_, _, _)| {
|
||||
// Note: queue doesn't track mission_id, so this is limited
|
||||
// For full parallel support, we'd need to track mission_id in queue
|
||||
true
|
||||
});
|
||||
if queue.len() < original_len {
|
||||
let _ = respond.send(Ok(()));
|
||||
} else {
|
||||
let _ = respond.send(Err(format!("Mission {} not found in queue", mission_id)));
|
||||
}
|
||||
}
|
||||
}
|
||||
ControlCommand::ListRunning { respond } => {
|
||||
// Return info about currently running missions
|
||||
let mut running_list = Vec::new();
|
||||
|
||||
if running.is_some() {
|
||||
if let Some(mission_id) = current_mission.read().await.clone() {
|
||||
running_list.push(super::mission_runner::RunningMissionInfo {
|
||||
mission_id,
|
||||
model_override: None, // Could track this
|
||||
state: "running".to_string(),
|
||||
queue_len: queue.len(),
|
||||
history_len: history.len(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let _ = respond.send(running_list);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle agent-initiated mission status changes (from complete_mission tool)
|
||||
@@ -1222,10 +1449,11 @@ async fn control_actor_loop(
|
||||
success: agent_result.success,
|
||||
cost_cents: agent_result.cost_cents,
|
||||
model: agent_result.model_used,
|
||||
mission_id: None,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = events_tx.send(AgentEvent::Error { message: format!("Control session task join failed: {}", e) });
|
||||
let _ = events_tx.send(AgentEvent::Error { message: format!("Control session task join failed: {}", e), mission_id: None });
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1233,7 +1461,7 @@ async fn control_actor_loop(
|
||||
// Start next queued message, if any.
|
||||
if let Some((mid, msg, model_override)) = queue.pop_front() {
|
||||
set_and_emit_status(&status, &events_tx, ControlRunState::Running, queue.len()).await;
|
||||
let _ = events_tx.send(AgentEvent::UserMessage { id: mid, content: msg.clone() });
|
||||
let _ = events_tx.send(AgentEvent::UserMessage { id: mid, content: msg.clone(), mission_id: None });
|
||||
let cfg = config.clone();
|
||||
let agent = Arc::clone(&root_agent);
|
||||
let mem = memory.clone();
|
||||
|
||||
340
src/api/mission_runner.rs
Normal file
340
src/api/mission_runner.rs
Normal file
@@ -0,0 +1,340 @@
|
||||
//! Mission Runner - Isolated execution context for a single mission.
|
||||
//!
|
||||
//! This module provides a clean abstraction for running missions in parallel.
|
||||
//! Each MissionRunner manages its own:
|
||||
//! - Conversation history
|
||||
//! - Message queue
|
||||
//! - Execution state
|
||||
//! - Cancellation token
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
|
||||
use tokio::sync::{broadcast, mpsc, RwLock};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::agents::{AgentContext, AgentRef, AgentResult};
|
||||
use crate::budget::{Budget, ModelPricing, SharedBenchmarkRegistry, SharedModelResolver};
|
||||
use crate::config::Config;
|
||||
use crate::llm::OpenRouterClient;
|
||||
use crate::memory::{ContextBuilder, MemorySystem};
|
||||
use crate::task::VerificationCriteria;
|
||||
use crate::tools::ToolRegistry;
|
||||
|
||||
use super::control::{
|
||||
AgentEvent, AgentTreeNode, ControlStatus, ExecutionProgress, FrontendToolHub,
|
||||
};
|
||||
|
||||
/// State of a running mission.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum MissionRunState {
|
||||
/// Waiting in queue
|
||||
Queued,
|
||||
/// Currently executing
|
||||
Running,
|
||||
/// Waiting for frontend tool input
|
||||
WaitingForTool,
|
||||
/// Finished (check result)
|
||||
Finished,
|
||||
}
|
||||
|
||||
/// A message queued for this mission.
#[derive(Debug, Clone)]
pub struct QueuedMessage {
    /// Unique id of the queued user message (also used in emitted events).
    pub id: Uuid,
    /// Raw user message text fed to the agent.
    pub content: String,
    /// Per-message model override. When queued via
    /// `MissionRunner::queue_message`, this falls back to the runner's
    /// mission-level `model_override` if the caller passed `None`.
    pub model_override: Option<String>,
}
|
||||
|
||||
/// Isolated runner for a single mission.
///
/// Owns everything one mission needs to execute independently of its
/// siblings — message queue, conversation history, cancellation token,
/// and per-mission UI snapshots — making it the unit of isolation for
/// parallel mission execution.
pub struct MissionRunner {
    /// Mission ID
    pub mission_id: Uuid,

    /// Model override for this mission (if any); used as the fallback
    /// override for messages queued without their own.
    pub model_override: Option<String>,

    /// Current state (see `MissionRunState` for transition notes)
    pub state: MissionRunState,

    /// Message queue for this mission
    pub queue: VecDeque<QueuedMessage>,

    /// Conversation history: (role, content) pairs, where role is
    /// "user" or "assistant"
    pub history: Vec<(String, String)>,

    /// Cancellation token for the current execution
    pub cancel_token: Option<CancellationToken>,

    /// Running task handle; resolves to (message id, user message, result).
    /// `None` when no turn is in flight.
    running_handle: Option<tokio::task::JoinHandle<(Uuid, String, AgentResult)>>,

    /// Tree snapshot for this mission (shared with the executing task)
    pub tree_snapshot: Arc<RwLock<Option<AgentTreeNode>>>,

    /// Progress snapshot for this mission (shared with the executing task)
    pub progress_snapshot: Arc<RwLock<ExecutionProgress>>,
}
|
||||
|
||||
impl MissionRunner {
    /// Create a new mission runner in the `Queued` (idle) state with an
    /// empty queue and history.
    pub fn new(mission_id: Uuid, model_override: Option<String>) -> Self {
        Self {
            mission_id,
            model_override,
            state: MissionRunState::Queued,
            queue: VecDeque::new(),
            history: Vec::new(),
            cancel_token: None,
            running_handle: None,
            tree_snapshot: Arc::new(RwLock::new(None)),
            progress_snapshot: Arc::new(RwLock::new(ExecutionProgress::default())),
        }
    }

    /// Check if this runner is currently executing (including when it is
    /// blocked waiting for a frontend tool response).
    pub fn is_running(&self) -> bool {
        matches!(self.state, MissionRunState::Running | MissionRunState::WaitingForTool)
    }

    /// Check if this runner has finished (terminal failure state; see
    /// `poll_completion` — a successful turn returns to `Queued` instead).
    pub fn is_finished(&self) -> bool {
        matches!(self.state, MissionRunState::Finished)
    }

    /// Queue a message for this mission. A `None` per-message override
    /// inherits the runner's mission-level `model_override`.
    pub fn queue_message(&mut self, id: Uuid, content: String, model_override: Option<String>) {
        self.queue.push_back(QueuedMessage {
            id,
            content,
            model_override: model_override.or_else(|| self.model_override.clone()),
        });
    }

    /// Cancel the current execution. No-op when nothing is running.
    /// Note: this only signals the token; state is not changed here —
    /// the cancelled task is reaped via `poll_completion`.
    pub fn cancel(&mut self) {
        if let Some(token) = &self.cancel_token {
            token.cancel();
        }
    }

    /// Start executing the next queued message (if any and not already running).
    /// Returns true if execution was started.
    ///
    /// Spawns a detached tokio task running `run_mission_turn` over a
    /// clone of the current history; the handle is stored in
    /// `running_handle` and reaped later by `poll_completion`.
    pub fn start_next(
        &mut self,
        config: Config,
        root_agent: AgentRef,
        memory: Option<MemorySystem>,
        benchmarks: SharedBenchmarkRegistry,
        resolver: SharedModelResolver,
        pricing: Arc<ModelPricing>,
        events_tx: broadcast::Sender<AgentEvent>,
        tool_hub: Arc<FrontendToolHub>,
        status: Arc<RwLock<ControlStatus>>,
        mission_cmd_tx: mpsc::Sender<crate::tools::mission::MissionControlCommand>,
        current_mission: Arc<RwLock<Option<Uuid>>>,
    ) -> bool {
        // Don't start if already running
        if self.is_running() {
            return false;
        }

        // Get next message from queue
        let msg = match self.queue.pop_front() {
            Some(m) => m,
            None => return false,
        };

        self.state = MissionRunState::Running;

        // Fresh token per turn; `cancel()` acts on this clone.
        let cancel = CancellationToken::new();
        self.cancel_token = Some(cancel.clone());

        // Snapshot state moved into the spawned task. History is cloned,
        // so messages appended mid-turn are not seen by this turn.
        let hist_snapshot = self.history.clone();
        let tree_ref = Arc::clone(&self.tree_snapshot);
        let progress_ref = Arc::clone(&self.progress_snapshot);
        let mission_id = self.mission_id;
        let model_override = msg.model_override;
        let user_message = msg.content.clone();
        let msg_id = msg.id;

        // Create mission control for complete_mission tool
        let mission_ctrl = crate::tools::mission::MissionControl {
            current_mission_id: current_mission,
            cmd_tx: mission_cmd_tx,
        };

        // Emit user message event with mission context so subscribers can
        // route it to the right mission (parallel execution).
        let _ = events_tx.send(AgentEvent::UserMessage {
            id: msg_id,
            content: user_message.clone(),
            mission_id: Some(mission_id),
        });

        let handle = tokio::spawn(async move {
            let result = run_mission_turn(
                config,
                root_agent,
                memory,
                benchmarks,
                resolver,
                pricing,
                events_tx,
                tool_hub,
                status,
                cancel,
                hist_snapshot,
                user_message.clone(),
                model_override,
                Some(mission_ctrl),
                tree_ref,
                progress_ref,
                mission_id,
            )
            .await;
            // Echo back id and message so the reaper can append to history.
            (msg_id, user_message, result)
        });

        self.running_handle = Some(handle);
        true
    }

    /// Poll for completion. Returns Some(result) if finished.
    ///
    /// Non-blocking in effect: the `.await` only happens after
    /// `JoinHandle::is_finished()` reports true. On success the turn is
    /// appended to history and state returns to `Queued` (ready for the
    /// next message). On join error the handle is dropped and the runner
    /// enters the terminal `Finished` state.
    pub async fn poll_completion(&mut self) -> Option<(Uuid, String, AgentResult)> {
        // Take the handle; it is put back below if the task is still running.
        let handle = self.running_handle.take()?;

        // Check if handle is finished
        if handle.is_finished() {
            match handle.await {
                Ok(result) => {
                    self.state = MissionRunState::Queued; // Ready for next message
                    // Add the completed turn to history: (user msg, assistant output)
                    self.history.push(("user".to_string(), result.1.clone()));
                    self.history.push(("assistant".to_string(), result.2.output.clone()));
                    Some(result)
                }
                Err(e) => {
                    // Task panicked or was aborted; the result is lost.
                    tracing::error!("Mission runner task failed: {}", e);
                    self.state = MissionRunState::Finished;
                    None
                }
            }
        } else {
            // Not finished, put handle back
            self.running_handle = Some(handle);
            None
        }
    }

    /// Check if the running task is finished (non-blocking).
    /// Returns true when no task is in flight at all.
    pub fn check_finished(&self) -> bool {
        self.running_handle
            .as_ref()
            .map(|h| h.is_finished())
            .unwrap_or(true)
    }
}
|
||||
|
||||
/// Execute a single turn for a mission.
///
/// Builds the conversation prompt from `history` plus `user_message`,
/// constructs a fresh `Task`, LLM client, tool registry and
/// `AgentContext`, wires in the shared event/status/snapshot channels,
/// and delegates to `root_agent.execute`. Returns the agent's result;
/// task-creation failure is reported as an `AgentResult::failure`
/// rather than a panic.
async fn run_mission_turn(
    config: Config,
    root_agent: AgentRef,
    memory: Option<MemorySystem>,
    benchmarks: SharedBenchmarkRegistry,
    resolver: SharedModelResolver,
    pricing: Arc<ModelPricing>,
    events_tx: broadcast::Sender<AgentEvent>,
    tool_hub: Arc<FrontendToolHub>,
    status: Arc<RwLock<ControlStatus>>,
    cancel: CancellationToken,
    history: Vec<(String, String)>,
    user_message: String,
    model_override: Option<String>,
    mission_control: Option<crate::tools::mission::MissionControl>,
    tree_snapshot: Arc<RwLock<Option<AgentTreeNode>>>,
    progress_snapshot: Arc<RwLock<ExecutionProgress>>,
    // NOTE(review): currently unused inside the turn (underscore-prefixed);
    // events emitted here carry no mission_id — presumably to be wired up
    // when full parallel routing lands. TODO confirm.
    _mission_id: Uuid,
) -> AgentResult {
    // Build context with history
    let working_dir = config.working_dir.to_string_lossy().to_string();
    let context_builder = ContextBuilder::new(&config.context, &working_dir);
    let history_context = context_builder.build_history_context(&history);

    // Prompt = prior-history context + the new user message + standing instructions.
    let mut convo = String::new();
    convo.push_str(&history_context);
    convo.push_str("User:\n");
    convo.push_str(&user_message);
    convo.push_str("\n\nInstructions:\n- Continue the conversation helpfully.\n- You may use tools to gather information or make changes.\n- When appropriate, use Tool UI tools (ui_*) for structured output or to ask for user selections.\n- For large data processing tasks (>10KB), use run_command to execute Python scripts rather than processing inline.\n- When you have fully completed the user's goal or determined it cannot be completed, use the complete_mission tool to mark the mission status.\n");

    // Per-turn budget of 1000 units — presumably cents; verify against
    // Budget's definition.
    let budget = Budget::new(1000);
    let verification = VerificationCriteria::None;
    let mut task = match crate::task::Task::new(convo, verification, budget) {
        Ok(t) => t,
        Err(e) => {
            return AgentResult::failure(format!("Failed to create task: {}", e), 0);
        }
    };

    // Apply model override if specified
    if let Some(model) = model_override {
        tracing::info!("Mission using model override: {}", model);
        task.analysis_mut().requested_model = Some(model);
    }

    // Create LLM client
    let llm = Arc::new(OpenRouterClient::new(config.api_key.clone()));

    // Create shared memory reference for memory tools
    let shared_memory: Option<crate::tools::memory::SharedMemory> = memory
        .as_ref()
        .map(|m| Arc::new(tokio::sync::RwLock::new(Some(m.clone()))));

    // Registry gets its own clone of mission_control; the context keeps
    // the original below so the complete_mission tool can reach it.
    let tools = ToolRegistry::with_options(mission_control.clone(), shared_memory);
    let mut ctx = AgentContext::with_memory(
        config.clone(),
        llm,
        tools,
        pricing,
        config.working_dir.clone(),
        memory,
    );
    // Wire the per-mission channels and snapshots into the agent context.
    ctx.mission_control = mission_control;
    ctx.control_events = Some(events_tx);
    ctx.frontend_tool_hub = Some(tool_hub);
    ctx.control_status = Some(status);
    ctx.cancel_token = Some(cancel);
    ctx.benchmarks = Some(benchmarks);
    ctx.resolver = Some(resolver);
    ctx.tree_snapshot = Some(tree_snapshot);
    ctx.progress_snapshot = Some(progress_snapshot);

    root_agent.execute(&mut task, &ctx).await
}
|
||||
|
||||
/// Compact info about a running mission (for API responses).
/// Serialized as JSON by the `/api/control/running` endpoint.
#[derive(Debug, Clone, serde::Serialize)]
pub struct RunningMissionInfo {
    /// Mission being reported on.
    pub mission_id: Uuid,
    /// Mission-level model override, if one is set.
    pub model_override: Option<String>,
    /// Snake-case run-state label: "queued" | "running" |
    /// "waiting_for_tool" | "finished".
    pub state: String,
    /// Number of messages still queued for this mission.
    pub queue_len: usize,
    /// Number of (role, content) entries in the conversation history.
    pub history_len: usize,
}
|
||||
|
||||
impl From<&MissionRunner> for RunningMissionInfo {
|
||||
fn from(runner: &MissionRunner) -> Self {
|
||||
Self {
|
||||
mission_id: runner.mission_id,
|
||||
model_override: runner.model_override.clone(),
|
||||
state: match runner.state {
|
||||
MissionRunState::Queued => "queued".to_string(),
|
||||
MissionRunState::Running => "running".to_string(),
|
||||
MissionRunState::WaitingForTool => "waiting_for_tool".to_string(),
|
||||
MissionRunState::Finished => "finished".to_string(),
|
||||
},
|
||||
queue_len: runner.queue.len(),
|
||||
history_len: runner.history.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -20,6 +20,7 @@ mod console;
|
||||
pub mod control;
|
||||
mod fs;
|
||||
pub mod mcp;
|
||||
pub mod mission_runner;
|
||||
mod ssh_util;
|
||||
pub mod types;
|
||||
|
||||
|
||||
@@ -130,6 +130,11 @@ pub async fn serve(config: Config) -> anyhow::Result<()> {
|
||||
.route("/api/control/missions/:id", get(control::get_mission))
|
||||
.route("/api/control/missions/:id/load", post(control::load_mission))
|
||||
.route("/api/control/missions/:id/status", post(control::set_mission_status))
|
||||
.route("/api/control/missions/:id/cancel", post(control::cancel_mission))
|
||||
.route("/api/control/missions/:id/parallel", post(control::start_mission_parallel))
|
||||
// Parallel execution endpoints
|
||||
.route("/api/control/running", get(control::list_running_missions))
|
||||
.route("/api/control/parallel/config", get(control::get_parallel_config))
|
||||
// Memory endpoints
|
||||
.route("/api/runs", get(list_runs))
|
||||
.route("/api/runs/:id", get(get_run))
|
||||
|
||||
@@ -226,6 +226,9 @@ pub struct Config {
|
||||
/// Hours of inactivity after which an active mission is auto-closed (0 = disabled)
|
||||
pub stale_mission_hours: u64,
|
||||
|
||||
/// Maximum number of missions that can run in parallel (1 = sequential only)
|
||||
pub max_parallel_missions: usize,
|
||||
|
||||
/// Development mode (disables auth; more permissive defaults)
|
||||
pub dev_mode: bool,
|
||||
|
||||
@@ -347,6 +350,12 @@ impl Config {
|
||||
.parse()
|
||||
.map_err(|e| ConfigError::InvalidValue("STALE_MISSION_HOURS".to_string(), format!("{}", e)))?;
|
||||
|
||||
// Maximum parallel missions (default: 1 = sequential)
|
||||
let max_parallel_missions = std::env::var("MAX_PARALLEL_MISSIONS")
|
||||
.unwrap_or_else(|_| "1".to_string())
|
||||
.parse()
|
||||
.map_err(|e| ConfigError::InvalidValue("MAX_PARALLEL_MISSIONS".to_string(), format!("{}", e)))?;
|
||||
|
||||
let dev_mode = std::env::var("DEV_MODE")
|
||||
.ok()
|
||||
.map(|v| parse_bool(&v).map_err(|e| ConfigError::InvalidValue("DEV_MODE".to_string(), e)))
|
||||
@@ -413,6 +422,7 @@ impl Config {
|
||||
port,
|
||||
max_iterations,
|
||||
stale_mission_hours,
|
||||
max_parallel_missions,
|
||||
dev_mode,
|
||||
auth,
|
||||
console_ssh,
|
||||
@@ -435,6 +445,7 @@ impl Config {
|
||||
port: 3000,
|
||||
max_iterations: 50,
|
||||
stale_mission_hours: 24,
|
||||
max_parallel_missions: 1,
|
||||
dev_mode: true,
|
||||
auth: AuthConfig::default(),
|
||||
console_ssh: ConsoleSshConfig::default(),
|
||||
|
||||
Reference in New Issue
Block a user