#!/usr/bin/env python3
# Origin (diff header): diff --git a/scripts/check_all_tasks.py b/scripts/check_all_tasks.py
#                       new file mode 100644, index 0000000..e7ed051
"""Check all security analysis task results.

Logs in to the agent backend, fetches the state of every task listed in
``TASKS`` and prints a status table followed by a per-category summary.
"""

import json
import os

import requests

API_URL = "https://agent-backend.thomas.md"

# Upper bound (seconds) for every HTTP call. ``requests`` has no default
# timeout, so without this a stalled backend would hang the script forever.
REQUEST_TIMEOUT = 30

# All security analysis task IDs (latest run after service restart)
TASKS = {
    "moonshotai/kimi-k2-thinking": "ec103234-9fe5-4814-ab24-58dd7856dd43",
    "x-ai/grok-4.1-fast": "97e654e1-a381-49da-911b-1b835449bb55",
    "google/gemini-3-flash-preview": "989ecdb8-b900-44d8-a9e5-a9e9394a9077",
    "deepseek/deepseek-v3.2": "7a589315-9b9b-4805-9e14-da01224717e1",
    "qwen/qwen3-vl-235b-a22b-thinking": "d19711d5-3158-48cd-aa88-81bb4d22262c",
    "mistralai/mistral-large-2512": "e0c3b62e-7b64-425f-8289-0a1b274e5dd4",
    "amazon/nova-pro-v1": "3fe39368-a7fe-4852-961f-44863128b426",
    "z-ai/glm-4.6v": "eb161e08-c923-44b5-8c8d-6f0d01366082",
    "anthropic/claude-sonnet-4.5": "8943e1ef-2c95-4485-bcee-8d3bc611fa6d",
}


def _load_password():
    """Return the dashboard password, or "" when none is configured.

    ``secrets.json`` (one directory above this script) is consulted first;
    the ``DASHBOARD_PASSWORD`` environment variable is the fallback.
    """
    secrets_path = os.path.join(os.path.dirname(__file__), "..", "secrets.json")
    password = ""
    if os.path.exists(secrets_path):
        try:
            with open(secrets_path, encoding="utf-8") as f:
                config = json.load(f)
            password = config.get("auth", {}).get("dashboard_password", "")
        except (OSError, ValueError, AttributeError) as e:
            # An unreadable or malformed secrets file must not prevent the
            # environment-variable fallback from being tried.
            print(f"Warning: could not read {secrets_path}: {e}")
    return password or os.environ.get("DASHBOARD_PASSWORD", "")


def get_token():
    """Get auth token.

    Returns:
        The bearer token string issued by ``/api/auth/login``, or ``None``
        when no password is configured or the login fails. The reason is
        printed before ``None`` is returned.
    """
    password = _load_password()
    if not password:
        print("Error: No dashboard password found")
        return None

    try:
        resp = requests.post(
            f"{API_URL}/api/auth/login",
            json={"password": password},
            timeout=REQUEST_TIMEOUT,
        )
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older ``requests`` releases.
        print(f"Error: Login request failed: {e}")
        return None

    token = payload.get("token") if isinstance(payload, dict) else None
    if not token:
        print("Error: Login response did not contain a token")
    return token


def _error_result(model, task_id, message):
    """Build the result record used when a task could not be queried."""
    return {
        "model": model,
        "task_id": task_id,
        "status": "error",
        "iterations": 0,
        "result_length": 0,
        "result_preview": message,
        "error": True,
    }


def check_task(token, model, task_id):
    """Check a task's status.

    Args:
        token: Bearer token obtained from ``get_token``.
        model: Model name the task was run with (used for display only).
        task_id: Identifier of the task to query.

    Returns:
        A dict with the keys ``model``, ``task_id``, ``status``,
        ``iterations``, ``result_length``, ``result_preview`` (first 200
        characters of the result) and ``error``. ``error`` is ``True`` when
        the result text contains ``"Error:"`` or when the request itself
        failed; in the latter case ``status`` is ``"error"`` and
        ``result_preview`` holds the failure message.
    """
    headers = {"Authorization": f"Bearer {token}"}
    try:
        resp = requests.get(
            f"{API_URL}/api/task/{task_id}",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        # Surface 4xx/5xx replies as errors instead of reading their JSON
        # body as if it were a task and reporting status "unknown".
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError(f"unexpected payload type: {type(data).__name__}")
    except (requests.RequestException, ValueError) as e:
        return _error_result(model, task_id, str(e))

    raw_result = data.get("result") or ""
    # Guard against a non-string result so len()/slicing below cannot fail.
    result_text = raw_result if isinstance(raw_result, str) else str(raw_result)
    return {
        "model": model,
        "task_id": task_id,
        # ``or`` also replaces an explicit null, which would otherwise break
        # the aligned table formatting in main().
        "status": str(data.get("status") or "unknown"),
        "iterations": data.get("iterations") or 0,
        "result_length": len(result_text),
        "result_preview": result_text[:200],
        "error": "Error:" in result_text,
    }


def _categorize(result):
    """Map a result record to exactly one summary bucket.

    An error flag wins over the reported status, so a task is never counted
    in two buckets; statuses the script does not know land in ``"other"``
    instead of disappearing from the summary.
    """
    status = result["status"]
    if result["error"] or status == "failed":
        return "failed"
    if status == "completed":
        return "completed"
    if status in ("pending", "running"):
        return "running"
    return "other"


def _print_previews(results):
    """Print one ``- model: preview...`` line per result."""
    for r in results:
        preview = r["result_preview"].replace("\n", " ")[:100]
        print(f"  - {r['model']}: {preview}...")


def main():
    """Query every task in ``TASKS`` and print a table plus a summary."""
    token = get_token()
    if not token:
        return

    print("=" * 80)
    print("Security Analysis Task Status")
    print("=" * 80)
    print()

    results = [check_task(token, model, task_id) for model, task_id in TASKS.items()]

    # Print summary table
    print(f"{'Model':<40} | {'Status':<10} | {'Iters':<5} | {'Chars':<8}")
    print("-" * 40 + "-+-" + "-" * 10 + "-+-" + "-" * 5 + "-+-" + "-" * 8)

    for r in results:
        print(f"{r['model']:<40} | {r['status']:<10} | {r['iterations']:<5} | {r['result_length']:<8}")

    # Categorize
    buckets = {"completed": [], "failed": [], "running": [], "other": []}
    for r in results:
        buckets[_categorize(r)].append(r)
    completed = buckets["completed"]
    failed = buckets["failed"]
    running = buckets["running"]
    other = buckets["other"]

    summary = f"Summary: {len(completed)} completed, {len(running)} running, {len(failed)} failed"
    if other:
        # Only mentioned when present so the usual output stays unchanged.
        summary += f", {len(other)} other"

    print()
    print("=" * 80)
    print(summary)
    print("=" * 80)

    if completed:
        print(f"\n✓ Completed ({len(completed)}):")
        _print_previews(completed)

    if running:
        print(f"\n⏳ Running ({len(running)}):")
        for r in running:
            print(f"  - {r['model']}")

    if failed:
        print(f"\n❌ Failed ({len(failed)}):")
        _print_previews(failed)

    if other:
        print(f"\nOther ({len(other)}):")
        for r in other:
            print(f"  - {r['model']}: {r['status']}")


if __name__ == "__main__":
    main()

# End of scripts/check_all_tasks.py. The text that follows in this file belongs
# to the next diff entry:
#   diff --git a/src/api/control.rs b/src/api/control.rs
#   index d086467..ef3b325 100644
#   --- a/src/api/control.rs
#   +++ b/src/api/control.rs
#   @@ -37,6 +37,10 @@ use super::routes::AppState;
#[derive(Debug, Clone, Deserialize)] pub struct ControlMessageRequest { pub content: String, + /// Optional model override for this message. + /// If not specified, uses the server's default model. + #[serde(default)] + pub model: Option, } #[derive(Debug, Clone, Serialize)] @@ -224,6 +228,8 @@ pub enum ControlCommand { UserMessage { id: Uuid, content: String, + /// Optional model override for this message + model: Option, }, ToolResult { tool_call_id: String, @@ -388,7 +394,7 @@ pub async fn post_message( state .control .cmd_tx - .send(ControlCommand::UserMessage { id, content }) + .send(ControlCommand::UserMessage { id, content, model: req.model }) .await .map_err(|_| { ( @@ -809,7 +815,8 @@ async fn control_actor_loop( current_tree: Arc>>, progress: Arc>, ) { - let mut queue: VecDeque<(Uuid, String)> = VecDeque::new(); + // Queue stores (id, content, model_override) + let mut queue: VecDeque<(Uuid, String, Option)> = VecDeque::new(); let mut history: Vec<(String, String)> = Vec::new(); // (role, content) pairs (user/assistant) let pricing = Arc::new(ModelPricing::new()); @@ -928,7 +935,7 @@ async fn control_actor_loop( cmd = cmd_rx.recv() => { let Some(cmd) = cmd else { break }; match cmd { - ControlCommand::UserMessage { id, content } => { + ControlCommand::UserMessage { id, content, model } => { // Auto-create mission on first message if none exists { let mission_id = current_mission.read().await.clone(); @@ -940,7 +947,7 @@ async fn control_actor_loop( } } - queue.push_back((id, content)); + queue.push_back((id, content, model)); set_and_emit_status( &status, &events_tx, @@ -948,7 +955,7 @@ async fn control_actor_loop( queue.len(), ).await; if running.is_none() { - if let Some((mid, msg)) = queue.pop_front() { + if let Some((mid, msg, model_override)) = queue.pop_front() { set_and_emit_status(&status, &events_tx, ControlRunState::Running, queue.len()).await; let _ = events_tx.send(AgentEvent::UserMessage { id: mid, content: msg.clone() }); let cfg = 
config.clone(); @@ -983,6 +990,7 @@ async fn control_actor_loop( cancel, hist_snapshot, msg.clone(), + model_override, Some(mission_ctrl), tree_ref, progress_ref, @@ -1150,7 +1158,7 @@ async fn control_actor_loop( } // Start next queued message, if any. - if let Some((mid, msg)) = queue.pop_front() { + if let Some((mid, msg, model_override)) = queue.pop_front() { set_and_emit_status(&status, &events_tx, ControlRunState::Running, queue.len()).await; let _ = events_tx.send(AgentEvent::UserMessage { id: mid, content: msg.clone() }); let cfg = config.clone(); @@ -1185,6 +1193,7 @@ async fn control_actor_loop( cancel, hist_snapshot, msg.clone(), + model_override, Some(mission_ctrl), tree_ref, progress_ref, @@ -1213,6 +1222,7 @@ async fn run_single_control_turn( cancel: CancellationToken, history: Vec<(String, String)>, user_message: String, + model_override: Option, mission_control: Option, tree_snapshot: Arc>>, progress_snapshot: Arc>, @@ -1239,6 +1249,12 @@ async fn run_single_control_turn( } }; + // Apply model override if specified + if let Some(model) = model_override { + tracing::info!("Using model override: {}", model); + task.analysis_mut().requested_model = Some(model); + } + // Context for agent execution. let llm = Arc::new(OpenRouterClient::new(config.api_key.clone()));