Add model override support to control session API

The /api/control/message endpoint now accepts an optional `model` field
to specify which model to use for a particular message. This enables:
- Model comparison tests from the dashboard
- Per-message model selection in the control session

The model override is passed through to the task's requested_model field,
which the ModelSelector respects when choosing the execution model.
This commit is contained in:
Thomas Marchand
2025-12-19 08:35:27 +00:00
parent 1f155bd3b5
commit 1fd2ad0702
2 changed files with 143 additions and 6 deletions

121
scripts/check_all_tasks.py Normal file
View File

@@ -0,0 +1,121 @@
#!/usr/bin/env python3
"""Check all security analysis task results."""
import json
import requests
import os
API_URL = "https://agent-backend.thomas.md"
# All security analysis task IDs (latest run after service restart)
TASKS = {
"moonshotai/kimi-k2-thinking": "ec103234-9fe5-4814-ab24-58dd7856dd43",
"x-ai/grok-4.1-fast": "97e654e1-a381-49da-911b-1b835449bb55",
"google/gemini-3-flash-preview": "989ecdb8-b900-44d8-a9e5-a9e9394a9077",
"deepseek/deepseek-v3.2": "7a589315-9b9b-4805-9e14-da01224717e1",
"qwen/qwen3-vl-235b-a22b-thinking": "d19711d5-3158-48cd-aa88-81bb4d22262c",
"mistralai/mistral-large-2512": "e0c3b62e-7b64-425f-8289-0a1b274e5dd4",
"amazon/nova-pro-v1": "3fe39368-a7fe-4852-961f-44863128b426",
"z-ai/glm-4.6v": "eb161e08-c923-44b5-8c8d-6f0d01366082",
"anthropic/claude-sonnet-4.5": "8943e1ef-2c95-4485-bcee-8d3bc611fa6d",
}
def get_token():
"""Get auth token."""
secrets_path = os.path.join(os.path.dirname(__file__), "..", "secrets.json")
password = ""
if os.path.exists(secrets_path):
with open(secrets_path) as f:
secrets = json.load(f)
password = secrets.get("auth", {}).get("dashboard_password", "")
if not password:
password = os.environ.get("DASHBOARD_PASSWORD", "")
if not password:
print("Error: No dashboard password found")
return None
resp = requests.post(f"{API_URL}/api/auth/login", json={"password": password})
return resp.json().get("token")
def check_task(token, model, task_id):
"""Check a task's status."""
headers = {"Authorization": f"Bearer {token}"}
try:
resp = requests.get(f"{API_URL}/api/task/{task_id}", headers=headers)
data = resp.json()
return {
"model": model,
"task_id": task_id,
"status": data.get("status", "unknown"),
"iterations": data.get("iterations", 0),
"result_length": len(data.get("result") or ""),
"result_preview": (data.get("result") or "")[:200],
"error": "Error:" in (data.get("result") or ""),
}
except Exception as e:
return {
"model": model,
"task_id": task_id,
"status": "error",
"iterations": 0,
"result_length": 0,
"result_preview": str(e),
"error": True,
}
def main():
token = get_token()
if not token:
return
print("=" * 80)
print("Security Analysis Task Status")
print("=" * 80)
print()
results = []
for model, task_id in TASKS.items():
result = check_task(token, model, task_id)
results.append(result)
# Print summary table
print(f"{'Model':<40} | {'Status':<10} | {'Iters':<5} | {'Chars':<8}")
print("-" * 40 + "-+-" + "-" * 10 + "-+-" + "-" * 5 + "-+-" + "-" * 8)
for r in results:
print(f"{r['model']:<40} | {r['status']:<10} | {r['iterations']:<5} | {r['result_length']:<8}")
# Categorize
completed = [r for r in results if r["status"] == "completed" and not r["error"]]
failed = [r for r in results if r["status"] == "failed" or r["error"]]
running = [r for r in results if r["status"] in ("pending", "running")]
print()
print("=" * 80)
print(f"Summary: {len(completed)} completed, {len(running)} running, {len(failed)} failed")
print("=" * 80)
if completed:
print(f"\n✓ Completed ({len(completed)}):")
for r in completed:
preview = r['result_preview'].replace('\n', ' ')[:100]
print(f" - {r['model']}: {preview}...")
if running:
print(f"\n⏳ Running ({len(running)}):")
for r in running:
print(f" - {r['model']}")
if failed:
print(f"\n❌ Failed ({len(failed)}):")
for r in failed:
preview = r['result_preview'].replace('\n', ' ')[:100]
print(f" - {r['model']}: {preview}...")
if __name__ == "__main__":
main()