diff --git a/dashboard/src/app/control/control-client.tsx b/dashboard/src/app/control/control-client.tsx
index 1265df4..f6fc9c3 100644
--- a/dashboard/src/app/control/control-client.tsx
+++ b/dashboard/src/app/control/control-client.tsx
@@ -21,6 +21,7 @@ import {
getRunningMissions,
cancelMission,
listModels,
+ getModelDisplayName,
type ControlRunState,
type Mission,
type MissionStatus,
@@ -1202,7 +1203,7 @@ export default function ControlClient() {
                {availableModels.map((model) => (
-                  <option key={model.id} value={model.id}>{model.id}</option>
+                  <option key={model.id} value={model.id}>{getModelDisplayName(model.id)}</option>
                ))}
diff --git a/dashboard/src/lib/api.ts b/dashboard/src/lib/api.ts
index 12fe350..b8896c6 100644
--- a/dashboard/src/lib/api.ts
+++ b/dashboard/src/lib/api.ts
@@ -763,3 +763,46 @@ export async function listModels(tier?: string): Promise {
if (!res.ok) throw new Error("Failed to fetch models");
return res.json();
}
+
+// Friendly display names for models
+const MODEL_DISPLAY_NAMES: Record<string, string> = {
+ // OpenAI - simplified (newest first)
+ "openai/gpt-5.2-pro": "gpt-5.2-pro",
+ "openai/gpt-5.2": "gpt-5.2",
+ "openai/gpt-5.2-chat": "gpt-5.2",
+ "openai/gpt-4.1-mini": "gpt-4-mini",
+ "openai/gpt-4.1": "gpt-4",
+ "openai/o1": "o1",
+ "openai/o3-mini-high": "o3-mini",
+ // Anthropic - simplified
+ "anthropic/claude-sonnet-4.5": "4.5-sonnet",
+ "anthropic/claude-opus-4.5": "4.5-opus",
+ "anthropic/claude-haiku-4.5": "4.5-haiku",
+ // Google
+ "google/gemini-3-flash-preview": "gemini-3-flash",
+ "google/gemini-3-pro-image-preview": "gemini-3-pro",
+ // DeepSeek
+ "deepseek/deepseek-r1": "deepseek-r1",
+ "deepseek/deepseek-chat-v3-0324": "deepseek-v3",
+ // Qwen
+ "qwen/qwq-32b": "qwq-32b",
+ "qwen/qwen-2.5-72b-instruct": "qwen-72b",
+ "qwen/qwen3-next-80b-a3b-thinking": "qwen3-thinking",
+ // Mistral
+ "mistralai/mistral-small-24b-instruct-2501": "mistral-small",
+ "mistralai/mistral-medium-3.1": "mistral-medium",
+ "mistralai/mistral-large-2512": "mistral-large",
+ // Meta
+ "meta-llama/llama-3.1-405b": "llama-405b",
+ "meta-llama/llama-3.2-90b-vision-instruct": "llama-90b-vision",
+ "meta-llama/llama-3.3-70b-instruct:free": "llama-70b (free)",
+};
+
+// Get display name for a model
+export function getModelDisplayName(modelId: string): string {
+ if (MODEL_DISPLAY_NAMES[modelId]) {
+ return MODEL_DISPLAY_NAMES[modelId];
+ }
+ // Fallback: strip provider prefix
+ return modelId.includes("/") ? modelId.split("/").pop()! : modelId;
+}
diff --git a/models_with_benchmarks.json b/models_with_benchmarks.json
index 5e51390..c690ac3 100644
--- a/models_with_benchmarks.json
+++ b/models_with_benchmarks.json
@@ -1,7 +1,7 @@
{
- "generated_at": "2025-12-19T07:20:20Z",
+ "generated_at": "2025-12-22T20:23:08Z",
"total_models": 350,
- "models_with_benchmarks": 158,
+ "models_with_benchmarks": 157,
"categories": [
"code",
"math",
@@ -35,6 +35,21 @@
],
"tier": "fast"
},
+ "gpt-5": {
+ "latest": "openai/gpt-5.2-chat",
+ "members": [
+ "openai/gpt-5.2-chat",
+ "openai/gpt-5.2"
+ ],
+ "tier": "mid"
+ },
+ "gpt-5-pro": {
+ "latest": "openai/gpt-5.2-pro",
+ "members": [
+ "openai/gpt-5.2-pro"
+ ],
+ "tier": "flagship"
+ },
"mistral-large": {
"latest": "mistralai/mistral-large-2512",
"members": [
@@ -85,6 +100,13 @@
],
"tier": "mid"
},
+ "qwen3-thinking": {
+ "latest": "qwen/qwen3-next-80b-a3b-thinking",
+ "members": [
+ "qwen/qwen3-next-80b-a3b-thinking"
+ ],
+ "tier": "flagship"
+ },
"deepseek-chat": {
"latest": "deepseek/deepseek-chat-v3-0324",
"members": [
@@ -204,6 +226,10 @@
"mistralai/mistral-small-creative": "mistralai/mistral-small-24b-instruct-2501",
"mistral-small-creative": "mistralai/mistral-small-24b-instruct-2501",
"mistral-small": "mistralai/mistral-small-24b-instruct-2501",
+ "openai/gpt-5.2": "openai/gpt-5.2-chat",
+ "gpt-5.2": "openai/gpt-5.2-chat",
+ "gpt-5": "openai/gpt-5.2-chat",
+ "gpt-5-pro": "openai/gpt-5.2-pro",
"mistralai/mistral-large-2411": "mistralai/mistral-large-2512",
"mistral-large-2411": "mistralai/mistral-large-2512",
"mistralai/mistral-large-2407": "mistralai/mistral-large-2512",
@@ -244,6 +270,7 @@
"claude-sonnet": "anthropic/claude-sonnet-4.5",
"sonnet": "anthropic/claude-sonnet-4.5",
"claude sonnet": "anthropic/claude-sonnet-4.5",
+ "qwen3-thinking": "qwen/qwen3-next-80b-a3b-thinking",
"deepseek/deepseek-chat-v3.1": "deepseek/deepseek-chat-v3-0324",
"deepseek-chat-v3.1": "deepseek/deepseek-chat-v3-0324",
"deepseek/deepseek-chat": "deepseek/deepseek-chat-v3-0324",
@@ -280,6 +307,33 @@
"llama-3-405b": "meta-llama/llama-3.1-405b"
},
"models": [
+ {
+ "id": "z-ai/glm-4.7",
+ "name": "Z.AI: GLM 4.7",
+ "context_length": 202752,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Other",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.00000044",
+ "completion": "0.00000174",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0",
+ "input_cache_read": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
{
"id": "google/gemini-3-flash-preview",
"name": "Google: Gemini 3 Flash Preview",
@@ -661,8 +715,8 @@
"instruct_type": null
},
"pricing": {
- "prompt": "0.00000015",
- "completion": "0.0000006",
+ "prompt": "0.00000005",
+ "completion": "0.00000022",
"request": "0",
"image": "0",
"web_search": "0",
@@ -1113,8 +1167,8 @@
"instruct_type": null
},
"pricing": {
- "prompt": "0.00000024",
- "completion": "0.00000038",
+ "prompt": "0.000000239",
+ "completion": "0.000000378",
"request": "0",
"image": "0",
"web_search": "0",
@@ -1660,8 +1714,8 @@
"instruct_type": null
},
"pricing": {
- "prompt": "0.00000045",
- "completion": "0.00000235",
+ "prompt": "0.0000004",
+ "completion": "0.00000175",
"request": "0",
"image": "0",
"web_search": "0",
@@ -3354,8 +3408,8 @@
"instruct_type": null
},
"pricing": {
- "prompt": "0.00000015",
- "completion": "0.00000075",
+ "prompt": "0.0000002",
+ "completion": "0.0000008",
"request": "0",
"image": "0",
"web_search": "0",
@@ -4471,7 +4525,7 @@
{
"id": "qwen/qwen3-coder-30b-a3b-instruct",
"name": "Qwen: Qwen3 Coder 30B A3B Instruct",
- "context_length": 262144,
+ "context_length": 160000,
"architecture": {
"modality": "text->text",
"input_modalities": [
@@ -4484,8 +4538,8 @@
"instruct_type": null
},
"pricing": {
- "prompt": "0.00000006",
- "completion": "0.00000025",
+ "prompt": "0.00000007",
+ "completion": "0.00000027",
"request": "0",
"image": "0",
"web_search": "0",
@@ -6711,60 +6765,6 @@
"general": 0.938
}
},
- {
- "id": "qwen/qwen3-235b-a22b:free",
- "name": "Qwen: Qwen3 235B A22B (free)",
- "context_length": 131072,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Qwen3",
- "instruct_type": "qwen3"
- },
- "pricing": {
- "prompt": "0",
- "completion": "0",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": {
- "code": {
- "livecodebench": 0.707,
- "mbpp": 0.814
- },
- "math": {
- "aime-2025": 0.815,
- "aime-2024": 0.857,
- "gsm8k": 0.9439,
- "math": 0.7184
- },
- "reasoning": {
- "gpqa": 0.4747,
- "mmlu-pro": 0.6818,
- "mmlu": 0.8781
- },
- "tool_calling": {
- "bfcl": 0.708
- },
- "general": {
- "arena-hard": 0.956
- }
- },
- "category_scores": {
- "code": 0.7605,
- "math": 0.8336,
- "reasoning": 0.6782,
- "tool_calling": 0.708,
- "general": 0.956
- }
- },
{
"id": "qwen/qwen3-235b-a22b",
"name": "Qwen: Qwen3 235B A22B",
@@ -8632,8 +8632,8 @@
"instruct_type": null
},
"pricing": {
- "prompt": "0.0000002",
- "completion": "0.0000002",
+ "prompt": "0.0000008",
+ "completion": "0.0000016",
"request": "0",
"image": "0",
"web_search": "0",
@@ -8713,8 +8713,8 @@
"instruct_type": null
},
"pricing": {
- "prompt": "0.00000003",
- "completion": "0.00000013",
+ "prompt": "0.00000007",
+ "completion": "0.00000026",
"request": "0",
"image": "0",
"web_search": "0",
@@ -9025,7 +9025,7 @@
},
"pricing": {
"prompt": "0.00000003",
- "completion": "0.00000013",
+ "completion": "0.00000011",
"request": "0",
"image": "0",
"web_search": "0",
@@ -9922,6 +9922,47 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "mistralai/ministral-3b",
+ "name": "Mistral: Ministral 3B",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Mistral",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.00000004",
+ "completion": "0.00000004",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": {
+ "code": {
+ "livecodebench": 0.548
+ },
+ "math": {
+ "aime-2025": 0.721,
+ "aime-2024": 0.775
+ },
+ "reasoning": {
+ "gpqa": 0.534
+ }
+ },
+ "category_scores": {
+ "code": 0.548,
+ "math": 0.748,
+ "reasoning": 0.534
+ }
+ },
{
"id": "mistralai/ministral-8b",
"name": "Mistral: Ministral 8B",
@@ -9968,47 +10009,6 @@
"general": 0.7695
}
},
- {
- "id": "mistralai/ministral-3b",
- "name": "Mistral: Ministral 3B",
- "context_length": 131072,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Mistral",
- "instruct_type": null
- },
- "pricing": {
- "prompt": "0.00000004",
- "completion": "0.00000004",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": {
- "code": {
- "livecodebench": 0.548
- },
- "math": {
- "aime-2025": 0.721,
- "aime-2024": 0.775
- },
- "reasoning": {
- "gpqa": 0.534
- }
- },
- "category_scores": {
- "code": 0.548,
- "math": 0.748,
- "reasoning": 0.534
- }
- },
{
"id": "qwen/qwen-2.5-7b-instruct",
"name": "Qwen: Qwen2.5 7B Instruct",
@@ -10102,8 +10102,8 @@
}
},
{
- "id": "inflection/inflection-3-pi",
- "name": "Inflection: Inflection 3 Pi",
+ "id": "inflection/inflection-3-productivity",
+ "name": "Inflection: Inflection 3 Productivity",
"context_length": 8000,
"architecture": {
"modality": "text->text",
@@ -10128,8 +10128,8 @@
"category_scores": null
},
{
- "id": "inflection/inflection-3-productivity",
- "name": "Inflection: Inflection 3 Productivity",
+ "id": "inflection/inflection-3-pi",
+ "name": "Inflection: Inflection 3 Pi",
"context_length": 8000,
"architecture": {
"modality": "text->text",
@@ -10206,33 +10206,6 @@
"benchmarks": null,
"category_scores": null
},
- {
- "id": "meta-llama/llama-3.2-11b-vision-instruct",
- "name": "Meta: Llama 3.2 11B Vision Instruct",
- "context_length": 131072,
- "architecture": {
- "modality": "text+image->text",
- "input_modalities": [
- "text",
- "image"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Llama3",
- "instruct_type": "llama3"
- },
- "pricing": {
- "prompt": "0.000000049",
- "completion": "0.000000049",
- "request": "0",
- "image": "0.00007948",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": null,
- "category_scores": null
- },
{
"id": "meta-llama/llama-3.2-1b-instruct",
"name": "Meta: Llama 3.2 1B Instruct",
@@ -10361,6 +10334,33 @@
"general": 0.774
}
},
+ {
+ "id": "meta-llama/llama-3.2-11b-vision-instruct",
+ "name": "Meta: Llama 3.2 11B Vision Instruct",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text+image->text",
+ "input_modalities": [
+ "text",
+ "image"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.000000049",
+ "completion": "0.000000049",
+ "request": "0",
+ "image": "0.00007948",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
{
"id": "qwen/qwen-2.5-72b-instruct",
"name": "Qwen2.5 72B Instruct",
@@ -10377,8 +10377,8 @@
"instruct_type": "chatml"
},
"pricing": {
- "prompt": "0.00000007",
- "completion": "0.00000026",
+ "prompt": "0.00000012",
+ "completion": "0.00000039",
"request": "0",
"image": "0",
"web_search": "0",
@@ -10535,6 +10535,32 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "sao10k/l3.1-euryale-70b",
+ "name": "Sao10K: Llama 3.1 Euryale 70B v2.2",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.00000065",
+ "completion": "0.00000075",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
{
"id": "qwen/qwen-2.5-vl-7b-instruct:free",
"name": "Qwen: Qwen2.5-VL 7B Instruct (free)",
@@ -10589,32 +10615,6 @@
"benchmarks": null,
"category_scores": null
},
- {
- "id": "sao10k/l3.1-euryale-70b",
- "name": "Sao10K: Llama 3.1 Euryale 70B v2.2",
- "context_length": 32768,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Llama3",
- "instruct_type": "llama3"
- },
- "pricing": {
- "prompt": "0.00000065",
- "completion": "0.00000075",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": null,
- "category_scores": null
- },
{
"id": "microsoft/phi-3.5-mini-128k-instruct",
"name": "Microsoft: Phi-3.5 Mini 128K Instruct",
@@ -10874,6 +10874,53 @@
"general": 0.886
}
},
+ {
+ "id": "meta-llama/llama-3.1-70b-instruct",
+ "name": "Meta: Llama 3.1 70B Instruct",
+ "context_length": 131072,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.0000004",
+ "completion": "0.0000004",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": {
+ "code": {
+ "humaneval": 0.805
+ },
+ "reasoning": {
+ "gpqa": 0.417,
+ "mmlu-pro": 0.664,
+ "mmlu": 0.836
+ },
+ "tool_calling": {
+ "bfcl": 0.848,
+ "nexus": 0.567
+ },
+ "general": {
+ "ifeval": 0.875
+ }
+ },
+ "category_scores": {
+ "code": 0.805,
+ "reasoning": 0.639,
+ "tool_calling": 0.7075,
+ "general": 0.875
+ }
+ },
{
"id": "meta-llama/llama-3.1-405b-instruct:free",
"name": "Meta: Llama 3.1 405B Instruct (free)",
@@ -11025,53 +11072,6 @@
"general": 0.804
}
},
- {
- "id": "meta-llama/llama-3.1-70b-instruct",
- "name": "Meta: Llama 3.1 70B Instruct",
- "context_length": 131072,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Llama3",
- "instruct_type": "llama3"
- },
- "pricing": {
- "prompt": "0.0000004",
- "completion": "0.0000004",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": {
- "code": {
- "humaneval": 0.805
- },
- "reasoning": {
- "gpqa": 0.417,
- "mmlu-pro": 0.664,
- "mmlu": 0.836
- },
- "tool_calling": {
- "bfcl": 0.848,
- "nexus": 0.567
- },
- "general": {
- "ifeval": 0.875
- }
- },
- "category_scores": {
- "code": 0.805,
- "reasoning": 0.639,
- "tool_calling": 0.7075,
- "general": 0.875
- }
- },
{
"id": "mistralai/mistral-nemo",
"name": "Mistral: Mistral Nemo",
@@ -11108,8 +11108,8 @@
}
},
{
- "id": "openai/gpt-4o-mini-2024-07-18",
- "name": "OpenAI: GPT-4o-mini (2024-07-18)",
+ "id": "openai/gpt-4o-mini",
+ "name": "OpenAI: GPT-4o-mini",
"context_length": 128000,
"architecture": {
"modality": "text+image->text",
@@ -11128,7 +11128,7 @@
"prompt": "0.00000015",
"completion": "0.0000006",
"request": "0",
- "image": "0.007225",
+ "image": "0.000217",
"web_search": "0",
"internal_reasoning": "0",
"input_cache_read": "0.000000075"
@@ -11153,8 +11153,8 @@
}
},
{
- "id": "openai/gpt-4o-mini",
- "name": "OpenAI: GPT-4o-mini",
+ "id": "openai/gpt-4o-mini-2024-07-18",
+ "name": "OpenAI: GPT-4o-mini (2024-07-18)",
"context_length": 128000,
"architecture": {
"modality": "text+image->text",
@@ -11173,7 +11173,7 @@
"prompt": "0.00000015",
"completion": "0.0000006",
"request": "0",
- "image": "0.000217",
+ "image": "0.007225",
"web_search": "0",
"internal_reasoning": "0",
"input_cache_read": "0.000000075"
@@ -11311,58 +11311,6 @@
"benchmarks": null,
"category_scores": null
},
- {
- "id": "mistralai/mistral-7b-instruct:free",
- "name": "Mistral: Mistral 7B Instruct (free)",
- "context_length": 32768,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Mistral",
- "instruct_type": "mistral"
- },
- "pricing": {
- "prompt": "0",
- "completion": "0",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": null,
- "category_scores": null
- },
- {
- "id": "mistralai/mistral-7b-instruct",
- "name": "Mistral: Mistral 7B Instruct",
- "context_length": 32768,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Mistral",
- "instruct_type": "mistral"
- },
- "pricing": {
- "prompt": "0.000000028",
- "completion": "0.000000054",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": null,
- "category_scores": null
- },
{
"id": "nousresearch/hermes-2-pro-llama-3-8b",
"name": "NousResearch: Hermes 2 Pro - Llama-3 8B",
@@ -11415,6 +11363,58 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "mistralai/mistral-7b-instruct:free",
+ "name": "Mistral: Mistral 7B Instruct (free)",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0",
+ "completion": "0",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct",
+ "name": "Mistral: Mistral 7B Instruct",
+ "context_length": 32768,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Mistral",
+ "instruct_type": "mistral"
+ },
+ "pricing": {
+ "prompt": "0.000000028",
+ "completion": "0.000000054",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
{
"id": "microsoft/phi-3-mini-128k-instruct",
"name": "Microsoft: Phi-3 Mini 128K Instruct",
@@ -11467,76 +11467,6 @@
"benchmarks": null,
"category_scores": null
},
- {
- "id": "meta-llama/llama-guard-2-8b",
- "name": "Meta: LlamaGuard 2 8B",
- "context_length": 8192,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "Llama3",
- "instruct_type": "none"
- },
- "pricing": {
- "prompt": "0.0000002",
- "completion": "0.0000002",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": null,
- "category_scores": null
- },
- {
- "id": "openai/gpt-4o-2024-05-13",
- "name": "OpenAI: GPT-4o (2024-05-13)",
- "context_length": 128000,
- "architecture": {
- "modality": "text+image->text",
- "input_modalities": [
- "text",
- "image",
- "file"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "GPT",
- "instruct_type": null
- },
- "pricing": {
- "prompt": "0.000005",
- "completion": "0.000015",
- "request": "0",
- "image": "0.007225",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": {
- "code": {
- "humaneval": 0.902
- },
- "math": {
- "math": 0.766
- },
- "reasoning": {
- "gpqa": 0.536,
- "mmlu-pro": 0.726,
- "mmlu": 0.887
- }
- },
- "category_scores": {
- "code": 0.902,
- "math": 0.766,
- "reasoning": 0.7163
- }
- },
{
"id": "openai/gpt-4o",
"name": "OpenAI: GPT-4o",
@@ -11616,8 +11546,8 @@
"category_scores": null
},
{
- "id": "meta-llama/llama-3-70b-instruct",
- "name": "Meta: Llama 3 70B Instruct",
+ "id": "meta-llama/llama-guard-2-8b",
+ "name": "Meta: LlamaGuard 2 8B",
"context_length": 8192,
"architecture": {
"modality": "text->text",
@@ -11628,11 +11558,11 @@
"text"
],
"tokenizer": "Llama3",
- "instruct_type": "llama3"
+ "instruct_type": "none"
},
"pricing": {
- "prompt": "0.0000003",
- "completion": "0.0000004",
+ "prompt": "0.0000002",
+ "completion": "0.0000002",
"request": "0",
"image": "0",
"web_search": "0",
@@ -11641,6 +11571,50 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "openai/gpt-4o-2024-05-13",
+ "name": "OpenAI: GPT-4o (2024-05-13)",
+ "context_length": 128000,
+ "architecture": {
+ "modality": "text+image->text",
+ "input_modalities": [
+ "text",
+ "image",
+ "file"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "GPT",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000005",
+ "completion": "0.000015",
+ "request": "0",
+ "image": "0.007225",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": {
+ "code": {
+ "humaneval": 0.902
+ },
+ "math": {
+ "math": 0.766
+ },
+ "reasoning": {
+ "gpqa": 0.536,
+ "mmlu-pro": 0.726,
+ "mmlu": 0.887
+ }
+ },
+ "category_scores": {
+ "code": 0.902,
+ "math": 0.766,
+ "reasoning": 0.7163
+ }
+ },
{
"id": "meta-llama/llama-3-8b-instruct",
"name": "Meta: Llama 3 8B Instruct",
@@ -11667,6 +11641,32 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "meta-llama/llama-3-70b-instruct",
+ "name": "Meta: Llama 3 70B Instruct",
+ "context_length": 8192,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "Llama3",
+ "instruct_type": "llama3"
+ },
+ "pricing": {
+ "prompt": "0.0000003",
+ "completion": "0.0000004",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
{
"id": "mistralai/mixtral-8x22b-instruct",
"name": "Mistral: Mixtral 8x22B Instruct",
@@ -11898,32 +11898,6 @@
"general": 0.863
}
},
- {
- "id": "openai/gpt-3.5-turbo-0613",
- "name": "OpenAI: GPT-3.5 Turbo (older v0613)",
- "context_length": 4095,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "GPT",
- "instruct_type": null
- },
- "pricing": {
- "prompt": "0.000001",
- "completion": "0.000002",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": null,
- "category_scores": null
- },
{
"id": "openai/gpt-4-turbo-preview",
"name": "OpenAI: GPT-4 Turbo Preview",
@@ -11950,6 +11924,32 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "openai/gpt-3.5-turbo-0613",
+ "name": "OpenAI: GPT-3.5 Turbo (older v0613)",
+ "context_length": 4095,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "GPT",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.000001",
+ "completion": "0.000002",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
{
"id": "mistralai/mistral-tiny",
"name": "Mistral Tiny",
@@ -12128,32 +12128,6 @@
"benchmarks": null,
"category_scores": null
},
- {
- "id": "openai/gpt-3.5-turbo-instruct",
- "name": "OpenAI: GPT-3.5 Turbo Instruct",
- "context_length": 4095,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "GPT",
- "instruct_type": "chatml"
- },
- "pricing": {
- "prompt": "0.0000015",
- "completion": "0.000002",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": null,
- "category_scores": null
- },
{
"id": "mistralai/mistral-7b-instruct-v0.1",
"name": "Mistral: Mistral 7B Instruct v0.1",
@@ -12180,6 +12154,32 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "openai/gpt-3.5-turbo-instruct",
+ "name": "OpenAI: GPT-3.5 Turbo Instruct",
+ "context_length": 4095,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "GPT",
+ "instruct_type": "chatml"
+ },
+ "pricing": {
+ "prompt": "0.0000015",
+ "completion": "0.000002",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": null,
+ "category_scores": null
+ },
{
"id": "openai/gpt-3.5-turbo-16k",
"name": "OpenAI: GPT-3.5 Turbo 16k",
@@ -12310,6 +12310,47 @@
"benchmarks": null,
"category_scores": null
},
+ {
+ "id": "openai/gpt-3.5-turbo",
+ "name": "OpenAI: GPT-3.5 Turbo",
+ "context_length": 16385,
+ "architecture": {
+ "modality": "text->text",
+ "input_modalities": [
+ "text"
+ ],
+ "output_modalities": [
+ "text"
+ ],
+ "tokenizer": "GPT",
+ "instruct_type": null
+ },
+ "pricing": {
+ "prompt": "0.0000005",
+ "completion": "0.0000015",
+ "request": "0",
+ "image": "0",
+ "web_search": "0",
+ "internal_reasoning": "0"
+ },
+ "benchmarks": {
+ "code": {
+ "humaneval": 0.68
+ },
+ "math": {
+ "math": 0.431
+ },
+ "reasoning": {
+ "gpqa": 0.308,
+ "mmlu": 0.698
+ }
+ },
+ "category_scores": {
+ "code": 0.68,
+ "math": 0.431,
+ "reasoning": 0.503
+ }
+ },
{
"id": "openai/gpt-4",
"name": "OpenAI: GPT-4",
@@ -12356,47 +12397,6 @@
"reasoning": 0.7825,
"general": 0.874
}
- },
- {
- "id": "openai/gpt-3.5-turbo",
- "name": "OpenAI: GPT-3.5 Turbo",
- "context_length": 16385,
- "architecture": {
- "modality": "text->text",
- "input_modalities": [
- "text"
- ],
- "output_modalities": [
- "text"
- ],
- "tokenizer": "GPT",
- "instruct_type": null
- },
- "pricing": {
- "prompt": "0.0000005",
- "completion": "0.0000015",
- "request": "0",
- "image": "0",
- "web_search": "0",
- "internal_reasoning": "0"
- },
- "benchmarks": {
- "code": {
- "humaneval": 0.68
- },
- "math": {
- "math": 0.431
- },
- "reasoning": {
- "gpqa": 0.308,
- "mmlu": 0.698
- }
- },
- "category_scores": {
- "code": 0.68,
- "math": 0.431,
- "reasoning": 0.503
- }
}
]
}
\ No newline at end of file
diff --git a/scripts/merge_benchmarks.py b/scripts/merge_benchmarks.py
index 6f9bb56..b51520e 100644
--- a/scripts/merge_benchmarks.py
+++ b/scripts/merge_benchmarks.py
@@ -76,6 +76,9 @@ MODEL_FAMILY_PATTERNS = [
(r"^anthropic/claude-(\d+\.?\d*)-haiku$", "claude-haiku", "fast"),
# OpenAI GPT
+ (r"^openai/gpt-5\.2-pro$", "gpt-5-pro", "flagship"),
+ (r"^openai/gpt-5\.2$", "gpt-5", "mid"),
+ (r"^openai/gpt-5\.2-chat$", "gpt-5", "mid"),
(r"^openai/gpt-4\.1$", "gpt-4", "mid"),
(r"^openai/gpt-4o$", "gpt-4", "mid"),
(r"^openai/gpt-4-turbo", "gpt-4", "mid"),
@@ -110,6 +113,8 @@ MODEL_FAMILY_PATTERNS = [
# Qwen
(r"^qwen/qwen-2\.5-72b", "qwen-72b", "mid"),
(r"^qwen/qwq-32b", "qwq", "mid"),
+ (r"^qwen/qwen3-next-80b.*thinking", "qwen3-thinking", "flagship"),
+ (r"^qwen/qwen3-235b.*instruct", "qwen3-instruct", "mid"),
]
HEADERS = {
diff --git a/src/budget/pricing.rs b/src/budget/pricing.rs
index e8c15bb..d111588 100644
--- a/src/budget/pricing.rs
+++ b/src/budget/pricing.rs
@@ -238,7 +238,10 @@ impl ModelPricing {
// Flagship tier
"openai/o1",
"openai/o1-preview",
+ "openai/gpt-5.2-pro",
// Mid tier
+ "openai/gpt-5.2",
+ "openai/gpt-5.2-chat",
"openai/gpt-4.1",
"openai/gpt-4o",
"openai/gpt-4-turbo",