From f20b84f17219b5021c7454288ad060002d861eb2 Mon Sep 17 00:00:00 2001 From: Thomas Marchand Date: Mon, 22 Dec 2025 21:39:25 +0100 Subject: [PATCH] feat: add GPT-5.2 and qwen3-thinking models, friendlier display names - Add GPT-5.2 and GPT-5.2-pro to model families - Add qwen3-next-80b-a3b-thinking to model families - Add friendly display names in dashboard dropdown (e.g., "4.5-sonnet", "gpt-5.2-pro") - Update CAPABLE_MODEL_BASES allowlist --- dashboard/src/app/control/control-client.tsx | 3 +- dashboard/src/lib/api.ts | 43 + models_with_benchmarks.json | 890 +++++++++---------- scripts/merge_benchmarks.py | 5 + src/budget/pricing.rs | 3 + 5 files changed, 498 insertions(+), 446 deletions(-) diff --git a/dashboard/src/app/control/control-client.tsx b/dashboard/src/app/control/control-client.tsx index 1265df4..f6fc9c3 100644 --- a/dashboard/src/app/control/control-client.tsx +++ b/dashboard/src/app/control/control-client.tsx @@ -21,6 +21,7 @@ import { getRunningMissions, cancelMission, listModels, + getModelDisplayName, type ControlRunState, type Mission, type MissionStatus, @@ -1202,7 +1203,7 @@ export default function ControlClient() { {availableModels.map((model) => ( ))} diff --git a/dashboard/src/lib/api.ts b/dashboard/src/lib/api.ts index 12fe350..b8896c6 100644 --- a/dashboard/src/lib/api.ts +++ b/dashboard/src/lib/api.ts @@ -763,3 +763,46 @@ export async function listModels(tier?: string): Promise { if (!res.ok) throw new Error("Failed to fetch models"); return res.json(); } + +// Friendly display names for models +const MODEL_DISPLAY_NAMES: Record = { + // OpenAI - simplified (newest first) + "openai/gpt-5.2-pro": "gpt-5.2-pro", + "openai/gpt-5.2": "gpt-5.2", + "openai/gpt-5.2-chat": "gpt-5.2", + "openai/gpt-4.1-mini": "gpt-4-mini", + "openai/gpt-4.1": "gpt-4", + "openai/o1": "o1", + "openai/o3-mini-high": "o3-mini", + // Anthropic - simplified + "anthropic/claude-sonnet-4.5": "4.5-sonnet", + "anthropic/claude-opus-4.5": "4.5-opus", + "anthropic/claude-haiku-4.5": "4.5-haiku", + // Google + "google/gemini-3-flash-preview": "gemini-3-flash", + "google/gemini-3-pro-image-preview": "gemini-3-pro", + // DeepSeek + "deepseek/deepseek-r1": "deepseek-r1", + "deepseek/deepseek-chat-v3-0324": "deepseek-v3", + // Qwen + "qwen/qwq-32b": "qwq-32b", + "qwen/qwen-2.5-72b-instruct": "qwen-72b", + "qwen/qwen3-next-80b-a3b-thinking": "qwen3-thinking", + // Mistral + "mistralai/mistral-small-24b-instruct-2501": "mistral-small", + "mistralai/mistral-medium-3.1": "mistral-medium", + "mistralai/mistral-large-2512": "mistral-large", + // Meta + "meta-llama/llama-3.1-405b": "llama-405b", + "meta-llama/llama-3.2-90b-vision-instruct": "llama-90b-vision", + "meta-llama/llama-3.3-70b-instruct:free": "llama-70b (free)", +}; + +// Get display name for a model +export function getModelDisplayName(modelId: string): string { + if (MODEL_DISPLAY_NAMES[modelId]) { + return MODEL_DISPLAY_NAMES[modelId]; + } + // Fallback: strip provider prefix + return modelId.includes("/") ? modelId.split("/").pop()! : modelId; +} diff --git a/models_with_benchmarks.json b/models_with_benchmarks.json index 5e51390..c690ac3 100644 --- a/models_with_benchmarks.json +++ b/models_with_benchmarks.json @@ -1,7 +1,7 @@ { - "generated_at": "2025-12-19T07:20:20Z", + "generated_at": "2025-12-22T20:23:08Z", "total_models": 350, - "models_with_benchmarks": 158, + "models_with_benchmarks": 157, "categories": [ "code", "math", @@ -35,6 +35,21 @@ ], "tier": "fast" }, + "gpt-5": { + "latest": "openai/gpt-5.2-chat", + "members": [ + "openai/gpt-5.2-chat", + "openai/gpt-5.2" + ], + "tier": "mid" + }, + "gpt-5-pro": { + "latest": "openai/gpt-5.2-pro", + "members": [ + "openai/gpt-5.2-pro" + ], + "tier": "flagship" + }, "mistral-large": { "latest": "mistralai/mistral-large-2512", "members": [ @@ -85,6 +100,13 @@ ], "tier": "mid" }, + "qwen3-thinking": { + "latest": "qwen/qwen3-next-80b-a3b-thinking", + "members": [ + "qwen/qwen3-next-80b-a3b-thinking" + ], + "tier": "flagship" + }, "deepseek-chat": { "latest": "deepseek/deepseek-chat-v3-0324", "members": [ @@ -204,6 +226,10 @@ "mistralai/mistral-small-creative": "mistralai/mistral-small-24b-instruct-2501", "mistral-small-creative": "mistralai/mistral-small-24b-instruct-2501", "mistral-small": "mistralai/mistral-small-24b-instruct-2501", + "openai/gpt-5.2": "openai/gpt-5.2-chat", + "gpt-5.2": "openai/gpt-5.2-chat", + "gpt-5": "openai/gpt-5.2-chat", + "gpt-5-pro": "openai/gpt-5.2-pro", "mistralai/mistral-large-2411": "mistralai/mistral-large-2512", "mistral-large-2411": "mistralai/mistral-large-2512", "mistralai/mistral-large-2407": "mistralai/mistral-large-2512", @@ -244,6 +270,7 @@ "claude-sonnet": "anthropic/claude-sonnet-4.5", "sonnet": "anthropic/claude-sonnet-4.5", "claude sonnet": "anthropic/claude-sonnet-4.5", + "qwen3-thinking": "qwen/qwen3-next-80b-a3b-thinking", "deepseek/deepseek-chat-v3.1": "deepseek/deepseek-chat-v3-0324", "deepseek-chat-v3.1": "deepseek/deepseek-chat-v3-0324", "deepseek/deepseek-chat": "deepseek/deepseek-chat-v3-0324", @@ -280,6 +307,33 @@ "llama-3-405b": "meta-llama/llama-3.1-405b" }, "models": [ + { + "id": "z-ai/glm-4.7", + "name": "Z.AI: GLM 4.7", + "context_length": 202752, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000044", + "completion": "0.00000174", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0", + "input_cache_read": "0" + }, + "benchmarks": null, + "category_scores": null + }, { "id": "google/gemini-3-flash-preview", "name": "Google: Gemini 3 Flash Preview", @@ -661,8 +715,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000015", - "completion": "0.0000006", + "prompt": "0.00000005", + "completion": "0.00000022", "request": "0", "image": "0", "web_search": "0", @@ -1113,8 +1167,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000024", - "completion": "0.00000038", + "prompt": "0.000000239", + "completion": "0.000000378", "request": "0", "image": "0", "web_search": "0", @@ -1660,8 +1714,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000045", - "completion": "0.00000235", + "prompt": "0.0000004", + "completion": "0.00000175", "request": "0", "image": "0", "web_search": "0", @@ -3354,8 +3408,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000015", - "completion": "0.00000075", + "prompt": "0.0000002", + "completion": "0.0000008", "request": "0", "image": "0", "web_search": "0", @@ -4471,7 +4525,7 @@ { "id": "qwen/qwen3-coder-30b-a3b-instruct", "name": "Qwen: Qwen3 Coder 30B A3B Instruct", - "context_length": 262144, + "context_length": 160000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -4484,8 +4538,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000006", - "completion": "0.00000025", + "prompt": "0.00000007", + "completion": "0.00000027", "request": "0", "image": "0", "web_search": "0", @@ -6711,60 +6765,6 @@ "general": 0.938 } }, - { - "id": "qwen/qwen3-235b-a22b:free", - "name": "Qwen: Qwen3 235B A22B (free)", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Qwen3", - "instruct_type": "qwen3" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": { - "code": { - "livecodebench": 0.707, - "mbpp": 0.814 - }, - "math": { - "aime-2025": 0.815, - "aime-2024": 0.857, - "gsm8k": 0.9439, - "math": 0.7184 - }, - "reasoning": { - "gpqa": 0.4747, - "mmlu-pro": 0.6818, - "mmlu": 0.8781 - }, - "tool_calling": { - "bfcl": 0.708 - }, - "general": { - "arena-hard": 0.956 - } - }, - "category_scores": { - "code": 0.7605, - "math": 0.8336, - "reasoning": 0.6782, - "tool_calling": 0.708, - "general": 0.956 - } - }, { "id": "qwen/qwen3-235b-a22b", "name": "Qwen: Qwen3 235B A22B", @@ -8632,8 +8632,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000002", - "completion": "0.0000002", + "prompt": "0.0000008", + "completion": "0.0000016", "request": "0", "image": "0", "web_search": "0", @@ -8713,8 +8713,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000003", - "completion": "0.00000013", + "prompt": "0.00000007", + "completion": "0.00000026", "request": "0", "image": "0", "web_search": "0", @@ -9025,7 +9025,7 @@ }, "pricing": { "prompt": "0.00000003", - "completion": "0.00000013", + "completion": "0.00000011", "request": "0", "image": "0", "web_search": "0", @@ -9922,6 +9922,47 @@ "benchmarks": null, "category_scores": null }, + { + "id": "mistralai/ministral-3b", + "name": "Mistral: Ministral 3B", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Mistral", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000004", + "completion": "0.00000004", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": { + "code": { + "livecodebench": 0.548 + }, + "math": { + "aime-2025": 0.721, + "aime-2024": 0.775 + }, + "reasoning": { + "gpqa": 0.534 + } + }, + "category_scores": { + "code": 0.548, + "math": 0.748, + "reasoning": 0.534 + } + }, { "id": "mistralai/ministral-8b", "name": "Mistral: Ministral 8B", @@ -9968,47 +10009,6 @@ "general": 0.7695 } }, - { - "id": "mistralai/ministral-3b", - "name": "Mistral: Ministral 3B", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000004", - "completion": "0.00000004", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": { - "code": { - "livecodebench": 0.548 - }, - "math": { - "aime-2025": 0.721, - "aime-2024": 0.775 - }, - "reasoning": { - "gpqa": 0.534 - } - }, - "category_scores": { - "code": 0.548, - "math": 0.748, - "reasoning": 0.534 - } - }, { "id": "qwen/qwen-2.5-7b-instruct", "name": "Qwen: Qwen2.5 7B Instruct", @@ -10102,8 +10102,8 @@ } }, { - "id": "inflection/inflection-3-pi", - "name": "Inflection: Inflection 3 Pi", + "id": "inflection/inflection-3-productivity", + "name": "Inflection: Inflection 3 Productivity", "context_length": 8000, "architecture": { "modality": "text->text", @@ -10128,8 +10128,8 @@ "category_scores": null }, { - "id": "inflection/inflection-3-productivity", - "name": "Inflection: Inflection 3 Productivity", + "id": "inflection/inflection-3-pi", + "name": "Inflection: Inflection 3 Pi", "context_length": 8000, "architecture": { "modality": "text->text", @@ -10206,33 +10206,6 @@ "benchmarks": null, "category_scores": null }, - { - "id": "meta-llama/llama-3.2-11b-vision-instruct", - "name": "Meta: Llama 3.2 11B Vision Instruct", - "context_length": 131072, - "architecture": { - "modality": "text+image->text", - "input_modalities": [ - "text", - "image" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.000000049", - "completion": "0.000000049", - "request": "0", - "image": "0.00007948", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": null, - "category_scores": null - }, { "id": "meta-llama/llama-3.2-1b-instruct", "name": "Meta: Llama 3.2 1B Instruct", @@ -10361,6 +10334,33 @@ "general": 0.774 } }, + { + "id": "meta-llama/llama-3.2-11b-vision-instruct", + "name": "Meta: Llama 3.2 11B Vision Instruct", + "context_length": 131072, + "architecture": { + "modality": "text+image->text", + "input_modalities": [ + "text", + "image" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.000000049", + "completion": "0.000000049", + "request": "0", + "image": "0.00007948", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": null, + "category_scores": null + }, { "id": "qwen/qwen-2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", @@ -10377,8 +10377,8 @@ "instruct_type": "chatml" }, "pricing": { - "prompt": "0.00000007", - "completion": "0.00000026", + "prompt": "0.00000012", + "completion": "0.00000039", "request": "0", "image": "0", "web_search": "0", @@ -10535,6 +10535,32 @@ "benchmarks": null, "category_scores": null }, + { + "id": "sao10k/l3.1-euryale-70b", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "context_length": 32768, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.00000065", + "completion": "0.00000075", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": null, + "category_scores": null + }, { "id": "qwen/qwen-2.5-vl-7b-instruct:free", "name": "Qwen: Qwen2.5-VL 7B Instruct (free)", @@ -10589,32 +10615,6 @@ "benchmarks": null, "category_scores": null }, - { - "id": "sao10k/l3.1-euryale-70b", - "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.00000065", - "completion": "0.00000075", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": null, - "category_scores": null - }, { "id": "microsoft/phi-3.5-mini-128k-instruct", "name": "Microsoft: Phi-3.5 Mini 128K Instruct", @@ -10874,6 +10874,53 @@ "general": 0.886 } }, + { + "id": "meta-llama/llama-3.1-70b-instruct", + "name": "Meta: Llama 3.1 70B Instruct", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.0000004", + "completion": "0.0000004", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": { + "code": { + "humaneval": 0.805 + }, + "reasoning": { + "gpqa": 0.417, + "mmlu-pro": 0.664, + "mmlu": 0.836 + }, + "tool_calling": { + "bfcl": 0.848, + "nexus": 0.567 + }, + "general": { + "ifeval": 0.875 + } + }, + "category_scores": { + "code": 0.805, + "reasoning": 0.639, + "tool_calling": 0.7075, + "general": 0.875 + } + }, { "id": "meta-llama/llama-3.1-405b-instruct:free", "name": "Meta: Llama 3.1 405B Instruct (free)", @@ -11025,53 +11072,6 @@ "general": 0.804 } }, - { - "id": "meta-llama/llama-3.1-70b-instruct", - "name": "Meta: Llama 3.1 70B Instruct", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.0000004", - "completion": "0.0000004", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": { - "code": { - "humaneval": 0.805 - }, - "reasoning": { - "gpqa": 0.417, - "mmlu-pro": 0.664, - "mmlu": 0.836 - }, - "tool_calling": { - "bfcl": 0.848, - "nexus": 0.567 - }, - "general": { - "ifeval": 0.875 - } - }, - "category_scores": { - "code": 0.805, - "reasoning": 0.639, - "tool_calling": 0.7075, - "general": 0.875 - } - }, { "id": "mistralai/mistral-nemo", "name": "Mistral: Mistral Nemo", @@ -11108,8 +11108,8 @@ } }, { - "id": "openai/gpt-4o-mini-2024-07-18", - "name": "OpenAI: GPT-4o-mini (2024-07-18)", + "id": "openai/gpt-4o-mini", + "name": "OpenAI: GPT-4o-mini", "context_length": 128000, "architecture": { "modality": "text+image->text", @@ -11128,7 +11128,7 @@ "prompt": "0.00000015", "completion": "0.0000006", "request": "0", - "image": "0.007225", + "image": "0.000217", "web_search": "0", "internal_reasoning": "0", "input_cache_read": "0.000000075" @@ -11153,8 +11153,8 @@ } }, { - "id": "openai/gpt-4o-mini", - "name": "OpenAI: GPT-4o-mini", + "id": "openai/gpt-4o-mini-2024-07-18", + "name": "OpenAI: GPT-4o-mini (2024-07-18)", "context_length": 128000, "architecture": { "modality": "text+image->text", @@ -11173,7 +11173,7 @@ "prompt": "0.00000015", "completion": "0.0000006", "request": "0", - "image": "0.000217", + "image": "0.007225", "web_search": "0", "internal_reasoning": "0", "input_cache_read": "0.000000075" @@ -11311,58 +11311,6 @@ "benchmarks": null, "category_scores": null }, - { - "id": "mistralai/mistral-7b-instruct:free", - "name": "Mistral: Mistral 7B Instruct (free)", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": "mistral" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": null, - "category_scores": null - }, - { - "id": "mistralai/mistral-7b-instruct", - "name": "Mistral: Mistral 7B Instruct", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": "mistral" - }, - "pricing": { - "prompt": "0.000000028", - "completion": "0.000000054", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": null, - "category_scores": null - }, { "id": "nousresearch/hermes-2-pro-llama-3-8b", "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", @@ -11415,6 +11363,58 @@ "benchmarks": null, "category_scores": null }, + { + "id": "mistralai/mistral-7b-instruct:free", + "name": "Mistral: Mistral 7B Instruct (free)", + "context_length": 32768, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Mistral", + "instruct_type": "mistral" + }, + "pricing": { + "prompt": "0", + "completion": "0", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": null, + "category_scores": null + }, + { + "id": "mistralai/mistral-7b-instruct", + "name": "Mistral: Mistral 7B Instruct", + "context_length": 32768, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Mistral", + "instruct_type": "mistral" + }, + "pricing": { + "prompt": "0.000000028", + "completion": "0.000000054", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": null, + "category_scores": null + }, { "id": "microsoft/phi-3-mini-128k-instruct", "name": "Microsoft: Phi-3 Mini 128K Instruct", @@ -11467,76 +11467,6 @@ "benchmarks": null, "category_scores": null }, - { - "id": "meta-llama/llama-guard-2-8b", - "name": "Meta: LlamaGuard 2 8B", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Llama3", - "instruct_type": "none" - }, - "pricing": { - "prompt": "0.0000002", - "completion": "0.0000002", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": null, - "category_scores": null - }, - { - "id": "openai/gpt-4o-2024-05-13", - "name": "OpenAI: GPT-4o (2024-05-13)", - "context_length": 128000, - "architecture": { - "modality": "text+image->text", - "input_modalities": [ - "text", - "image", - "file" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000005", - "completion": "0.000015", - "request": "0", - "image": "0.007225", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": { - "code": { - "humaneval": 0.902 - }, - "math": { - "math": 0.766 - }, - "reasoning": { - "gpqa": 0.536, - "mmlu-pro": 0.726, - "mmlu": 0.887 - } - }, - "category_scores": { - "code": 0.902, - "math": 0.766, - "reasoning": 0.7163 - } - }, { "id": "openai/gpt-4o", "name": "OpenAI: GPT-4o", @@ -11616,8 +11546,8 @@ "category_scores": null }, { - "id": "meta-llama/llama-3-70b-instruct", - "name": "Meta: Llama 3 70B Instruct", + "id": "meta-llama/llama-guard-2-8b", + "name": "Meta: LlamaGuard 2 8B", "context_length": 8192, "architecture": { "modality": "text->text", @@ -11628,11 +11558,11 @@ "text" ], "tokenizer": "Llama3", - "instruct_type": "llama3" + "instruct_type": "none" }, "pricing": { - "prompt": "0.0000003", - "completion": "0.0000004", + "prompt": "0.0000002", + "completion": "0.0000002", "request": "0", "image": "0", "web_search": "0", @@ -11641,6 +11571,50 @@ "benchmarks": null, "category_scores": null }, + { + "id": "openai/gpt-4o-2024-05-13", + "name": "OpenAI: GPT-4o (2024-05-13)", + "context_length": 128000, + "architecture": { + "modality": "text+image->text", + "input_modalities": [ + "text", + "image", + "file" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "GPT", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000005", + "completion": "0.000015", + "request": "0", + "image": "0.007225", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": { + "code": { + "humaneval": 0.902 + }, + "math": { + "math": 0.766 + }, + "reasoning": { + "gpqa": 0.536, + "mmlu-pro": 0.726, + "mmlu": 0.887 + } + }, + "category_scores": { + "code": 0.902, + "math": 0.766, + "reasoning": 0.7163 + } + }, { "id": "meta-llama/llama-3-8b-instruct", "name": "Meta: Llama 3 8B Instruct", @@ -11667,6 +11641,32 @@ "benchmarks": null, "category_scores": null }, + { + "id": "meta-llama/llama-3-70b-instruct", + "name": "Meta: Llama 3 70B Instruct", + "context_length": 8192, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.0000003", + "completion": "0.0000004", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": null, + "category_scores": null + }, { "id": "mistralai/mixtral-8x22b-instruct", "name": "Mistral: Mixtral 8x22B Instruct", @@ -11898,32 +11898,6 @@ "general": 0.863 } }, - { - "id": "openai/gpt-3.5-turbo-0613", - "name": "OpenAI: GPT-3.5 Turbo (older v0613)", - "context_length": 4095, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000001", - "completion": "0.000002", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": null, - "category_scores": null - }, { "id": "openai/gpt-4-turbo-preview", "name": "OpenAI: GPT-4 Turbo Preview", @@ -11950,6 +11924,32 @@ "benchmarks": null, "category_scores": null }, + { + "id": "openai/gpt-3.5-turbo-0613", + "name": "OpenAI: GPT-3.5 Turbo (older v0613)", + "context_length": 4095, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "GPT", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000001", + "completion": "0.000002", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": null, + "category_scores": null + }, { "id": "mistralai/mistral-tiny", "name": "Mistral Tiny", @@ -12128,32 +12128,6 @@ "benchmarks": null, "category_scores": null }, - { - "id": "openai/gpt-3.5-turbo-instruct", - "name": "OpenAI: GPT-3.5 Turbo Instruct", - "context_length": 4095, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "GPT", - "instruct_type": "chatml" - }, - "pricing": { - "prompt": "0.0000015", - "completion": "0.000002", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": null, - "category_scores": null - }, { "id": "mistralai/mistral-7b-instruct-v0.1", "name": "Mistral: Mistral 7B Instruct v0.1", @@ -12180,6 +12154,32 @@ "benchmarks": null, "category_scores": null }, + { + "id": "openai/gpt-3.5-turbo-instruct", + "name": "OpenAI: GPT-3.5 Turbo Instruct", + "context_length": 4095, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "GPT", + "instruct_type": "chatml" + }, + "pricing": { + "prompt": "0.0000015", + "completion": "0.000002", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": null, + "category_scores": null + }, { "id": "openai/gpt-3.5-turbo-16k", "name": "OpenAI: GPT-3.5 Turbo 16k", @@ -12310,6 +12310,47 @@ "benchmarks": null, "category_scores": null }, + { + "id": "openai/gpt-3.5-turbo", + "name": "OpenAI: GPT-3.5 Turbo", + "context_length": 16385, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "GPT", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000005", + "completion": "0.0000015", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" + }, + "benchmarks": { + "code": { + "humaneval": 0.68 + }, + "math": { + "math": 0.431 + }, + "reasoning": { + "gpqa": 0.308, + "mmlu": 0.698 + } + }, + "category_scores": { + "code": 0.68, + "math": 0.431, + "reasoning": 0.503 + } + }, { "id": "openai/gpt-4", "name": "OpenAI: GPT-4", @@ -12356,47 +12397,6 @@ "reasoning": 0.7825, "general": 0.874 } - }, - { - "id": "openai/gpt-3.5-turbo", - "name": "OpenAI: GPT-3.5 Turbo", - "context_length": 16385, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000005", - "completion": "0.0000015", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "benchmarks": { - "code": { - "humaneval": 0.68 - }, - "math": { - "math": 0.431 - }, - "reasoning": { - "gpqa": 0.308, - "mmlu": 0.698 - } - }, - "category_scores": { - "code": 0.68, - "math": 0.431, - "reasoning": 0.503 - } } ] } \ No newline at end of file diff --git a/scripts/merge_benchmarks.py b/scripts/merge_benchmarks.py index 6f9bb56..b51520e 100644 --- a/scripts/merge_benchmarks.py +++ b/scripts/merge_benchmarks.py @@ -76,6 +76,9 @@ MODEL_FAMILY_PATTERNS = [ (r"^anthropic/claude-(\d+\.?\d*)-haiku$", "claude-haiku", "fast"), # OpenAI GPT + (r"^openai/gpt-5\.2-pro$", "gpt-5-pro", "flagship"), + (r"^openai/gpt-5\.2$", "gpt-5", "mid"), + (r"^openai/gpt-5\.2-chat$", "gpt-5", "mid"), (r"^openai/gpt-4\.1$", "gpt-4", "mid"), (r"^openai/gpt-4o$", "gpt-4", "mid"), (r"^openai/gpt-4-turbo", "gpt-4", "mid"), @@ -110,6 +113,8 @@ MODEL_FAMILY_PATTERNS = [ # Qwen (r"^qwen/qwen-2\.5-72b", "qwen-72b", "mid"), (r"^qwen/qwq-32b", "qwq", "mid"), + (r"^qwen/qwen3-next-80b.*thinking", "qwen3-thinking", "flagship"), + (r"^qwen/qwen3-235b.*instruct", "qwen3-instruct", "mid"), ] HEADERS = { diff --git a/src/budget/pricing.rs b/src/budget/pricing.rs index e8c15bb..d111588 100644 --- a/src/budget/pricing.rs +++ b/src/budget/pricing.rs @@ -238,7 +238,10 @@ impl ModelPricing { // Flagship tier "openai/o1", "openai/o1-preview", + "openai/gpt-5.2-pro", // Mid tier + "openai/gpt-5.2", + "openai/gpt-5.2-chat", "openai/gpt-4.1", "openai/gpt-4o", "openai/gpt-4-turbo",