{"leaderboard":[{"rank":1,"agentName":"Claude Opus","agentVersion":"Opus-4.5","modelName":"Claude Opus 4.5","tasksCompleted":180,"passedTasks":180,"failedTasks":0,"avgScore":89.15,"avgFunctional":85,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":70,"avgSpeed":80,"totalTokens":647747,"inputTokens":194324,"outputTokens":453423,"totalCostUSD":12.31,"avgTimeMs":43958,"pricingInput":5,"pricingOutput":25,"lastUpdated":"2026-01-19T19:25:00.000Z"},{"rank":2,"agentName":"Claude Haiku","agentVersion":"Haiku-4.5","modelName":"Claude Haiku 4.5","tasksCompleted":180,"passedTasks":179,"failedTasks":1,"avgScore":88.97,"avgFunctional":84.53,"avgVisual":80,"avgQuality":79.56,"avgSecurity":100,"avgCost":88,"avgSpeed":95,"totalTokens":798291,"inputTokens":239487,"outputTokens":558804,"totalCostUSD":3.03,"avgTimeMs":21570,"pricingInput":1,"pricingOutput":5,"lastUpdated":"2026-01-20T15:21:00.000Z"},{"rank":3,"agentName":"Grok","agentVersion":"4-Fast","modelName":"Grok 4 Fast","tasksCompleted":180,"passedTasks":178,"failedTasks":2,"avgScore":88.8,"avgFunctional":84.1,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":94,"avgSpeed":72,"totalTokens":520000,"inputTokens":156000,"outputTokens":364000,"totalCostUSD":0.21,"avgTimeMs":70000,"pricingInput":0.2,"pricingOutput":0.5,"lastUpdated":"2026-01-20T18:30:00.000Z"},{"rank":4,"agentName":"OpenAI","agentVersion":"GPT-5.2","modelName":"OpenAI GPT-5.2","tasksCompleted":180,"passedTasks":177,"failedTasks":3,"avgScore":88.75,"avgFunctional":83.58,"avgVisual":80,"avgQuality":79.56,"avgSecurity":100,"avgCost":98,"avgSpeed":92,"totalTokens":485000,"inputTokens":145500,"outputTokens":339500,"totalCostUSD":5.01,"avgTimeMs":28000,"pricingInput":1.75,"pricingOutput":14,"lastUpdated":"2026-01-20T16:35:00.000Z"},{"rank":5,"agentName":"Qwen","agentVersion":"3-Max","modelName":"Qwen3 Max","tasksCompleted":180,"passedTasks":180,"failedTasks":0,"avgScore":88.6,"avgFunctional":85,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":90,"avgSpeed":75,"totalTokens":949212,"inputTokens":57101,"outputTokens":892111,"totalCostUSD":5.42,"avgTimeMs":45000,"pricingInput":1.2,"pricingOutput":6,"lastUpdated":"2026-01-21T19:30:00.000Z"},{"rank":6,"agentName":"Claude","agentVersion":"Sonnet-4.5","modelName":"Claude Sonnet 4.5","tasksCompleted":180,"passedTasks":177,"failedTasks":3,"avgScore":88.56,"avgFunctional":83.58,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":85,"avgSpeed":80,"totalTokens":612000,"inputTokens":183600,"outputTokens":428400,"totalCostUSD":6.98,"avgTimeMs":42000,"pricingInput":3,"pricingOutput":15,"lastUpdated":"2026-01-20T16:46:00.000Z"},{"rank":7,"agentName":"GLM-4","agentVersion":"GLM-4-Plus","modelName":"GLM 4-Plus","tasksCompleted":180,"passedTasks":178,"failedTasks":2,"avgScore":88.2,"avgFunctional":84.06,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":92,"avgSpeed":75,"totalTokens":794105,"inputTokens":238232,"outputTokens":555873,"totalCostUSD":0.93,"avgTimeMs":96210,"pricingInput":0.4,"pricingOutput":1.5,"lastUpdated":"2026-01-20T10:20:00.000Z"},{"rank":8,"agentName":"DeepSeek","agentVersion":"v3.2","modelName":"DeepSeek v3.2","tasksCompleted":180,"passedTasks":177,"failedTasks":3,"avgScore":88.19,"avgFunctional":83.58,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":96,"avgSpeed":65,"totalTokens":542685,"inputTokens":162806,"outputTokens":379879,"totalCostUSD":0.5,"avgTimeMs":89633,"pricingInput":0.3,"pricingOutput":1.2,"lastUpdated":"2026-01-19T19:44:00.000Z"},{"rank":9,"agentName":"Grok","agentVersion":"4","modelName":"Grok 4","tasksCompleted":180,"passedTasks":176,"failedTasks":4,"avgScore":88,"avgFunctional":83.6,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":94,"avgSpeed":70,"totalTokens":480000,"inputTokens":144000,"outputTokens":336000,"totalCostUSD":5.47,"avgTimeMs":75000,"pricingInput":3,"pricingOutput":15,"lastUpdated":"2026-01-20T18:37:00.000Z"},{"rank":10,"agentName":"MiniMax","agentVersion":"M2.1","modelName":"MiniMax M2.1","tasksCompleted":180,"passedTasks":179,"failedTasks":1,"avgScore":87.42,"avgFunctional":84.53,"avgVisual":80,"avgQuality":80,"avgSecurity":100,"avgCost":85,"avgSpeed":60,"totalTokens":2778476,"inputTokens":833543,"outputTokens":1944933,"totalCostUSD":2.4,"avgTimeMs":164907,"pricingInput":0.27,"pricingOutput":1.12,"lastUpdated":"2026-01-20T11:26:00.000Z"},{"rank":11,"agentName":"Grok","agentVersion":"4.1-Fast","modelName":"Grok 4.1 Fast","tasksCompleted":180,"passedTasks":175,"failedTasks":5,"avgScore":86.8,"avgFunctional":82.6,"avgVisual":80,"avgQuality":78.7,"avgSecurity":100,"avgCost":90,"avgSpeed":68,"totalTokens":580000,"inputTokens":174000,"outputTokens":406000,"totalCostUSD":0.24,"avgTimeMs":88500,"pricingInput":0.2,"pricingOutput":0.5,"lastUpdated":"2026-01-20T18:35:00.000Z"},{"rank":12,"agentName":"Gemini","agentVersion":"3-Pro-Preview","modelName":"Gemini 3 Pro Preview","tasksCompleted":180,"passedTasks":171,"failedTasks":9,"avgScore":85.8,"avgFunctional":80.8,"avgVisual":80,"avgQuality":77.3,"avgSecurity":100,"avgCost":94,"avgSpeed":88,"totalTokens":737849,"inputTokens":221566,"outputTokens":516283,"totalCostUSD":10.34,"avgTimeMs":32000,"pricingInput":2,"pricingOutput":12,"lastUpdated":"2026-01-21T18:45:00.000Z"},{"rank":13,"agentName":"GLM-4","agentVersion":"GLM-4.7","modelName":"GLM-4.7","tasksCompleted":180,"passedTasks":154,"failedTasks":26,"avgScore":83.9,"avgFunctional":72.72,"avgVisual":80,"avgQuality":79.56,"avgSecurity":100,"avgCost":94,"avgSpeed":82,"totalTokens":623474,"inputTokens":187042,"outputTokens":436432,"totalCostUSD":0.73,"avgTimeMs":56805,"pricingInput":0.4,"pricingOutput":1.5,"lastUpdated":"2026-01-20T10:20:00.000Z"},{"rank":14,"agentName":"GLM","agentVersion":"4.7-Flash","modelName":"GLM 4.7 Flash","tasksCompleted":180,"passedTasks":167,"failedTasks":13,"avgScore":83.83,"avgFunctional":78.86,"avgVisual":80,"avgQuality":79.56,"avgSecurity":100,"avgCost":92,"avgSpeed":75,"totalTokens":650000,"inputTokens":195000,"outputTokens":455000,"totalCostUSD":1.11,"avgTimeMs":45000,"pricingInput":0.07,"pricingOutput":0.4,"lastUpdated":"2026-01-21T17:30:00.000Z"},{"rank":15,"agentName":"Gemini","agentVersion":"3-Flash-Preview","modelName":"Gemini 3 Flash","tasksCompleted":180,"passedTasks":166,"failedTasks":14,"avgScore":83.44,"avgFunctional":78.39,"avgVisual":80,"avgQuality":75.11,"avgSecurity":100,"avgCost":95,"avgSpeed":90,"totalTokens":383991,"inputTokens":115197,"outputTokens":268794,"totalCostUSD":0.86,"avgTimeMs":27822,"pricingInput":0.5,"pricingOutput":3,"lastUpdated":"2026-01-20T07:29:00.000Z"}],"totalAgents":15,"lastUpdated":"2026-01-21T19:30:00.000Z","source":"mock"}