{
  "version": 2,
  "license": "CC-BY-4.0",
  "models": [
    {
      "provider": "Anthropic",
      "provider_url": "https://www.anthropic.com",
      "model_id": "claude-opus-4-7",
      "display_name": "Claude Opus 4.7",
      "model_family": "Claude 4",
      "knowledge_cutoff": "2026-01-31",
      "aliases": ["anthropic.claude-opus-4-7"],
      "context_window": 1000000,
      "max_output_tokens": 128000,
      "input_per_mtok_usd": "5.0",
      "output_per_mtok_usd": "25.0",
      "cache_read_per_mtok_usd": "0.5",
      "cache_write_per_mtok_usd": "6.25",
      "batch_input_per_mtok_usd": "2.5",
      "batch_output_per_mtok_usd": "12.5",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "bedrock", "vertex"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.anthropic.com/pricing",
      "notes": "Cache hit pricing is 0.1x base input ($0.50/MTok); 5-minute cache write is 1.25x base ($6.25/MTok); 1-hour cache write is 2x ($10/MTok). Batch API discounts both input and output by 50%. Knowledge cutoff Jan 2026. Uses a new tokenizer vs prior Claude models (may use up to 35% more tokens for identical text). Supports adaptive thinking (no extended-thinking toggle); thinking output tokens are billed at the output rate."
    },
    {
      "provider": "Anthropic",
      "provider_url": "https://www.anthropic.com",
      "model_id": "claude-sonnet-4-6",
      "display_name": "Claude Sonnet 4.6",
      "model_family": "Claude 4",
      "knowledge_cutoff": "2025-08-31",
      "aliases": ["anthropic.claude-sonnet-4-6"],
      "context_window": 1000000,
      "max_output_tokens": 64000,
      "input_per_mtok_usd": "3.0",
      "output_per_mtok_usd": "15.0",
      "cache_read_per_mtok_usd": "0.3",
      "cache_write_per_mtok_usd": "3.75",
      "batch_input_per_mtok_usd": "1.5",
      "batch_output_per_mtok_usd": "7.5",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "bedrock", "vertex"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.anthropic.com/pricing",
      "notes": "Cache hit pricing is 0.1x base input ($0.30/MTok); 5-minute cache write is 1.25x base ($3.75/MTok); 1-hour cache write is 2x ($6/MTok). Batch API discounts both input and output by 50%. Knowledge cutoff Aug 2025. Supports extended thinking and adaptive thinking; thinking output tokens are billed at the output rate."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "gpt-5",
      "display_name": "GPT-5",
      "model_family": "GPT-5",
      "knowledge_cutoff": "2024-09-30",
      "context_window": 400000,
      "max_output_tokens": 128000,
      "input_per_mtok_usd": "1.25",
      "output_per_mtok_usd": "10.0",
      "cache_read_per_mtok_usd": "0.125",
      "batch_input_per_mtok_usd": "0.625",
      "batch_output_per_mtok_usd": "5.0",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "azure"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/gpt-5",
      "notes": "Reasoning model with adjustable reasoning_effort; reasoning tokens are billed at the output rate. Context window 400k, max output 128k confirmed against developers.openai.com/api/docs/models/gpt-5 (previous row had max_output_tokens as low-confidence). Cached input at 10% of base ($0.125/MTok). Batch API at flat 50% off input and output. PDF input via the Files API; image input native; audio is NOT supported on this model_id. Knowledge cutoff Sept 2024. Not on the April 2026 deprecation list."
    },
    {
      "provider": "Google",
      "provider_url": "https://deepmind.google",
      "model_id": "gemini-2.5-pro",
      "display_name": "Gemini 2.5 Pro",
      "model_family": "Gemini 2.5",
      "knowledge_cutoff": "2025-01-31",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "input_per_mtok_usd": "1.25",
      "output_per_mtok_usd": "10.0",
      "cache_read_per_mtok_usd": "0.125",
      "cache_storage_per_mtok_per_hour_usd": "4.50",
      "batch_input_per_mtok_usd": "0.625",
      "batch_output_per_mtok_usd": "5.0",
      "pricing_tiers": [
        {
          "threshold_tokens": 200000,
          "input_per_mtok_usd": "2.5",
          "output_per_mtok_usd": "15.0",
          "cache_read_per_mtok_usd": "0.25"
        }
      ],
      "modalities": {
        "input": ["text", "image", "audio", "video"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": true,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "vertex"],
      "confidence": "high",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://ai.google.dev/pricing",
      "notes": "Input pricing shown is for prompts <=200k tokens; prompts >200k tokens are billed per pricing_tiers; cached input also tiers at 200k (the >200k rate is captured in pricing_tiers[0].cache_read_per_mtok_usd). Context window and max_output_tokens confirmed via ai.google.dev/gemini-api/docs/models/gemini-2.5-pro. Thinking is always on and cannot be disabled; thinking tokens are billed at the output rate. Knowledge cutoff January 2025. Batch Mode discount is a flat 50% off input/output. Audio input is billed at the standard input rate of $1.25/MTok (no separate audio premium, unlike 2.5 Flash/Flash-Lite/2.0 Flash); audio_input_per_mtok_usd omitted. Explicit context caching storage is captured in cache_storage_per_mtok_per_hour_usd. Free tier (AI Studio) is published as per-minute RPM/TPM only, not per-day; free_tier omitted. PDF input via the Files API; image, audio, and video native."
    },
    {
      "provider": "DeepSeek",
      "provider_url": "https://www.deepseek.com",
      "model_id": "deepseek-v4-flash",
      "display_name": "DeepSeek V4 Flash",
      "model_family": "DeepSeek V4",
      "context_window": 1048576,
      "max_output_tokens": 384000,
      "input_per_mtok_usd": "0.14",
      "output_per_mtok_usd": "0.28",
      "cache_read_per_mtok_usd": "0.0028",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native"],
      "last_verified": "2026-05-18",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://api-docs.deepseek.com/quick_start/pricing",
      "notes": "MoE architecture: 284B total parameters, 13B activated. Replaces deepseek-chat (V3-era alias). Cached input price ($0.0028/MTok) is 1/50th of base input, effective after DeepSeek reduced cache hit rates on 2026-04-26. Supports both non-thinking and thinking (default) modes; thinking output is billed at the same output rate, so reasoning_tokens_billed: true. deepseek-chat and deepseek-reasoner legacy aliases are scheduled for discontinuation on 2026-07-24; until then they route to V4 Flash non-thinking and thinking modes respectively."
    },
    {
      "provider": "DeepSeek",
      "provider_url": "https://www.deepseek.com",
      "model_id": "deepseek-v4-pro",
      "display_name": "DeepSeek V4 Pro",
      "model_family": "DeepSeek V4",
      "context_window": 1048576,
      "max_output_tokens": 384000,
      "input_per_mtok_usd": "1.74",
      "output_per_mtok_usd": "3.48",
      "cache_read_per_mtok_usd": "0.0145",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native"],
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-26",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://api-docs.deepseek.com/quick_start/pricing",
      "notes": "Pro-tier sibling to deepseek-v4-flash, positioned for higher-quality responses at lower concurrency (500 vs Flash's 2500). Structured prices ($1.74 / $3.48 per MTok; cache hit $0.0145) are the list rates published by DeepSeek; a 75% launch promotion is in effect until 2026-05-31 15:59 UTC, during which the effective billed rates are $0.435 input / $0.87 output / $0.003625 cache hit. The DeepSeek pricing page also mentions post-expiration rates of 1/4 the original — re-verify after the promo lapses to confirm which interpretation lands. Cache miss is billed at the base input rate. Supports both non-thinking and thinking modes with tool calls and JSON output; thinking output is billed at the output rate, so reasoning_tokens_billed: true. DeepSeek did not publish an explicit launch date; last_changed_at set to verification date."
    },
    {
      "provider": "DeepSeek",
      "provider_url": "https://www.deepseek.com",
      "model_id": "deepseek-chat",
      "display_name": "DeepSeek V3",
      "model_family": "DeepSeek V3",
      "context_window": 1048576,
      "max_output_tokens": 384000,
      "input_per_mtok_usd": "0.14",
      "output_per_mtok_usd": "0.28",
      "cache_read_per_mtok_usd": "0.0028",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native"],
      "deprecated_at": "2026-04-24",
      "replaced_by_model_id": "deepseek-v4-flash",
      "last_verified": "2026-05-18",
      "last_changed_at": "2026-04-24",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://api-docs.deepseek.com/quick_start/pricing",
      "notes": "Canonical API model_id for the DeepSeek V3 lineage (V3 launched 2024-12-26 as deepseek-chat; upgraded through V3-0324, V3.1, V3.1-Terminus, V3.2 by 2025-12-01). Deprecated 2026-04-24 when V4 launched; still callable until scheduled discontinuation 2026-07-24, currently routing to deepseek-v4-flash non-thinking mode (prices captured here reflect that routing). DeepSeek's pricing page no longer publishes V3-era historical rates; standalone deepseek-v3 model_id was never exposed by the API."
    },
    {
      "provider": "DeepSeek",
      "provider_url": "https://www.deepseek.com",
      "model_id": "deepseek-reasoner",
      "display_name": "DeepSeek R1",
      "model_family": "DeepSeek R1",
      "context_window": 1048576,
      "max_output_tokens": 384000,
      "input_per_mtok_usd": "0.14",
      "output_per_mtok_usd": "0.28",
      "cache_read_per_mtok_usd": "0.0028",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native"],
      "deprecated_at": "2026-04-24",
      "replaced_by_model_id": "deepseek-v4-flash",
      "last_verified": "2026-05-18",
      "last_changed_at": "2026-04-24",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://api-docs.deepseek.com/quick_start/pricing",
      "notes": "Canonical API model_id for the DeepSeek R1 reasoning lineage (R1 launched 2025-01-20 as deepseek-reasoner; R1-0528 update 2025-05-28). Deprecated 2026-04-24 when V4 launched; still callable until scheduled discontinuation 2026-07-24, currently routing to deepseek-v4-flash thinking mode (prices captured here reflect that routing). Reasoning output is billed at the standard output rate (reasoning_tokens_billed: true). DeepSeek's pricing page no longer publishes R1-era historical rates; standalone deepseek-r1 model_id was never exposed by the API."
    },
    {
      "provider": "Anthropic",
      "provider_url": "https://www.anthropic.com",
      "model_id": "claude-haiku-4-5-20251001",
      "display_name": "Claude Haiku 4.5",
      "model_family": "Claude 4",
      "knowledge_cutoff": "2025-02-28",
      "aliases": [
        "claude-haiku-4-5",
        "anthropic.claude-haiku-4-5-20251001-v1:0",
        "claude-haiku-4-5@20251001"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "input_per_mtok_usd": "1.0",
      "output_per_mtok_usd": "5.0",
      "cache_read_per_mtok_usd": "0.1",
      "cache_write_per_mtok_usd": "1.25",
      "batch_input_per_mtok_usd": "0.5",
      "batch_output_per_mtok_usd": "2.5",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "bedrock", "vertex"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2025-10-01",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "notes": "Anthropic's fastest model with near-frontier intelligence; positioned for high-volume agentic workloads. Cache hit is 0.1x base input ($0.10/MTok); 5-minute cache write is 1.25x ($1.25/MTok); 1-hour cache write is 2x ($2/MTok). Batch API discounts both input and output by 50%. Supports extended thinking; thinking output tokens are billed at the output rate. Reliable knowledge cutoff Feb 2025; training data cutoff Jul 2025."
    },
    {
      "provider": "Anthropic",
      "provider_url": "https://www.anthropic.com",
      "model_id": "claude-sonnet-4-5-20250929",
      "display_name": "Claude Sonnet 4.5",
      "model_family": "Claude 4",
      "knowledge_cutoff": "2025-01-31",
      "aliases": [
        "claude-sonnet-4-5",
        "anthropic.claude-sonnet-4-5-20250929-v1:0",
        "claude-sonnet-4-5@20250929"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "input_per_mtok_usd": "3.0",
      "output_per_mtok_usd": "15.0",
      "cache_read_per_mtok_usd": "0.3",
      "cache_write_per_mtok_usd": "3.75",
      "batch_input_per_mtok_usd": "1.5",
      "batch_output_per_mtok_usd": "7.5",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "bedrock", "vertex"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2025-09-29",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "notes": "Legacy listing in Anthropic's models overview but still active. Pricing identical to Sonnet 4.6, but 200k context window (vs 1M on Sonnet 4.6). Cache hit is 0.1x base input ($0.30/MTok); 5-minute cache write is 1.25x ($3.75/MTok); 1-hour cache write is 2x ($6/MTok). Batch API discounts both input and output by 50%. Supports extended thinking; thinking output tokens are billed at the output rate. Reliable knowledge cutoff Jan 2025."
    },
    {
      "provider": "Anthropic",
      "provider_url": "https://www.anthropic.com",
      "model_id": "claude-opus-4-5-20251101",
      "display_name": "Claude Opus 4.5",
      "model_family": "Claude 4",
      "knowledge_cutoff": "2025-05-31",
      "aliases": [
        "claude-opus-4-5",
        "anthropic.claude-opus-4-5-20251101-v1:0",
        "claude-opus-4-5@20251101"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "input_per_mtok_usd": "5.0",
      "output_per_mtok_usd": "25.0",
      "cache_read_per_mtok_usd": "0.5",
      "cache_write_per_mtok_usd": "6.25",
      "batch_input_per_mtok_usd": "2.5",
      "batch_output_per_mtok_usd": "12.5",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "bedrock", "vertex"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2025-11-01",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "notes": "Legacy listing in Anthropic's models overview but still active. Pricing identical to Opus 4.6/4.7, but 200k context window (vs 1M on 4.6/4.7) and 64k max output (vs 128k). Cache hit is 0.1x base input ($0.50/MTok); 5-minute cache write is 1.25x ($6.25/MTok); 1-hour cache write is 2x ($10/MTok). Batch API discounts both input and output by 50%. Supports extended thinking; thinking output tokens are billed at the output rate. Reliable knowledge cutoff May 2025."
    },
    {
      "provider": "Anthropic",
      "provider_url": "https://www.anthropic.com",
      "model_id": "claude-3-7-sonnet-20250219",
      "display_name": "Claude Sonnet 3.7",
      "model_family": "Claude 3.7",
      "knowledge_cutoff": "2024-10-31",
      "aliases": [
        "claude-3-7-sonnet-latest",
        "anthropic.claude-3-7-sonnet-20250219-v1:0",
        "claude-3-7-sonnet@20250219"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "input_per_mtok_usd": "3.0",
      "output_per_mtok_usd": "15.0",
      "cache_read_per_mtok_usd": "0.3",
      "cache_write_per_mtok_usd": "3.75",
      "batch_input_per_mtok_usd": "1.5",
      "batch_output_per_mtok_usd": "7.5",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["bedrock", "vertex"],
      "deprecated_at": "2025-10-28",
      "replaced_by_model_id": "claude-sonnet-4-6",
      "confidence": "medium",
      "last_verified": "2026-05-17",
      "last_changed_at": "2025-10-28",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://platform.claude.com/docs/en/about-claude/model-deprecations",
      "notes": "RETIRED on the Claude API on 2026-02-19; still available on Amazon Bedrock and Google Vertex AI under partner retirement schedules. Anthropic's first reasoning model with extended thinking; can output up to 64k tokens in thinking mode (128k with the output-128k-2025-02-19 beta header). Prices are no longer listed on Anthropic's current pricing page; values sourced from OpenRouter and pricepertoken.com (confidence: medium). Cache and batch pricing inferred from Anthropic's standard multipliers (1.25x 5-min write, 0.1x cache read, 0.5x batch)."
    },
    {
      "provider": "Anthropic",
      "provider_url": "https://www.anthropic.com",
      "model_id": "claude-3-5-haiku-20241022",
      "display_name": "Claude Haiku 3.5",
      "model_family": "Claude 3.5",
      "knowledge_cutoff": "2024-07-31",
      "aliases": [
        "claude-3-5-haiku-latest",
        "anthropic.claude-3-5-haiku-20241022-v1:0",
        "claude-3-5-haiku@20241022"
      ],
      "context_window": 200000,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.8",
      "output_per_mtok_usd": "4.0",
      "cache_read_per_mtok_usd": "0.08",
      "cache_write_per_mtok_usd": "1.0",
      "batch_input_per_mtok_usd": "0.4",
      "batch_output_per_mtok_usd": "2.0",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": false,
      "deployment_options": ["bedrock", "vertex"],
      "deprecated_at": "2025-12-19",
      "replaced_by_model_id": "claude-haiku-4-5-20251001",
      "confidence": "medium",
      "last_verified": "2026-05-17",
      "last_changed_at": "2025-12-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://platform.claude.com/docs/en/about-claude/pricing",
      "notes": "RETIRED on the Claude API on 2026-02-19; still listed on Anthropic's pricing page as available on Amazon Bedrock and Google Vertex AI only. No extended-thinking support. max_output_tokens=8192 sourced from Anthropic legacy model card and OpenRouter (confidence: medium — not present in current docs). Cache hit is 0.1x base input ($0.08/MTok); 5-minute cache write is 1.25x ($1/MTok); 1-hour cache write is 2x ($1.60/MTok). Batch API discounts both input and output by 50%."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "gpt-5-mini",
      "display_name": "GPT-5 mini",
      "model_family": "GPT-5",
      "knowledge_cutoff": "2024-05-31",
      "context_window": 400000,
      "max_output_tokens": 128000,
      "input_per_mtok_usd": "0.25",
      "output_per_mtok_usd": "2.0",
      "cache_read_per_mtok_usd": "0.025",
      "batch_input_per_mtok_usd": "0.125",
      "batch_output_per_mtok_usd": "1.0",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "azure"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-17",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/gpt-5-mini",
      "notes": "Faster, more cost-efficient GPT-5 variant for low-latency, high-volume workloads. Cached input at 10% of base ($0.025/MTok). Batch API at flat 50% off input and output. Reasoning model with adjustable reasoning_effort; reasoning tokens are billed at the output rate. PDF input via the Files API; image input native. Knowledge cutoff May 2024."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "gpt-5-nano",
      "display_name": "GPT-5 nano",
      "model_family": "GPT-5",
      "knowledge_cutoff": "2024-05-31",
      "context_window": 400000,
      "max_output_tokens": 128000,
      "input_per_mtok_usd": "0.05",
      "output_per_mtok_usd": "0.4",
      "cache_read_per_mtok_usd": "0.005",
      "batch_input_per_mtok_usd": "0.025",
      "batch_output_per_mtok_usd": "0.2",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "azure"],
      "confidence": "medium",
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-17",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/gpt-5-nano",
      "notes": "Smallest GPT-5 variant; OpenAI model card lists 'Reasoning model: No' with 'Average' reasoning capability — reasoning_tokens_billed set to false on that basis (confidence: medium because other GPT-5 family members are reasoning models). Cached input at 10% of base ($0.005/MTok). Batch API at flat 50% off input and output. Knowledge cutoff May 2024."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "gpt-4.1",
      "display_name": "GPT-4.1",
      "model_family": "GPT-4.1",
      "knowledge_cutoff": "2024-06-01",
      "context_window": 1047576,
      "max_output_tokens": 32768,
      "input_per_mtok_usd": "2.0",
      "output_per_mtok_usd": "8.0",
      "cache_read_per_mtok_usd": "0.5",
      "batch_input_per_mtok_usd": "1.0",
      "batch_output_per_mtok_usd": "4.0",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "azure"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-17",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/gpt-4.1",
      "notes": "Non-reasoning flagship with a ~1M-token context window (1,047,576). Cached input at 25% of base ($0.50/MTok). Batch API at flat 50% off input and output. PDF input via the Files API; image input native. Knowledge cutoff June 2024."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "gpt-4o",
      "display_name": "GPT-4o",
      "model_family": "GPT-4o",
      "knowledge_cutoff": "2023-10-01",
      "context_window": 128000,
      "max_output_tokens": 16384,
      "input_per_mtok_usd": "2.5",
      "output_per_mtok_usd": "10.0",
      "cache_read_per_mtok_usd": "1.25",
      "batch_input_per_mtok_usd": "1.25",
      "batch_output_per_mtok_usd": "5.0",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "azure"],
      "deprecated_at": "2026-04-22",
      "replaced_by_model_id": "gpt-4.1",
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-04-22",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/gpt-4o",
      "notes": "Deprecated 2026-04-22 (dated alias gpt-4o-2024-05-13 scheduled for shutdown 2026-10-23 per OpenAI deprecations page); still serving on the API as of 2026-05-17. Audio input/output are NOT supported on this model_id — they live on a sibling gpt-4o-audio-preview model card with separate pricing (audio input $40/MTok, audio output $80/MTok); text-mode prices captured here. Cached input at 50% of base ($1.25/MTok). Batch API at flat 50% off input and output. Knowledge cutoff Oct 2023."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "gpt-4o-mini",
      "display_name": "GPT-4o mini",
      "model_family": "GPT-4o",
      "knowledge_cutoff": "2023-10-01",
      "context_window": 128000,
      "max_output_tokens": 16384,
      "input_per_mtok_usd": "0.15",
      "output_per_mtok_usd": "0.6",
      "cache_read_per_mtok_usd": "0.075",
      "batch_input_per_mtok_usd": "0.075",
      "batch_output_per_mtok_usd": "0.3",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "azure"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-17",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/gpt-4o-mini",
      "notes": "Not on the April 2026 deprecation list; remains active. Audio input/output are NOT supported on this model_id — they live on a sibling gpt-4o-mini-audio-preview model card; text-mode prices captured here. Cached input at 50% of base ($0.075/MTok). Batch API at flat 50% off input and output. Knowledge cutoff Oct 2023."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "o3",
      "display_name": "OpenAI o3",
      "model_family": "o-series",
      "knowledge_cutoff": "2024-06-01",
      "context_window": 200000,
      "max_output_tokens": 100000,
      "input_per_mtok_usd": "2.0",
      "output_per_mtok_usd": "8.0",
      "cache_read_per_mtok_usd": "0.5",
      "batch_input_per_mtok_usd": "1.0",
      "batch_output_per_mtok_usd": "4.0",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "azure"],
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-05-17",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/o3",
      "notes": "Reasoning model for complex tasks; reasoning tokens are billed at the output rate. Bare 'o3' alias remains active (dated o3-mini-2025-01-31 is deprecated but a separate model). OpenAI's model card notes 'o3 is succeeded by GPT-5' but does not list o3 itself as deprecated. Cached input at 25% of base ($0.50/MTok). Batch API at flat 50% off input and output. Knowledge cutoff June 2024."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "o4-mini",
      "display_name": "OpenAI o4-mini",
      "model_family": "o-series",
      "knowledge_cutoff": "2024-06-01",
      "context_window": 200000,
      "max_output_tokens": 100000,
      "input_per_mtok_usd": "1.1",
      "output_per_mtok_usd": "4.4",
      "cache_read_per_mtok_usd": "0.275",
      "batch_input_per_mtok_usd": "0.55",
      "batch_output_per_mtok_usd": "2.2",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "azure"],
      "deprecated_at": "2026-04-22",
      "replaced_by_model_id": "gpt-5-mini",
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-04-22",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/o4-mini",
      "notes": "Fast, cost-efficient reasoning model; reasoning tokens are billed at the output rate. Deprecated 2026-04-22 (dated alias o4-mini-2025-04-16 scheduled for shutdown 2026-10-23 per OpenAI deprecations page); still serving as of 2026-05-17. OpenAI's model card notes 'succeeded by GPT-5 mini'. Cached input at 25% of base ($0.275/MTok). Batch API at flat 50% off input and output. Knowledge cutoff June 2024."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "o1",
      "display_name": "OpenAI o1",
      "model_family": "o-series",
      "knowledge_cutoff": "2023-10-01",
      "context_window": 200000,
      "max_output_tokens": 100000,
      "input_per_mtok_usd": "15.0",
      "output_per_mtok_usd": "60.0",
      "cache_read_per_mtok_usd": "7.5",
      "batch_input_per_mtok_usd": "7.5",
      "batch_output_per_mtok_usd": "30.0",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "azure"],
      "deprecated_at": "2026-04-22",
      "replaced_by_model_id": "o3",
      "last_verified": "2026-05-17",
      "last_changed_at": "2026-04-22",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/o1",
      "notes": "First-generation reasoning model; reasoning tokens are billed at the output rate. Deprecated 2026-04-22 (dated alias o1-2024-12-17 scheduled for shutdown 2026-10-23 per OpenAI deprecations page); still serving as of 2026-05-17. The replacement OpenAI recommends in its developer community post is gpt-5.5, which is not yet in this dataset, so replaced_by_model_id is set to the in-file successor 'o3'. Cached input at 50% of base ($7.50/MTok). Batch API at flat 50% off input and output. Knowledge cutoff Oct 2023."
    },
    {
      "provider": "Google",
      "provider_url": "https://deepmind.google",
      "model_id": "gemini-2.5-flash",
      "display_name": "Gemini 2.5 Flash",
      "model_family": "Gemini 2.5",
      "knowledge_cutoff": "2025-01-31",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "input_per_mtok_usd": "0.30",
      "output_per_mtok_usd": "2.50",
      "audio_input_per_mtok_usd": "1.00",
      "cache_read_per_mtok_usd": "0.03",
      "cache_storage_per_mtok_per_hour_usd": "1.00",
      "batch_input_per_mtok_usd": "0.15",
      "batch_output_per_mtok_usd": "1.25",
      "modalities": {
        "input": ["text", "image", "audio", "video"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": true,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "vertex"],
      "confidence": "high",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-18",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "notes": "Hybrid reasoning model with dynamic thinking on by default; thinking can be disabled via thinkingBudget=0. When thinking is on, response pricing is the sum of output and thinking tokens (both billed at the output rate). Audio input is priced separately (captured in audio_input_per_mtok_usd); cached audio input is $0.10/MTok vs $0.03/MTok for text/image/video. Batch Mode at flat 50% off; audio batch input is $0.50/MTok. No long-context tier. Context window and max_output_tokens confirmed via ai.google.dev/gemini-api/docs/models/gemini-2.5-flash. Knowledge cutoff January 2025. Explicit context caching storage is captured in cache_storage_per_mtok_per_hour_usd. Free tier (AI Studio) is published as per-minute RPM/TPM only, not per-day; free_tier omitted. Cross-verified prompt/completion price against openrouter.ai/google/gemini-2.5-flash."
    },
    {
      "provider": "Google",
      "provider_url": "https://deepmind.google",
      "model_id": "gemini-2.5-flash-lite",
      "display_name": "Gemini 2.5 Flash-Lite",
      "model_family": "Gemini 2.5",
      "knowledge_cutoff": "2025-01-31",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "input_per_mtok_usd": "0.10",
      "output_per_mtok_usd": "0.40",
      "audio_input_per_mtok_usd": "0.30",
      "cache_read_per_mtok_usd": "0.01",
      "cache_storage_per_mtok_per_hour_usd": "1.00",
      "batch_input_per_mtok_usd": "0.05",
      "batch_output_per_mtok_usd": "0.20",
      "modalities": {
        "input": ["text", "image", "audio", "video"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": true,
      "supports_audio_out": false,
      "supports_pdf": true,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native", "vertex"],
      "confidence": "high",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-18",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "notes": "Hybrid reasoning model; thinking is OFF by default (unlike 2.5 Flash/Pro) but can be enabled by setting thinkingBudget. When thinking is enabled, response pricing is the sum of output and thinking tokens at the output rate, so reasoning_tokens_billed is true. Audio input is priced separately (captured in audio_input_per_mtok_usd); cached audio input is $0.03/MTok vs $0.01/MTok for text/image/video. Batch Mode at flat 50% off; audio batch input is $0.15/MTok. No long-context tier. Context window and max_output_tokens confirmed via ai.google.dev/gemini-api/docs/models/gemini-2.5-flash-lite. Knowledge cutoff January 2025. Explicit context caching storage is captured in cache_storage_per_mtok_per_hour_usd. Free tier (AI Studio) is published as per-minute RPM/TPM only, not per-day; free_tier omitted. Cross-verified prompt/completion price against openrouter.ai/google/gemini-2.5-flash-lite."
    },
    {
      "provider": "Google",
      "provider_url": "https://deepmind.google",
      "model_id": "gemini-2.0-flash",
      "display_name": "Gemini 2.0 Flash",
      "model_family": "Gemini 2.0",
      "knowledge_cutoff": "2024-08-31",
      "context_window": 1048576,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.10",
      "output_per_mtok_usd": "0.40",
      "audio_input_per_mtok_usd": "0.70",
      "cache_read_per_mtok_usd": "0.025",
      "cache_storage_per_mtok_per_hour_usd": "1.00",
      "batch_input_per_mtok_usd": "0.05",
      "batch_output_per_mtok_usd": "0.20",
      "modalities": {
        "input": ["text", "image", "audio", "video"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": true,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "vertex"],
      "deprecated_at": "2026-06-01",
      "replaced_by_model_id": "gemini-2.5-flash",
      "confidence": "medium",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-18",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://ai.google.dev/gemini-api/docs/pricing",
      "notes": "Deprecated; scheduled shutdown 2026-06-01 (deprecated_at carries this shutdown date). Google's documented migration target is a Gemini 3 Flash preview model not yet in this dataset; replaced_by_model_id points at gemini-2.5-flash as the closest in-file successor. Standard production 2.0 Flash does not support thinking (thinking exists only on Gemini 2.5+ and 3 series per ai.google.dev/gemini-api/docs/thinking); reasoning_tokens_billed=false. Audio input is priced separately (captured in audio_input_per_mtok_usd); cached audio input is $0.175/MTok vs $0.025/MTok for text/image/video. Batch Mode at flat 50% off. No long-context tier. supports_pdf=false since the 2.0 Flash model card lists supported inputs as audio/images/video/text (PDF not enumerated). Confidence medium because Vertex AI's published pricing for the same model name differs ($0.15 input / $0.60 output) from AI Studio's $0.10/$0.40; AI Studio primary value retained per spec, and OpenRouter (openrouter.ai/google/gemini-2.0-flash-001) cross-confirms $0.10/$0.40. Explicit context caching storage is captured in cache_storage_per_mtok_per_hour_usd. Free tier (AI Studio) is published as per-minute RPM/TPM only, not per-day; free_tier omitted."
    },
    {
      "provider": "Meta",
      "provider_url": "https://www.llama.com",
      "model_id": "llama-4-maverick",
      "display_name": "Llama 4 Maverick",
      "model_family": "Llama 4",
      "knowledge_cutoff": "2024-08-31",
      "aliases": [
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "meta-llama/llama-4-maverick"
      ],
      "context_window": 1048576,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.27",
      "output_per_mtok_usd": "0.85",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["together"],
      "aggregators": ["openrouter"],
      "confidence": "high",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-18",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
      "notes": "Multi-host pricing snapshot 2026-05-18: Together $0.27/$0.85 per MTok (meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8) is the row's structured price, being the only direct-host rate confirmed on this date. OpenRouter aggregator routes at $0.15/$0.60 (informational only; structured input/output stay at the lowest direct-host price per the PR4 convention) and is structured as aggregators[\"openrouter\"]. Groq does not list Maverick on its public pricing page; Fireworks does not offer Maverick on serverless (on-demand deployments only). Context window is Meta's published 1M (1048576 tokens); OpenRouter advertises 1.05M, which is consistent with 1048576 tokens rounded in decimal units, while the HuggingFace model card spec is stated as 1M. max_output_tokens not published on the model card; defaulted to 8192. 17B activated / 400B total MoE with 128 experts. deployment_options[] omits hosts whose canonical slug or full pricing could not be confirmed in a single primary source on this date (Fireworks no-serverless, Groq absent)."
    },
    {
      "provider": "Meta",
      "provider_url": "https://www.llama.com",
      "model_id": "llama-4-scout",
      "display_name": "Llama 4 Scout",
      "model_family": "Llama 4",
      "knowledge_cutoff": "2024-08-31",
      "aliases": [
        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "meta-llama/llama-4-scout-17b-16e-instruct",
        "accounts/fireworks/models/llama4-scout-instruct-basic",
        "meta-llama/llama-4-scout"
      ],
      "context_window": 10485760,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.11",
      "output_per_mtok_usd": "0.34",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["together", "fireworks", "groq"],
      "aggregators": ["openrouter"],
      "confidence": "high",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-18",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
      "notes": "Multi-host pricing snapshot 2026-05-18: Together $0.18/$0.59 per MTok (meta-llama/Llama-4-Scout-17B-16E-Instruct); Fireworks $0.15/$0.60 (accounts/fireworks/models/llama4-scout-instruct-basic); Groq $0.11/$0.34 (meta-llama/llama-4-scout-17b-16e-instruct) is the lowest direct-host rate and is the row's structured price. OpenRouter aggregator routes at $0.08/$0.30 (informational only; structured input/output stay at the lowest direct-host price per the PR4 convention) and is structured as aggregators[\"openrouter\"]. Context window is Meta's published 10M (10485760 tokens); most hosts cap below Meta's spec (e.g. Groq, Fireworks typically expose ~128K-1M at the API). max_output_tokens not published on the model card; defaulted to 8192. 17B activated / 109B total MoE with 16 experts."
    },
    {
      "provider": "Meta",
      "provider_url": "https://www.llama.com",
      "model_id": "llama-3.3-70b",
      "display_name": "Llama 3.3 70B Instruct",
      "model_family": "Llama 3.3",
      "knowledge_cutoff": "2023-12-31",
      "aliases": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "llama-3.3-70b-versatile",
        "meta-llama/llama-3.3-70b-instruct"
      ],
      "context_window": 131072,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.59",
      "output_per_mtok_usd": "0.79",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["together", "groq"],
      "aggregators": ["openrouter"],
      "confidence": "high",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-18",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
      "notes": "Multi-host pricing snapshot 2026-05-18: Together $0.88/$0.88 per MTok (meta-llama/Llama-3.3-70B-Instruct-Turbo); Groq $0.59/$0.79 (llama-3.3-70b-versatile) is the lowest direct-host rate and is the row's structured price. Fireworks publishes $0.90 input on accounts/fireworks/models/llama-v3p3-70b-instruct, but the output price was not captured from a single primary source on this date, so Fireworks is omitted from deployment_options. OpenRouter aggregator routes at $0.10/$0.32 (informational only; structured input/output stay at the lowest direct-host price per the PR4 convention) and is structured as aggregators[\"openrouter\"]. Context window 128K per Meta's spec (131072 tokens). Text-only; no vision. max_output_tokens not published on the model card; defaulted to 8192. Knowledge cutoff December 2023 per model card."
    },
    {
      "provider": "Mistral",
      "provider_url": "https://mistral.ai",
      "model_id": "mistral-large-2411",
      "display_name": "Mistral Large 2 (24.11)",
      "model_family": "Mistral Large",
      "aliases": [
        "mistral-large-latest",
        "mistral-large-2407",
        "mistral.mistral-large-2407-v1:0"
      ],
      "context_window": 131072,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "2.00",
      "output_per_mtok_usd": "6.00",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "bedrock", "vertex", "azure"],
      "last_verified": "2026-05-18",
      "last_changed_at": "2024-11-19",
      "deprecated_at": "2026-02-27",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://mistral.ai/news/mistral-large-2407",
      "notes": "La Plateforme rates ($2.00 / $6.00 per MTok) are the row's structured price. Multi-host availability: Bedrock (mistral.mistral-large-2407-v1:0 in us-west-2), Vertex AI, Azure AI Foundry, IBM watsonx. Bedrock published the 24.07 build only, not 24.11. Deprecated on La Plateforme 2026-02-27 (retirement 2026-05-31 per Mistral's legacy table); `mistral-large-latest` now resolves to Mistral Large 3 (not in this dataset), so its entry in aliases reflects the pre-deprecation mapping. Text-only; no vision (Pixtral Large is the multimodal sibling). max_output_tokens not published on the model card; defaulted to 8192. Batch API is a 50% discount where available but per-model availability is not confirmed from a single primary source on this date, so batch fields are unset. Knowledge cutoff not published by Mistral."
    },
    {
      "provider": "Mistral",
      "provider_url": "https://mistral.ai",
      "model_id": "mistral-medium-2505",
      "display_name": "Mistral Medium 3",
      "model_family": "Mistral Medium",
      "aliases": ["mistral-medium-latest"],
      "context_window": 131072,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.40",
      "output_per_mtok_usd": "2.00",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native"],
      "last_verified": "2026-05-18",
      "last_changed_at": "2025-05-07",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://mistral.ai/news/mistral-medium-3",
      "notes": "La Plateforme rates ($0.40 / $2.00 per MTok) are the row's structured price. Mistral's launch post (2025-05-07) lists La Plateforme and Amazon SageMaker at GA with IBM watsonx, NVIDIA NIM, Azure AI Foundry, and Google Cloud Vertex as forthcoming; SageMaker is not in the deployment_options enum and Bedrock has not been confirmed, so deployment_options is restricted to native. Optimized for agentic and coding use cases. max_output_tokens not published on the model card; defaulted to 8192. Batch API discount per-model availability not confirmed on this date, so batch fields are unset. Knowledge cutoff not published by Mistral."
    },
    {
      "provider": "Mistral",
      "provider_url": "https://mistral.ai",
      "model_id": "mistral-small-2501",
      "display_name": "Mistral Small 3",
      "model_family": "Mistral Small",
      "aliases": ["mistralai/Mistral-Small-24B-Instruct-2501"],
      "context_window": 32768,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.10",
      "output_per_mtok_usd": "0.30",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native"],
      "last_verified": "2026-05-18",
      "last_changed_at": "2025-01-30",
      "deprecated_at": "2025-11-06",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://mistral.ai/news/mistral-small-3",
      "notes": "La Plateforme rates ($0.10 / $0.30 per MTok) are the row's structured price; per Mistral's launch post, half the price of the previous mistral-small ($0.20 / $0.60). 24B-parameter latency-optimized model under Apache 2.0; text-only. Context window 32K per Mistral's spec (quoted as roughly 33000 tokens when rounded; the exact 32768 is used here). Deprecated on La Plateforme 2025-11-06 and retired 2025-11-30 per Mistral's legacy table; the successors (mistral-small-2503 / 3.1 with vision and 128K context, and later Small 3.x builds) are not yet in this dataset. max_output_tokens not published on the model card; defaulted to 8192. Batch API discount per-model availability not confirmed on this date, so batch fields are unset. Knowledge cutoff not published by Mistral."
    },
    {
      "provider": "Mistral",
      "provider_url": "https://mistral.ai",
      "model_id": "codestral-2508",
      "display_name": "Codestral 25.08",
      "model_family": "Codestral",
      "aliases": ["codestral-latest", "codestral-2"],
      "context_window": 262144,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "0.30",
      "output_per_mtok_usd": "0.90",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "vertex"],
      "last_verified": "2026-05-18",
      "last_changed_at": "2025-07-31",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://mistral.ai/news/codestral-25-08",
      "notes": "La Plateforme rates ($0.30 / $0.90 per MTok) are the row's structured price. Code-specialized model optimized for fill-in-the-middle (FIM), code completion, code correction, and test generation; supports tool use and structured output per the 25.08 release. 256K context (262144 tokens). Also available on Google Cloud Vertex AI Model Garden as `codestral-2` under the `mistralai` publisher (Mistral Docs: Vertex AI cloud deployments page). max_output_tokens not published on the model card; defaulted to 8192. Batch API discount per-model availability not confirmed on this date, so batch fields are unset. Knowledge cutoff not published by Mistral."
    },
    {
      "provider": "Mistral",
      "provider_url": "https://mistral.ai",
      "model_id": "pixtral-large-2411",
      "display_name": "Pixtral Large",
      "model_family": "Pixtral",
      "aliases": ["pixtral-large-latest", "mistral.pixtral-large-2502-v1:0"],
      "context_window": 131072,
      "max_output_tokens": 8192,
      "input_per_mtok_usd": "2.00",
      "output_per_mtok_usd": "6.00",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "bedrock"],
      "last_verified": "2026-05-18",
      "last_changed_at": "2024-11-18",
      "deprecated_at": "2026-02-27",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://mistral.ai/news/pixtral-large",
      "notes": "La Plateforme rates ($2.00 / $6.00 per MTok) are the row's structured price; pricing parity with Mistral Large 2 since Pixtral Large is the multimodal 124B-parameter open-weight model built on top of Mistral Large 2. Vision-capable: handles documents, charts, and natural images alongside text. Context window 128K (131072 tokens). Bedrock publishes the 25.02 refresh (`mistral.pixtral-large-2502-v1:0`, also routed via `us.mistral.pixtral-large-2502-v1:0`), not the 24.11 build. Deprecated on La Plateforme 2026-02-27 (retirement 2026-05-31 per Mistral's legacy table); Mistral's own news page now carries a \"this model is deprecated\" banner. Successor multimodal capability is absorbed by Mistral Large 3 (not in this dataset). max_output_tokens not published on the model card; defaulted to 8192. Vertex/Azure availability not confirmed for Pixtral Large on this date. Batch API discount per-model availability not confirmed on this date, so batch fields are unset. Knowledge cutoff not published by Mistral."
    },
    {
      "provider": "Cohere",
      "provider_url": "https://cohere.com",
      "model_id": "command-a-03-2025",
      "display_name": "Command A",
      "model_family": "Command A",
      "aliases": ["cohere.command-a-03-2025"],
      "context_window": 256000,
      "max_output_tokens": 8000,
      "input_per_mtok_usd": "2.50",
      "output_per_mtok_usd": "10.00",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "azure"],
      "confidence": "medium",
      "last_verified": "2026-05-18",
      "last_changed_at": "2025-03-01",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.cohere.com/docs/command-a",
      "notes": "Cohere's flagship 111B-parameter model: 256K context, text-only, optimized for tool use, RAG, agents, and 23-language multilingual workloads. Price ($2.50 / $10.00 per MTok) per artificialanalysis.ai citing Cohere's API; Command A is not listed on cohere.com/pricing as of 2026-05-18 (the public table still shows the older Command R/R+ tier), so confidence is medium. Cohere docs note AWS Bedrock availability as \"Coming Soon\" (no Bedrock SKU yet, so `bedrock` is omitted from deployment_options); Azure AI Foundry availability is published but uses per-deployment IDs, so no Azure alias is encoded. Oracle OCI exposes it as `cohere.command-a-03-2025` (kept as alias). Cache and batch pricing not published by Cohere. Knowledge cutoff not published on the Cohere model card."
    },
    {
      "provider": "Cohere",
      "provider_url": "https://cohere.com",
      "model_id": "command-r-plus-08-2024",
      "display_name": "Command R+",
      "model_family": "Command R",
      "knowledge_cutoff": "2024-03-31",
      "aliases": ["command-r-plus", "cohere.command-r-plus-v1:0"],
      "context_window": 128000,
      "max_output_tokens": 4000,
      "input_per_mtok_usd": "2.50",
      "output_per_mtok_usd": "10.00",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native", "bedrock", "azure"],
      "last_verified": "2026-05-18",
      "last_changed_at": "2024-08-30",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cohere.com/pricing",
      "notes": "Cohere Platform rates ($2.50 / $10.00 per MTok) are the row's structured price, listed on cohere.com/pricing as \"Command R+ 08-2024\". 128K context, text-only, optimized for complex RAG and multi-step tool use. Cohere's deprecations page sunsetted only the predecessor `command-r-plus-04-2024` on 2025-09-15 and names this 08-2024 build as the recommended replacement, so it is active on Cohere Platform. Bedrock SKU `cohere.command-r-plus-v1:0` launched Aug 2024 with a Mar 2024 knowledge cutoff (per Bedrock model card), matching this row; Bedrock marks the model \"Legacy\" with an EOL of 2026-08-19, which is a Bedrock-side lifecycle marker, not a Cohere deprecation. Azure AI Foundry availability published by Cohere; per-deployment IDs there, so no Azure alias is encoded. Cache and batch pricing not published by Cohere."
    },
    {
      "provider": "xAI",
      "provider_url": "https://x.ai",
      "model_id": "grok-4-0709",
      "display_name": "Grok 4",
      "model_family": "Grok 4",
      "knowledge_cutoff": "2024-11-30",
      "aliases": ["grok-4"],
      "context_window": 256000,
      "max_output_tokens": 256000,
      "input_per_mtok_usd": "3.00",
      "output_per_mtok_usd": "15.00",
      "cache_read_per_mtok_usd": "0.75",
      "pricing_tiers": [
        {
          "threshold_tokens": 128000,
          "input_per_mtok_usd": "6.00",
          "output_per_mtok_usd": "30.00"
        }
      ],
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native"],
      "deprecated_at": "2026-05-15",
      "replaced_by_model_id": "grok-4.3",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-15",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.x.ai/developers/migration/may-15-retirement",
      "notes": "xAI native rates ($3.00 / $15.00 per MTok, $0.75 cached input) are the row's structured price; prompts above 128K total tokens are billed at the higher pricing_tiers rate ($6.00 / $30.00) per xAI's documented long-context tiering. Grok 4 (snapshot `grok-4-0709`, released 2025-07-09) was xAI's flagship reasoning model: reasoning is always on (thinking tokens billed at the output rate, hence reasoning_tokens_billed: true), parallel tool calling and structured outputs supported, accepts text and image inputs. max_output_tokens of 256000 reflects xAI's documented \"up to 256K tokens of output\" within the shared 256K prompt+response context. Retired from the xAI API on 2026-05-15 12:00 PM PT alongside seven other legacy slugs; requests to `grok-4-0709` and `grok-4` continue to resolve but are now redirected to `grok-4.3` with `low` reasoning effort and billed at grok-4.3 rates. Successor is `grok-4.3`, captured in this dataset and referenced via `replaced_by_model_id`. xAI's API is native-only (not on Bedrock/Vertex/Azure). Batch API not published for this model."
    },
    {
      "provider": "xAI",
      "provider_url": "https://x.ai",
      "model_id": "grok-3",
      "display_name": "Grok 3",
      "model_family": "Grok 3",
      "knowledge_cutoff": "2024-11-30",
      "context_window": 131072,
      "max_output_tokens": 131072,
      "input_per_mtok_usd": "3.00",
      "output_per_mtok_usd": "15.00",
      "cache_read_per_mtok_usd": "0.75",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native"],
      "deprecated_at": "2026-05-15",
      "replaced_by_model_id": "grok-4.3",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-15",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.x.ai/developers/migration/may-15-retirement",
      "notes": "xAI native rates ($3.00 / $15.00 per MTok, $0.75 cached input) are the row's structured price (xAI's pricing page and mem0/pricepertoken aggregator both report $3/$15; artificialanalysis.ai reports a higher $4/$20 — choosing xAI-aligned figures). Grok 3 (released 2025-02-19) was xAI's flagship non-reasoning chat model; text-only inputs, function calling and structured outputs supported, 131,072-token combined prompt+response context window. Not a reasoning model (direct responses, no extended chain-of-thought; the reasoning sibling was `grok-3-mini`, not in this dataset). max_output_tokens defaulted to the documented context cap; xAI does not publish a separate max-output limit beyond the shared 131,072-token window. Retired from the xAI API on 2026-05-15 12:00 PM PT; requests to `grok-3` continue to resolve but are now redirected to `grok-4.3` with `none` reasoning effort and billed at grok-4.3 rates. Successor is `grok-4.3`, captured in this dataset and referenced via `replaced_by_model_id`. xAI's API is native-only. Batch API not published for this model."
    },
    {
      "provider": "xAI",
      "provider_url": "https://x.ai",
      "model_id": "grok-code-fast-1",
      "display_name": "Grok Code Fast 1",
      "model_family": "Grok Code",
      "context_window": 256000,
      "max_output_tokens": 256000,
      "input_per_mtok_usd": "0.20",
      "output_per_mtok_usd": "1.50",
      "cache_read_per_mtok_usd": "0.02",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native"],
      "deprecated_at": "2026-05-15",
      "replaced_by_model_id": "grok-4.3",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-15",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.x.ai/developers/migration/may-15-retirement",
      "notes": "xAI native rates ($0.20 / $1.50 per MTok, $0.02 cached input) are the row's structured price. Grok Code Fast 1 (released 2025-08-26) was xAI's speedy, economical coding-specialized reasoning model: 314B-parameter MoE architecture, 256K combined prompt+response context, agentic coding focus, visible reasoning traces (`reasoning_content` field in streaming responses), function calling and structured outputs supported, text-only. Reasoning is enabled by default so reasoning tokens are billed at the output rate. max_output_tokens defaulted to the documented 256K context cap; xAI does not publish a separate max-output limit beyond the shared window. Retired from the xAI API on 2026-05-15 12:00 PM PT; requests to `grok-code-fast-1` continue to resolve but are now redirected to `grok-4.3` with `low` reasoning effort and billed at grok-4.3 rates. Successor is `grok-4.3`, captured in this dataset and referenced via `replaced_by_model_id`. xAI's API is native-only. Batch API not published for this model. Knowledge cutoff not published by xAI."
    },
    {
      "provider": "xAI",
      "provider_url": "https://x.ai",
      "model_id": "grok-4.3",
      "display_name": "Grok 4.3",
      "model_family": "Grok 4",
      "context_window": 1000000,
      "max_output_tokens": 1000000,
      "input_per_mtok_usd": "1.25",
      "output_per_mtok_usd": "2.50",
      "modalities": {
        "input": ["text", "image"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": true,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-15",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.x.ai/docs/models",
      "notes": "xAI native rates ($1.25 / $2.50 per MTok) are the row's structured price, listed on docs.x.ai/docs/models and docs.x.ai/docs/pricing. Grok 4.3 is xAI's current flagship: positioned as \"the most intelligent and fastest model\" recommended for chat, coding, and general use across the Grok API. 1M-token combined prompt+response context window (a 4x expansion over the 256K window on Grok 4 / Grok Code Fast 1). Successor to `grok-4-0709`, `grok-3`, and `grok-code-fast-1`, all of which xAI retired on 2026-05-15 12:00 PM PT and now redirect to this SKU at varying default `reasoning_effort` levels (`low` for grok-4-0709 / grok-code-fast-1, `none` for grok-3). Thinking mode is exposed via the `reasoning_effort` parameter (`none` / `low` / `medium` / `high`); when reasoning is on, thinking tokens are billed at the output rate, hence reasoning_tokens_billed: true. Accepts text and image inputs, text output; parallel tool calling and structured outputs supported. max_output_tokens reflects the documented 1M-token shared window — xAI does not publish a separate max-output limit. Cached input price, knowledge cutoff, and release date are not published by xAI on the models or pricing pages as of last_verified; omitted rather than guessed. xAI's API is native-only (not on Bedrock / Vertex / Azure). Batch API not published for this model."
    },
    {
      "provider": "Alibaba",
      "provider_url": "https://www.alibabacloud.com/product/modelstudio",
      "model_id": "qwen3-max",
      "display_name": "Qwen3-Max",
      "model_family": "Qwen 3 Max",
      "knowledge_cutoff": "2025-06-30",
      "context_window": 262144,
      "max_output_tokens": 32768,
      "input_per_mtok_usd": "1.20",
      "output_per_mtok_usd": "6.00",
      "batch_input_per_mtok_usd": "0.60",
      "batch_output_per_mtok_usd": "3.00",
      "pricing_tiers": [
        {
          "threshold_tokens": 32768,
          "input_per_mtok_usd": "2.40",
          "output_per_mtok_usd": "12.00"
        },
        {
          "threshold_tokens": 131072,
          "input_per_mtok_usd": "3.00",
          "output_per_mtok_usd": "15.00"
        }
      ],
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["dashscope"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2025-09-23",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
      "notes": "Alibaba DashScope (International) tiered pricing by input-token bucket: 0-32K = $1.20 input / $6.00 output per MTok (base row rate); 32K-128K = $2.40 / $12.00; 128K-252K = $3.00 / $15.00 (captured in pricing_tiers). Batch API discounts both input and output by 50% ($0.60 / $3.00 at the base tier). Context window 262,144 tokens (DashScope publishes 252K as the top-tier pricing ceiling; Qwen team and OpenRouter publish the full 262,144 model context). Max output 32,768 tokens. Released 2025-09-23 (`qwen3-max-2025-09-23`); knowledge cutoff 2025-06-30. Hybrid thinking model: thinking mode disabled by default but available via `/think` (and disabled via `/no_think`); when enabled, thinking tokens are billed at the output rate, hence reasoning_tokens_billed: true. Text-only inputs and outputs (the Qwen3-VL family is a separate set of model_ids). Tool calling and structured outputs supported via the DashScope and OpenAI-compatible endpoints. Explicit context cache discounts cached input tokens to 10% of the standard rate, but DashScope does not publish a single cache_read figure across the tiered input rates, so cache_read_per_mtok_usd is omitted rather than guessed. Deployment via DashScope (Model Studio) only at last_verified; not on Bedrock, Vertex, Together, Fireworks, or Groq as a first-party offering."
    },
    {
      "provider": "Alibaba",
      "provider_url": "https://www.alibabacloud.com/product/modelstudio",
      "model_id": "qwen3-coder-plus",
      "display_name": "Qwen3-Coder-Plus",
      "model_family": "Qwen 3 Coder",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "input_per_mtok_usd": "1.00",
      "output_per_mtok_usd": "5.00",
      "batch_input_per_mtok_usd": "0.50",
      "batch_output_per_mtok_usd": "2.50",
      "pricing_tiers": [
        {
          "threshold_tokens": 32768,
          "input_per_mtok_usd": "1.80",
          "output_per_mtok_usd": "9.00"
        },
        {
          "threshold_tokens": 131072,
          "input_per_mtok_usd": "3.00",
          "output_per_mtok_usd": "15.00"
        },
        {
          "threshold_tokens": 262144,
          "input_per_mtok_usd": "6.00",
          "output_per_mtok_usd": "60.00"
        }
      ],
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": true,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["dashscope"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2025-09-23",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
      "notes": "Alibaba DashScope (International) tiered pricing by input-token bucket: 0-32K = $1.00 / $5.00 per MTok (base row rate); 32K-128K = $1.80 / $9.00; 128K-256K = $3.00 / $15.00; 256K-1M = $6.00 / $60.00 (captured in pricing_tiers). Batch API discounts both input and output by 50% ($0.50 / $2.50 at the base tier). 1,000,000-token context window with 65,536 max output tokens. Released 2025-09-23 (`qwen3-coder-plus-2025-09-23`). Built on the Qwen3-Coder 480B-A35B MoE base; positioned for agentic coding (robust tool calling and environment interaction). Not a thinking/reasoning SKU (no chain-of-thought billing semantics), so reasoning_tokens_billed is false. Text-only modalities. Explicit context cache discounts cached input to 10% of the standard rate; implicit cache to 20%; DashScope does not publish a single cache_read figure across tiered input rates, so cache_read_per_mtok_usd is omitted rather than guessed. Knowledge cutoff not published. Deployment via DashScope (Model Studio) only at last_verified."
    },
    {
      "provider": "Perplexity",
      "provider_url": "https://www.perplexity.ai",
      "model_id": "sonar",
      "display_name": "Sonar",
      "model_family": "Sonar",
      "context_window": 128000,
      "max_output_tokens": 8000,
      "input_per_mtok_usd": "1.00",
      "output_per_mtok_usd": "1.00",
      "per_request_usd": "0.005",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": false,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.perplexity.ai/getting-started/pricing",
      "notes": "Perplexity native rates: $1.00 input / $1.00 output per MTok. Web search is built into the API as a first-class capability rather than a user-defined tool, so total cost per query = token costs + a per-request fee. per_request_usd captures the low-context tier ($5 / 1,000 requests = $0.005); medium and high search-context tiers add $8 / $12 per 1,000 requests respectively (not captured structurally — single-value field). Perplexity does not publish a separate per-search fee for this SKU (per-search metering applies to `sonar-deep-research`), so per_search_usd is omitted. 128,000-token context window; max_output_tokens 8,000 per Perplexity's documented Sonar limits. Sonar (released January 2025) is built on a fine-tuned Llama 3.3 70B base optimized for web-grounded question answering with inline citations. Knowledge cutoff is intentionally omitted: Sonar fetches the live web at query time, so a static cutoff date does not meaningfully describe its answer space. Not a reasoning SKU (no chain-of-thought tokens), hence reasoning_tokens_billed is false. supports_tool_use is set conservatively to false because web search — the model's primary capability — is exposed as a built-in feature of the Sonar endpoint, not a user-defined tool; Perplexity recommends its separate Agent API for production tool-using agents. Structured outputs supported via response_format. Perplexity API is native-only (no Bedrock / Vertex / Azure / Together / Fireworks / Groq first-party deployment). Cache and batch APIs are not published."
    },
    {
      "provider": "Perplexity",
      "provider_url": "https://www.perplexity.ai",
      "model_id": "sonar-pro",
      "display_name": "Sonar Pro",
      "model_family": "Sonar",
      "context_window": 200000,
      "max_output_tokens": 8000,
      "input_per_mtok_usd": "3.00",
      "output_per_mtok_usd": "15.00",
      "per_request_usd": "0.006",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": false,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": false,
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.perplexity.ai/getting-started/pricing",
      "notes": "Perplexity native rates: $3.00 input / $15.00 output per MTok. Web search is built into the API, not exposed as a user-defined tool; total cost per query = token costs + a per-request fee. per_request_usd captures the low-context tier ($6 / 1,000 requests = $0.006); medium and high search-context tiers add $10 / $14 per 1,000 requests respectively (not captured structurally — single-value field). 200,000-token context window (largest of the Sonar family); max_output_tokens 8,000 per Perplexity's documented Sonar limits. Positioned as Perplexity's advanced search SKU for complex multi-source queries and follow-ups. Knowledge cutoff intentionally omitted: Sonar Pro fetches the live web at query time. Not a reasoning SKU, hence reasoning_tokens_billed is false. supports_tool_use set conservatively to false because Perplexity exposes web search as the built-in capability and recommends the separate Agent API for production tool-using agents. Structured outputs supported via response_format. Perplexity API is native-only (no Bedrock / Vertex / Azure / Together / Fireworks / Groq first-party deployment). Cache and batch APIs are not published."
    },
    {
      "provider": "Perplexity",
      "provider_url": "https://www.perplexity.ai",
      "model_id": "sonar-reasoning-pro",
      "display_name": "Sonar Reasoning Pro",
      "model_family": "Sonar Reasoning",
      "context_window": 128000,
      "max_output_tokens": 8000,
      "input_per_mtok_usd": "2.00",
      "output_per_mtok_usd": "8.00",
      "per_request_usd": "0.006",
      "modalities": {
        "input": ["text"],
        "output": ["text"]
      },
      "supports_tool_use": false,
      "structured_output": true,
      "supports_vision": false,
      "supports_audio_in": false,
      "supports_audio_out": false,
      "supports_pdf": false,
      "reasoning_tokens_billed": true,
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.perplexity.ai/getting-started/pricing",
      "notes": "Perplexity native rates: $2.00 input / $8.00 output per MTok. Web search is built into the API; total cost per query = token costs + a per-request fee. per_request_usd captures the low-context tier ($6 / 1,000 requests = $0.006); medium and high search-context tiers add $10 / $14 per 1,000 requests respectively (not captured structurally — single-value field). 128,000-token context window; max_output_tokens 8,000 per Perplexity's documented Sonar limits. Reasoning SKU built on DeepSeek R1 with Chain-of-Thought; responses include a leading `<think>` reasoning block followed by the answer, and those reasoning tokens are billed at the output rate, hence reasoning_tokens_billed: true. Knowledge cutoff intentionally omitted: Sonar Reasoning Pro fetches the live web at query time. supports_tool_use set conservatively to false because Perplexity exposes web search as the built-in capability and recommends the separate Agent API for production tool-using agents. Structured outputs supported via response_format. Perplexity API is native-only (no Bedrock / Vertex / Azure / Together / Fireworks / Groq first-party deployment). Cache and batch APIs are not published. Note: the older `sonar-reasoning` SKU is no longer listed in Perplexity's current model lineup at last_verified."
    }
  ]
}