{
  "version": 2,
  "license": "CC-BY-4.0",
  "models": [
    {
      "provider": "Deepgram",
      "provider_url": "https://deepgram.com",
      "model_id": "nova-3-monolingual",
      "display_name": "Nova-3 Monolingual",
      "price_per_minute_usd": "0.0048",
      "price_per_minute_batch_usd": "0.0043",
      "diarization_per_minute_usd": "0.002",
      "languages": ["en"],
      "streaming": true,
      "realtime": true,
      "diarization": "extra-cost",
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://deepgram.com/pricing",
      "notes": "Pay-as-you-go tier (English). Growth tier (volume commitment) is $0.0042/min streaming, $0.0036/min pre-recorded. Diarization add-on is captured in diarization_per_minute_usd."
    },
    {
      "provider": "Deepgram",
      "provider_url": "https://deepgram.com",
      "model_id": "nova-3-multilingual",
      "display_name": "Nova-3 Multilingual",
      "price_per_minute_usd": "0.0058",
      "price_per_minute_batch_usd": "0.0052",
      "diarization_per_minute_usd": "0.002",
      "languages": "61+",
      "streaming": true,
      "realtime": true,
      "diarization": "extra-cost",
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://deepgram.com/pricing",
      "notes": "Pay-as-you-go tier (multilingual, 61+ languages). Growth tier is $0.0050/min streaming, $0.0043/min pre-recorded. Diarization add-on is captured in diarization_per_minute_usd."
    },
    {
      "provider": "Deepgram",
      "provider_url": "https://deepgram.com",
      "model_id": "nova-3-medical",
      "display_name": "Nova-3 Medical",
      "price_per_minute_usd": "0.0048",
      "price_per_minute_batch_usd": "0.0043",
      "diarization_per_minute_usd": "0.002",
      "languages": [
        "en",
        "en-US",
        "en-AU",
        "en-CA",
        "en-GB",
        "en-IE",
        "en-IN",
        "en-NZ"
      ],
      "streaming": true,
      "realtime": true,
      "diarization": "extra-cost",
      "deployment_options": ["native"],
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://deepgram.com/learn/introducing-nova-3-medical-speech-to-text-api",
      "notes": "Medical-tuned Nova-3 for clinical transcription. English variants only (en, en-US, en-AU, en-CA, en-GB, en-IE, en-IN, en-NZ). Invoked via `model=nova-3-medical` in the Deepgram API. Pricing is not separately listed on the public pricing page; Deepgram's launch announcement quotes $0.0043/min pre-recorded, which matches the Nova-3 Monolingual batch rate. Streaming rate assumed equal to Nova-3 Monolingual ($0.0048/min PAYG); verify with Deepgram sales for production commitments."
    },
    {
      "provider": "AssemblyAI",
      "provider_url": "https://www.assemblyai.com",
      "model_id": "universal-2",
      "display_name": "Universal-2",
      "price_per_minute_usd": "0.0025",
      "languages": "99+",
      "streaming": false,
      "realtime": false,
      "diarization": "extra-cost",
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.assemblyai.com/pricing",
      "notes": "Pre-recorded (file-based) only — broadest AssemblyAI language coverage. Published as $0.15/hr. Diarization add-on +$0.02/hr. For real-time use, see universal-streaming."
    },
    {
      "provider": "AssemblyAI",
      "provider_url": "https://www.assemblyai.com",
      "model_id": "universal-streaming",
      "display_name": "Universal-Streaming",
      "price_per_minute_usd": "0.0025",
      "languages": ["en"],
      "streaming": true,
      "realtime": true,
      "diarization": "extra-cost",
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.assemblyai.com/pricing",
      "notes": "English-only streaming model. Published as $0.15/hr. Higher-tier streaming (Universal-3 Pro Streaming) is $0.45/hr ($0.0075/min)."
    },
    {
      "provider": "AssemblyAI",
      "provider_url": "https://www.assemblyai.com",
      "model_id": "universal-3-pro",
      "display_name": "Universal-3 Pro",
      "price_per_minute_usd": "0.0035",
      "languages": ["en", "es", "pt", "de", "fr", "it"],
      "streaming": false,
      "realtime": false,
      "diarization": "extra-cost",
      "deployment_options": ["native"],
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-26",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.assemblyai.com/pricing",
      "notes": "AssemblyAI's highest-accuracy pre-recorded model with native code-switching across EN/ES/PT/DE/FR/IT. Published as $0.21/hr. For real-time use see universal-3-pro-streaming."
    },
    {
      "provider": "AssemblyAI",
      "provider_url": "https://www.assemblyai.com",
      "model_id": "universal-3-pro-streaming",
      "display_name": "Universal-3 Pro Streaming",
      "price_per_minute_usd": "0.0075",
      "languages": ["en", "es", "pt", "de", "fr", "it"],
      "streaming": true,
      "realtime": true,
      "diarization": "extra-cost",
      "deployment_options": ["native"],
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-26",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.assemblyai.com/pricing",
      "notes": "Real-time streaming variant of Universal-3 Pro, positioned for voice agents. Multilingual with native code-switching across EN/ES/PT/DE/FR/IT. Published as $0.45/hr."
    },
    {
      "provider": "AssemblyAI",
      "provider_url": "https://www.assemblyai.com",
      "model_id": "universal-streaming-multilingual",
      "display_name": "Universal-Streaming Multilingual",
      "price_per_minute_usd": "0.0025",
      "languages": ["en", "es", "pt", "de", "fr", "it"],
      "streaming": true,
      "realtime": true,
      "diarization": "extra-cost",
      "deployment_options": ["native"],
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-26",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.assemblyai.com/pricing",
      "notes": "Multilingual streaming variant covering EN/ES/PT/DE/FR/IT at the same $0.15/hr rate as the English-only universal-streaming. Good balance of cost and latency for voice agents."
    },
    {
      "provider": "AssemblyAI",
      "provider_url": "https://www.assemblyai.com",
      "model_id": "whisper-streaming",
      "display_name": "Whisper-Streaming",
      "price_per_minute_usd": "0.0050",
      "languages": "99+",
      "streaming": true,
      "realtime": true,
      "diarization": "extra-cost",
      "deployment_options": ["native"],
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-26",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.assemblyai.com/pricing",
      "notes": "OpenAI Whisper served via AssemblyAI's streaming infrastructure with 99+ language coverage. Published as $0.30/hr."
    },
    {
      "provider": "Cartesia",
      "provider_url": "https://cartesia.ai",
      "model_id": "ink-1",
      "display_name": "Ink",
      "price_per_minute_usd": "0.003",
      "languages": "42+",
      "streaming": true,
      "realtime": true,
      "diarization": "unsupported",
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cartesia.ai/pricing",
      "notes": "Cartesia's STT model. Published as $12 per 4000 minutes ($0.003/min). Same provider as the Sonic TTS model (real-time focused)."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "whisper-1",
      "display_name": "Whisper",
      "price_per_minute_usd": "0.006",
      "languages": "99+",
      "streaming": false,
      "realtime": false,
      "diarization": "unsupported",
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://openai.com/api/pricing/",
      "notes": "Billed to the nearest second. Pre-recorded only. openai.com pricing page returned 403 during automated fetch; rate cross-verified via OpenRouter (openai/whisper-1)."
    },
    {
      "provider": "Groq",
      "provider_url": "https://groq.com",
      "model_id": "whisper-large-v3",
      "display_name": "Whisper V3 Large (Groq)",
      "price_per_minute_usd": "0.00185",
      "languages": "99+",
      "streaming": false,
      "realtime": false,
      "diarization": "unsupported",
      "min_billed_seconds": 10,
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://groq.com/pricing/",
      "notes": "Whisper V3 Large hosted on Groq. Published as $0.111/hr. Speed factor 217x realtime."
    },
    {
      "provider": "Groq",
      "provider_url": "https://groq.com",
      "model_id": "whisper-large-v3-turbo",
      "display_name": "Whisper Large v3 Turbo (Groq)",
      "price_per_minute_usd": "0.000667",
      "languages": "99+",
      "streaming": false,
      "realtime": false,
      "diarization": "unsupported",
      "min_billed_seconds": 10,
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://groq.com/pricing/",
      "notes": "Whisper Large v3 Turbo hosted on Groq — faster variant. Published as $0.04/hr. Speed factor 228x realtime."
    },
    {
      "provider": "Microsoft Azure",
      "provider_url": "https://azure.microsoft.com",
      "model_id": "azure-speech-realtime",
      "display_name": "Azure Speech (Real-time)",
      "price_per_minute_usd": "0.016667",
      "languages": "100+",
      "streaming": true,
      "realtime": true,
      "diarization": "included",
      "deployment_options": ["azure"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/",
      "notes": "Azure Speech Standard (S0) real-time speech-to-text. Published as $1.00/hr pay-as-you-go (= $0.016667/min); custom real-time endpoint is $1.20/hr ($0.02/min). Commitment tiers reduce effective rate (2,000 hrs/mo $0.80/hr; 10,000 hrs/mo $0.65/hr; 50,000 hrs/mo $0.50/hr). Real-time diarization is included up to 240 min/session per Microsoft Learn quotas/limits doc. Languages: 100+ per Azure language support docs. Cross-verified via https://learn.microsoft.com/en-us/answers/questions/2155625/speech-to-text-costing-1-hr-is-crazy-no-bulk-avail."
    },
    {
      "provider": "Microsoft Azure",
      "provider_url": "https://azure.microsoft.com",
      "model_id": "azure-speech-batch",
      "display_name": "Azure Speech (Batch)",
      "price_per_minute_usd": "0.006",
      "languages": "100+",
      "streaming": false,
      "realtime": false,
      "diarization": "included",
      "deployment_options": ["azure"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/",
      "notes": "Azure Speech Standard (S0) batch transcription. Published as $0.36/hr (= $0.006/min); custom batch endpoint is $0.45/hr ($0.0075/min). Fast transcription (REST API, sync) is a separate $0.66/hr ($0.011/min) tier, not modelled here. Batch diarization is included (up to 240 min/file). Cross-verified via https://learn.microsoft.com/en-us/answers/questions/2155625/speech-to-text-costing-1-hr-is-crazy-no-bulk-avail."
    },
    {
      "provider": "Google",
      "provider_url": "https://cloud.google.com",
      "model_id": "chirp_2",
      "display_name": "Chirp 2",
      "price_per_minute_usd": "0.016",
      "languages": "20+",
      "streaming": true,
      "realtime": true,
      "diarization": "unsupported",
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cloud.google.com/speech-to-text/pricing",
      "notes": "Google Cloud Speech-to-Text v2 multilingual model. Standard tier $0.016/min for both real-time and batch (down from v1's $0.024/min, per https://cloud.google.com/blog/products/ai-machine-learning/google-cloud-speech-to-text-v2-api). Dynamic Batch tier (up to 24h SLA) is 75% lower at $0.004/min — not modelled as price_per_minute_batch_usd because standard batch is the same as real-time. Standard volume tiers can reduce effective rate to as low as $0.004/min. Supports StreamingRecognize (~20 languages), Recognize, and BatchRecognize (broadest language coverage) per https://docs.cloud.google.com/speech-to-text/docs/models/chirp-2. GA in us-central1, europe-west4, asia-southeast1."
    },
    {
      "provider": "Google",
      "provider_url": "https://cloud.google.com",
      "model_id": "chirp_3",
      "display_name": "Chirp 3",
      "price_per_minute_usd": "0.016",
      "languages": "98+",
      "streaming": true,
      "realtime": true,
      "diarization": "included",
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cloud.google.com/speech-to-text/pricing",
      "notes": "Google Cloud Speech-to-Text v2 latest-generation generative ASR model. Standard tier $0.016/min for both real-time and batch; Dynamic Batch tier (up to 24h SLA) at $0.004/min (75% off) — not modelled as price_per_minute_batch_usd because standard batch is the same as real-time. Adds automatic language detection and diarization vs Chirp 2 per https://docs.cloud.google.com/speech-to-text/docs/models/chirp-3. 98+ languages and locales (24 GA + 74 preview); supports StreamingRecognize and BatchRecognize."
    },
    {
      "provider": "Speechmatics",
      "provider_url": "https://www.speechmatics.com",
      "model_id": "enhanced",
      "display_name": "Speechmatics Enhanced",
      "price_per_minute_usd": "0.004",
      "languages": "55+",
      "streaming": true,
      "realtime": true,
      "diarization": "included",
      "deployment_options": ["native"],
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.speechmatics.com/pricing",
      "notes": "Speechmatics offers two operating points — `enhanced` (highest accuracy) and `standard` (faster/cheaper) — selected via the `operating_point` API parameter on both Batch and Real-time APIs. Pricing page lists Pro tier from $0.24/hr ($0.004/min) on PAYG with volume discounts above 500 hrs/month; the same tier is used for both real-time and batch. Free plan includes 480 minutes/month (not modelled as `free_tier` because schema expects per-day/per-token quotas). Confidence is medium because the pricing page exposes tier names rather than per-model SKU rates; verify against contract for production. Cross-verified product structure via https://docs.speechmatics.com/."
    },
    {
      "provider": "Rev.ai",
      "provider_url": "https://www.rev.ai",
      "model_id": "whisper-fusion",
      "display_name": "Rev.ai Whisper Fusion",
      "price_per_minute_usd": "0.005",
      "languages": ["en"],
      "streaming": true,
      "realtime": true,
      "diarization": "included",
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.rev.ai/pricing",
      "notes": "Rev.ai's streaming transcription product, branded `Whisper Fusion` on the pricing page at $0.005/min; the parallel Whisper Large streaming tier is also $0.005/min. Free credits equivalent to 5 hours of Reverb ASR (cross-applicable across products). Reverb (batch) is modelled separately; see https://docs.rev.ai/ for the full API surface. English-primary; foreign language support is a distinct Reverb Foreign Language product line."
    },
    {
      "provider": "Rev.ai",
      "provider_url": "https://www.rev.ai",
      "model_id": "reverb",
      "display_name": "Rev.ai Reverb",
      "price_per_minute_usd": "0.0033",
      "languages": ["en"],
      "streaming": false,
      "realtime": false,
      "diarization": "included",
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.rev.ai/pricing",
      "notes": "Rev.ai's async/batch ASR model branded `Reverb` at $0.20/hr ($0.0033/min). A `Reverb Turbo` tier exists at $0.10/hr ($0.0017/min) — not modelled as a separate row since it's a latency/quality dial on the same product; `Reverb Foreign Language` ($0.30/hr, $0.005/min, 57+ languages) is also priced separately and could be added if needed. Free credits equivalent to 5 hours of Reverb ASR. See https://docs.rev.ai/ for the async transcription API."
    },
    {
      "provider": "Gladia",
      "provider_url": "https://www.gladia.io",
      "model_id": "solaria-1",
      "display_name": "Gladia Solaria-1",
      "price_per_minute_usd": "0.0125",
      "price_per_minute_batch_usd": "0.01017",
      "languages": "100+",
      "streaming": true,
      "realtime": true,
      "realtime_latency_ms": 300,
      "diarization": "included",
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.gladia.io/pricing",
      "notes": "Gladia's first-generation universal STT model `solaria-1`, supporting 100+ languages with automatic language detection and code-switching. Starter (PAYG) pricing: real-time $0.75/hr ($0.0125/min), async $0.61/hr (~$0.01017/min). Growth (committed) plan lowers real-time to $0.25/hr ($0.0042/min) and async to $0.20/hr ($0.0033/min). Sub-300ms streaming latency claimed on the pricing page. Speaker diarization and word-level timestamps included on all tiers. Starter includes 10 free hours per month. Model name confirmed via https://docs.gladia.io/."
    },
    {
      "provider": "Soniox",
      "provider_url": "https://soniox.com",
      "model_id": "stt-rt-v4",
      "display_name": "Soniox STT Real-time v4",
      "price_per_minute_usd": "0.002",
      "languages": "60+",
      "streaming": true,
      "realtime": true,
      "diarization": "included",
      "deployment_options": ["native"],
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://soniox.com/pricing",
      "notes": "Soniox real-time STT model `stt-rt-v4`. Primary billing metric is input audio tokens at $2.00 per 1M tokens; the vendor approximates this as ~$0.12/hour, which we record as $0.002/min for comparability. The older `stt-rt-v3` identifier is aliased to this model (v3 deprecated 2026-02-05; removed 2026-02-28 per https://soniox.com/docs/stt/models). 60+ languages with automatic language detection. Confidence is medium because the per-minute rate is an approximation of token-based pricing; actual cost varies with audio content density."
    },
    {
      "provider": "Soniox",
      "provider_url": "https://soniox.com",
      "model_id": "stt-async-v4",
      "display_name": "Soniox STT Async v4",
      "price_per_minute_usd": "0.00167",
      "languages": "60+",
      "streaming": false,
      "realtime": false,
      "diarization": "included",
      "max_audio_minutes_per_file": 300,
      "deployment_options": ["native"],
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://soniox.com/pricing",
      "notes": "Soniox async (file) STT model `stt-async-v4`. Primary billing metric is input audio tokens at $1.50 per 1M tokens; the vendor approximates this as ~$0.10/hour, which we record as $0.00167/min for comparability. The older `stt-async-v3` identifier is aliased to this model (v3 deprecated 2026-02-05; removed 2026-02-28 per https://soniox.com/docs/stt/models). Supports up to 5 hours of audio per request. 60+ languages with automatic language detection. Confidence is medium because the per-minute rate is an approximation of token-based pricing."
    }
  ]
}