{
  "version": 2,
  "license": "CC-BY-4.0",
  "models": [
    {
      "provider": "ElevenLabs",
      "provider_url": "https://elevenlabs.io",
      "model_id": "eleven_flash_v2_5",
      "display_name": "Eleven Flash v2.5",
      "price_per_1m_chars_usd": "50.0",
      "voice_quality": "neural",
      "languages": "32+",
      "ssml_supported": false,
      "voice_cloning": true,
      "output_formats": [
        "mp3_44100_128",
        "pcm_16000",
        "wav_44100",
        "opus_48000_128",
        "ulaw_8000",
        "alaw_8000"
      ],
      "time_to_first_byte_ms": 75,
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://elevenlabs.io/pricing/api",
      "notes": "Current low-latency flagship; eleven_turbo_v2_5 is deprecated and replaced by Flash v2.5. Pay-as-you-go rate $0.05/1K chars. ~10K voices available. Plain text input only (no SSML)."
    },
    {
      "provider": "ElevenLabs",
      "provider_url": "https://elevenlabs.io",
      "model_id": "eleven_multilingual_v2",
      "display_name": "Eleven Multilingual v2",
      "price_per_1m_chars_usd": "100.0",
      "voice_quality": "neural",
      "languages": "29+",
      "ssml_supported": false,
      "voice_cloning": true,
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://elevenlabs.io/docs/overview/models",
      "notes": "High-quality professional model for audiobooks, video narration, and rich emotional expression. 29 languages, max 10,000 chars per request. Pay-as-you-go billed at 1 credit per character; Flash/Turbo v2.5 are billed at 0.5 credits/char (hence 2x the Flash $50/1M-chars rate). Higher latency than Flash; not recommended for real-time agents."
    },
    {
      "provider": "ElevenLabs",
      "provider_url": "https://elevenlabs.io",
      "model_id": "eleven_v3",
      "display_name": "Eleven v3",
      "price_per_1m_chars_usd": "100.0",
      "voice_quality": "neural",
      "languages": "70+",
      "ssml_supported": false,
      "emotion_control_supported": true,
      "voice_cloning": true,
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://elevenlabs.io/docs/overview/models",
      "notes": "Most expressive ElevenLabs TTS model (GA after alpha). 70+ languages, max 5,000 chars per request. Supports inline audio tags ([whispers], [sighs], [laughs], [happily]) for emotion/delivery control instead of SSML. Higher latency than Flash/Turbo v2.5 — ElevenLabs explicitly recommends v2.5 Flash/Turbo for real-time use. Pay-as-you-go billed at 1 credit/char (same multiplier as Multilingual v2). PVCs (professional voice clones) not yet fully optimized for v3."
    },
    {
      "provider": "ElevenLabs",
      "provider_url": "https://elevenlabs.io",
      "model_id": "eleven_turbo_v2_5",
      "display_name": "Eleven Turbo v2.5",
      "price_per_1m_chars_usd": "50.0",
      "voice_quality": "neural",
      "languages": "32+",
      "ssml_supported": false,
      "voice_cloning": true,
      "deprecated_at": "2026-05-19",
      "replaced_by_model_id": "eleven_flash_v2_5",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://elevenlabs.io/docs/overview/models",
      "notes": "Deprecated per ElevenLabs models page — outclassed by and replaced by eleven_flash_v2_5. Still callable but not recommended for new applications. No official sunset date published; deprecated_at reflects verification date. Pay-as-you-go billed at 0.5 credits/char (same as Flash v2.5)."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "tts-1",
      "display_name": "TTS-1",
      "price_per_1m_chars_usd": "15.0",
      "voice_quality": "neural",
      "languages": ["en"],
      "ssml_supported": false,
      "voice_cloning": false,
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://openai.com/api/pricing/",
      "notes": "Standard quality. The newer gpt-4o-mini-tts model is also available in OpenAI's API; consider adding when its pricing structure stabilizes."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "tts-1-hd",
      "display_name": "TTS-1 HD",
      "price_per_1m_chars_usd": "30.0",
      "voice_quality": "neural",
      "languages": ["en"],
      "ssml_supported": false,
      "voice_cloning": false,
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://openai.com/api/pricing/",
      "notes": "High-definition tier — 2x the price of tts-1 for higher quality output."
    },
    {
      "provider": "OpenAI",
      "provider_url": "https://openai.com",
      "model_id": "gpt-4o-mini-tts",
      "display_name": "GPT-4o mini TTS",
      "price_per_1m_chars_usd": "20.0",
      "voice_quality": "neural",
      "languages": ["en"],
      "ssml_supported": false,
      "voice_cloning": false,
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://developers.openai.com/api/docs/models/gpt-4o-mini-tts",
      "notes": "OpenAI's newer GPT-4o-based TTS. Native pricing is token-based — $0.60/1M text-input tokens + $12/1M audio-output tokens — not per character. OpenAI's published estimate is ~$0.015 per minute of audio; converted to ~$20/1M chars assuming ~150 WPM (~750 chars/min) for consistency with the Cartesia row. Actual $/1M chars varies with speech rate and language. Supports voice steering via natural-language instructions (style/emotion) instead of SSML. Latest snapshot gpt-4o-mini-tts-2025-12-15. Max input 2,000 tokens per request."
    },
    {
      "provider": "Cartesia",
      "provider_url": "https://cartesia.ai",
      "model_id": "sonic-3.5",
      "display_name": "Sonic 3.5",
      "price_per_1m_chars_usd": "50.0",
      "voice_quality": "neural",
      "languages": "42+",
      "ssml_supported": false,
      "voice_cloning": true,
      "output_formats": [
        "raw/pcm_f32le",
        "raw/pcm_s16le",
        "raw/pcm_mulaw",
        "raw/pcm_alaw",
        "wav",
        "mp3"
      ],
      "time_to_first_byte_ms": 90,
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cartesia.ai/pricing",
      "notes": "Cartesia publishes Sonic pricing as $1 per 25 minutes of audio output (~$0.04/min). Converted to ~$50/1M chars assuming ~150 WPM (~750 chars/min). Actual $/1M chars varies with speech rate; verify by sampling. IVC voice cloning included (no clone fee). 90ms TTFB. SSML tags (speed/volume/break/spell/emotion) are documented on sonic-3 but temporarily disabled on sonic-3.5 per https://docs.cartesia.ai/build-with-cartesia/sonic-3/ssml-tags (checked 2026-05-16); flip ssml_supported back to true once upstream re-enables."
    },
    {
      "provider": "Cartesia",
      "provider_url": "https://cartesia.ai",
      "model_id": "sonic-2",
      "display_name": "Sonic 2",
      "price_per_1m_chars_usd": "50.0",
      "voice_quality": "neural",
      "languages": "15+",
      "ssml_supported": false,
      "voice_cloning": true,
      "time_to_first_byte_ms": 90,
      "replaced_by_model_id": "sonic-3.5",
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.cartesia.ai/build-with-cartesia/tts-models/older-models",
      "notes": "Predecessor to sonic-3.5; still stable and callable, but Cartesia recommends sonic-3.5 for new builds. Latest snapshot sonic-2-2025-06-11. 8 core stable languages (en, fr, de, es, pt, zh, ja, ko); 7 additional languages reach EOL 2026-06-01. 90ms model latency. Higher-fidelity voice cloning capability. Pricing assumed equal to sonic-3.5 (15 credits/sec of audio); confidence medium because Cartesia did not publish a separate per-model rate. Verify by sampling if cost-critical."
    },
    {
      "provider": "Cartesia",
      "provider_url": "https://cartesia.ai",
      "model_id": "sonic-turbo",
      "display_name": "Sonic Turbo",
      "price_per_1m_chars_usd": "50.0",
      "voice_quality": "neural",
      "languages": "15+",
      "ssml_supported": false,
      "voice_cloning": true,
      "time_to_first_byte_ms": 40,
      "replaced_by_model_id": "sonic-3.5",
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://docs.cartesia.ai/build-with-cartesia/tts-models/older-models",
      "notes": "Lowest-latency Sonic variant (~40ms TTFB). Still stable and callable, but Cartesia recommends sonic-3.5 for new builds. Latest snapshot sonic-turbo-2025-06-04. 9 stable languages; 6 additional languages reach EOL 2026-06-01. Pricing assumed equal to sonic-3.5 (15 credits/sec of audio); confidence medium because Cartesia did not publish a separate per-model rate. Verify by sampling if cost-critical."
    },
    {
      "provider": "Groq",
      "provider_url": "https://groq.com",
      "model_id": "canopy-labs-orpheus-english",
      "display_name": "Canopy Labs Orpheus English (Groq)",
      "price_per_1m_chars_usd": "22.0",
      "voice_quality": "neural",
      "languages": ["en"],
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://groq.com/pricing/",
      "notes": "Hosted on Groq. Output speed ~100 characters/second."
    },
    {
      "provider": "Groq",
      "provider_url": "https://groq.com",
      "model_id": "canopy-labs-orpheus-arabic-saudi",
      "display_name": "Canopy Labs Orpheus Arabic Saudi (Groq)",
      "price_per_1m_chars_usd": "40.0",
      "voice_quality": "neural",
      "languages": ["ar-SA"],
      "last_verified": "2026-05-05",
      "last_changed_at": "2026-05-05",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://groq.com/pricing/",
      "notes": "Hosted on Groq. Saudi Arabic variant. Output speed ~100 characters/second."
    },
    {
      "provider": "Google",
      "provider_url": "https://cloud.google.com",
      "model_id": "google-tts-studio",
      "display_name": "Google Cloud TTS — Studio",
      "price_per_1m_chars_usd": "160.0",
      "voice_quality": "neural",
      "languages": "40+",
      "ssml_supported": true,
      "voice_cloning": false,
      "deployment_options": ["native", "vertex"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cloud.google.com/text-to-speech/pricing",
      "notes": "Google's premium TTS tier for professional media production (long-form narration, advertising). Vendor price $0.000160/char = $160/1M chars; the single-speaker Studio class is GA and the multispeaker class is experimental per https://docs.cloud.google.com/text-to-speech/docs/voices. SSML supported except <mark>, <emphasis>, <prosody pitch>, and <lang>. Model_id is a Hail-coined tier slug (Google bills per-voice-tier rather than per API model name). Free tier: first 100K chars/month included (not representable as tokens_per_day in schema)."
    },
    {
      "provider": "Google",
      "provider_url": "https://cloud.google.com",
      "model_id": "google-tts-neural2",
      "display_name": "Google Cloud TTS — Neural2",
      "price_per_1m_chars_usd": "16.0",
      "voice_quality": "neural",
      "languages": "40+",
      "ssml_supported": true,
      "voice_cloning": false,
      "deployment_options": ["native", "vertex"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cloud.google.com/text-to-speech/pricing",
      "notes": "Google's recommended general-purpose neural tier; same per-char rate as WaveNet but newer architecture. Vendor price $0.000016/char = $16/1M chars. SSML fully supported. Model_id is a Hail-coined tier slug (Google bills per-voice-tier rather than per API model name). Free tier: first 1M chars/month included (not representable as tokens_per_day in schema)."
    },
    {
      "provider": "Google",
      "provider_url": "https://cloud.google.com",
      "model_id": "google-tts-wavenet",
      "display_name": "Google Cloud TTS — WaveNet",
      "price_per_1m_chars_usd": "16.0",
      "voice_quality": "neural",
      "languages": "40+",
      "ssml_supported": true,
      "voice_cloning": false,
      "deployment_options": ["native", "vertex"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cloud.google.com/text-to-speech/pricing",
      "notes": "Original neural-net voice family from DeepMind; not deprecated as of 2026-05-19 per Cloud TTS release notes (https://docs.cloud.google.com/text-to-speech/docs/release-notes) but Google recommends Neural2 for new projects at the same $16/1M chars rate. SSML fully supported. Model_id is a Hail-coined tier slug. Free tier: first 1M chars/month included."
    },
    {
      "provider": "Google",
      "provider_url": "https://cloud.google.com",
      "model_id": "google-tts-chirp-3-hd",
      "display_name": "Google Cloud TTS — Chirp 3: HD",
      "price_per_1m_chars_usd": "30.0",
      "voice_quality": "neural",
      "languages": "30+",
      "ssml_supported": false,
      "streaming_supported": true,
      "voice_cloning": false,
      "deployment_options": ["native", "vertex"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://cloud.google.com/text-to-speech/pricing",
      "notes": "Google's newest-generation TTS family with 30 voice styles in 30+ languages. Vendor price $0.000030/char = $30/1M chars. Per https://docs.cloud.google.com/text-to-speech/docs/chirp3-hd, Chirp 3: HD explicitly does NOT support SSML, speaking-rate adjustments, or pitch parameters; streaming synthesis IS supported. Model_id is a Hail-coined tier slug. Free tier: first 1M chars/month included."
    },
    {
      "provider": "Microsoft Azure",
      "provider_url": "https://azure.microsoft.com",
      "model_id": "azure-tts-neural",
      "display_name": "Azure AI Speech — Neural",
      "price_per_1m_chars_usd": "16.0",
      "voice_quality": "neural",
      "languages": "100+",
      "ssml_supported": true,
      "streaming_supported": true,
      "voice_cloning": false,
      "deployment_options": ["azure"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/speech-services/",
      "notes": "Azure's standard neural TTS tier (called 'Neural' on the pricing page; 'Standard voice' in docs). 500+ prebuilt voices across 100+ locales per https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech. S0 pay-as-you-go price $16/1M chars for both real-time and batch synthesis (HD, AOAI, Custom Neural Voice, and Personal Voice priced separately). Full SSML support. Chinese characters counted as 2 chars for billing. Free tier (F0): 500K chars/month."
    },
    {
      "provider": "Microsoft Azure",
      "provider_url": "https://azure.microsoft.com",
      "model_id": "azure-tts-hd",
      "display_name": "Azure AI Speech — Neural HD (DragonHD)",
      "aliases": ["dragon-hd"],
      "price_per_1m_chars_usd": "22.0",
      "voice_quality": "neural",
      "voices_count": 30,
      "languages": ["en-US", "zh-CN", "de-DE", "es-ES", "fr-FR", "ja-JP"],
      "ssml_supported": false,
      "emotion_control_supported": true,
      "streaming_supported": true,
      "voice_cloning": false,
      "deployment_options": ["azure"],
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-03-01",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/speech-services/",
      "notes": "Azure's premium HD neural tier (DragonHD architecture, 30+ GA voices). Per https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/azure-speech-%E2%80%93-neural-hd-text-to-speech-recent-voice-updates/4505380, Azure reduced Neural HD pricing to $22/1M chars effective March 2026 (down from $30/1M). Latency <300ms, real-time only. SSML support is partial (no <prosody>, <emphasis>, <break>); we mark ssml_supported=false because the elements most callers want are unsupported. Automatic emotion/sentiment detection drives delivery (emotion_control_supported=true). DragonHDOmni (700+ voices, mixed GA/preview) and DragonHDFlash (en-US/zh-CN only) are distinct models tracked separately if added later. Confidence medium because the pricing-page value was sourced via third-party recap (techcommunity blog) — verify on the live Azure pricing page before high-volume use."
    },
    {
      "provider": "Inworld",
      "provider_url": "https://inworld.ai",
      "model_id": "inworld-tts-2",
      "display_name": "Inworld Realtime TTS-2",
      "price_per_1m_chars_usd": "35.0",
      "voice_quality": "neural",
      "languages": "100+",
      "streaming_supported": true,
      "voice_cloning": true,
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://inworld.ai/pricing",
      "notes": "Inworld's newest TTS model (Research Preview) with natural-language steering and 100+ language support. Base PAYG rate $35/1M chars; Developer tier $30, Growth $25, Enterprise as low as $10/1M. ~200ms latency. Instant voice cloning, custom pronunciation, timestamp alignment, and zero data retention included. The older inworld-tts-1 and inworld-tts-1-max are deprecated per https://docs.inworld.ai/tts/tts-models — migrate to inworld-tts-1.5-max or inworld-tts-1.5-mini if -2's research-preview status is a concern."
    },
    {
      "provider": "Smallest.ai",
      "provider_url": "https://smallest.ai",
      "model_id": "lightning-v3.1",
      "display_name": "Lightning v3.1",
      "price_per_1m_chars_usd": "25.0",
      "voice_quality": "neural",
      "voices_count": 217,
      "languages": [
        "en",
        "hi",
        "es",
        "mr",
        "kn",
        "ta",
        "bn",
        "gu",
        "te",
        "ml",
        "pa",
        "or"
      ],
      "streaming_supported": true,
      "voice_cloning": true,
      "output_formats": ["pcm", "wav", "mp3", "mulaw"],
      "sample_rates_hz": [8000, 16000, 24000, 44100],
      "time_to_first_byte_ms": 200,
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://smallest.ai/pricing",
      "notes": "Smallest.ai's current TTS model (44.1 kHz native, ~200ms TTFB). Vendor rate ~$0.25/10k chars = $25/1M chars. 217 voices across 12 languages: English, Hindi, Spanish, and 9 Indian languages (Marathi, Kannada, Tamil, Bengali, Gujarati, Telugu, Malayalam, Punjabi, Odia). Lightning v2 and lightning-large are deprecated per https://docs.smallest.ai/waves/documentation/getting-started/models — new integrations should use lightning-v3.1. WebSocket streaming for real-time/conversational use. Instant + professional voice cloning supported. On-prem available on Enterprise plan."
    },
    {
      "provider": "Rime",
      "provider_url": "https://rime.ai",
      "model_id": "mistv3",
      "display_name": "Rime Mist v3",
      "price_per_1m_chars_usd": "30.0",
      "voice_quality": "neural",
      "voices_count": 94,
      "languages": ["en"],
      "streaming_supported": true,
      "voice_cloning": false,
      "time_to_first_byte_ms": 100,
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://rime.ai/pricing",
      "notes": "Rime's current Mist-family flagship; English-only with sub-100ms TTFB. Pricing-page rate $0.03/1K chars = $30/1M chars (the 'Mist' line on https://rime.ai/pricing). 94 voices. Custom pronunciation is on mistv2 but not yet on mistv3 per https://docs.rime.ai/api-reference/models. Coda ($0.05/1K = $50/1M, 184 voices, 6 languages incl. ES/FR/PT/DE/JA) and Arcana ($0.04/1K = $40/1M, multilingual, 94 voices) are distinct higher-tier models tracked separately if added later. Voice cloning not documented for Mist; available via Enterprise plan for custom voices."
    },
    {
      "provider": "LMNT",
      "provider_url": "https://www.lmnt.com",
      "model_id": "blizzard",
      "display_name": "LMNT Blizzard",
      "price_per_1m_chars_usd": "50.0",
      "voice_quality": "neural",
      "languages": "31+",
      "streaming_supported": true,
      "voice_cloning": true,
      "deployment_options": ["native"],
      "confidence": "medium",
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.lmnt.com/pricing",
      "notes": "LMNT's flagship Blizzard 2.0 model (canonical model_id 'blizzard' per https://docs.lmnt.com/models/overview). 31 languages with accent control, word timestamps, streaming, voice cloning, and speech sessions. Confidence medium because LMNT publishes plan-bundled pricing (Indie $10/mo for 200K chars + $0.05/1K overage; Pro $49/mo + $0.045/1K overage; Premium $199/mo + $0.035/1K overage) rather than a standalone PAYG per-char rate — $50/1M shown here is the Indie-tier overage rate. Free tier includes 15K characters/month with no overage rate (not representable as tokens_per_day in schema). Premium tier overage is $0.035/1K = $35/1M — large customers should benchmark on their own plan."
    },
    {
      "provider": "Deepgram",
      "provider_url": "https://deepgram.com",
      "model_id": "aura-2",
      "display_name": "Deepgram Aura 2",
      "price_per_1m_chars_usd": "30.0",
      "voice_quality": "neural",
      "languages": ["en", "es", "de", "fr", "nl", "it", "ja"],
      "streaming_supported": true,
      "voice_cloning": false,
      "output_formats": ["wav", "mp3", "linear16", "mulaw", "alaw", "opus"],
      "deployment_options": ["native"],
      "last_verified": "2026-05-19",
      "last_changed_at": "2026-05-19",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://deepgram.com/pricing",
      "notes": "Deepgram's current TTS family, addressed as 'aura-2-<voice>-<lang>' (e.g., aura-2-thalia-en) per https://developers.deepgram.com/docs/tts-models. Vendor rate $0.030/1K chars = $30/1M chars on Pay-As-You-Go; Growth tier $0.027/1K = $27/1M. Voice counts by language: en 40+ (incl. Aura 1 legacy), es 15+ (Early Access), nl 8, fr 2, de 7, it 10, ja 4. Free tier ships $200 of signup credit applicable to all products (not representable as tokens_per_day in schema). Aura 1 voices remain callable but Deepgram recommends Aura 2 for new integrations."
    },
    {
      "provider": "Resemble AI",
      "provider_url": "https://www.resemble.ai",
      "model_id": "chatterbox-turbo",
      "display_name": "Resemble Chatterbox Turbo",
      "price_per_second_usd": "0.0005",
      "voice_quality": "neural",
      "languages": ["en"],
      "voice_cloning": true,
      "deployment_options": ["native"],
      "confidence": "medium",
      "last_verified": "2026-05-26",
      "last_changed_at": "2026-05-26",
      "verification_method": "manual-confirmed",
      "verified_by": "r13i",
      "source_url": "https://www.resemble.ai/pricing",
      "notes": "Resemble bills TTS per second of generated audio at a flat $0.0005/sec on their Flex (pay-as-you-go) plan; the rate is not split per model. Chatterbox Turbo is Resemble's flagship English TTS per https://www.resemble.ai (also open-sourced at https://github.com/resemble-ai/chatterbox, 'SoTA open-source TTS'). Confidence medium because the pricing page lists the rate against the service category 'Text-to-speech' rather than naming Chatterbox Turbo specifically, and the API's exact 'model' parameter slug was not verified against live docs. Multilingual ('Chatterbox Multilingual') and dramatic-read ('DramaBox') variants are marketed separately; modeled here as the English flagship only."
    }
  ]
}