{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/ClientMessageAssistantSpeech",
  "title": "ClientMessageAssistantSpeech",
  "type": "object",
  "properties": {
    "phoneNumber": {
      "description": "This is the phone number that the message is associated with.",
      "oneOf": [
        {
          "$ref": "#/components/schemas/CreateByoPhoneNumberDTO",
          "title": "ByoPhoneNumber"
        },
        {
          "$ref": "#/components/schemas/CreateTwilioPhoneNumberDTO",
          "title": "TwilioPhoneNumber"
        },
        {
          "$ref": "#/components/schemas/CreateVonagePhoneNumberDTO",
          "title": "VonagePhoneNumber"
        },
        {
          "$ref": "#/components/schemas/CreateVapiPhoneNumberDTO",
          "title": "VapiPhoneNumber"
        },
        {
          "$ref": "#/components/schemas/CreateTelnyxPhoneNumberDTO",
          "title": "TelnyxPhoneNumber"
        }
      ]
    },
    "type": {
      "type": "string",
      "description": "This is the type of the message. \"assistant.speechStarted\" is sent as assistant audio is being played.",
      "enum": [
        "assistant.speechStarted"
      ]
    },
    "text": {
      "type": "string",
      "description": "The full assistant text for the current turn. This is the complete text,\nnot an incremental delta \u2014 consumers should use `timing` metadata (e.g.\n`wordsSpoken`) to determine which portion has been spoken so far."
    },
    "turn": {
      "type": "number",
      "description": "This is the turn number of the assistant speech event (0-indexed)."
    },
    "source": {
      "type": "string",
      "description": "Indicates how the text was sourced.",
      "enum": [
        "model",
        "force-say",
        "custom-voice"
      ]
    },
    "timing": {
      "description": "Optional timing metadata. Shape depends on `timing.type`:\n\n- `word-alignment` (ElevenLabs): per-character timing at playback\n  cadence. words[] includes space entries. Best consumed by tracking\n  a running character count: join timing.words, add to a char cursor,\n  and highlight text up to that position. No interpolation needed.\n\n- `word-progress` (Minimax with voice.subtitleType: 'word'):\n  cursor-based word count per TTS segment. Use wordsSpoken as the anchor,\n  interpolate forward using segmentDurationMs or timing.words until\n  the next event arrives.\n\nWhen absent, the event is a text-only fallback for providers without\nword-level timing (e.g. Cartesia, Deepgram, Azure). Text emits once\nper TTS chunk when audio is playing. Optionally interpolate a word\ncursor at ~3.5 words/sec between events for approximate tracking.",
      "oneOf": [
        {
          "$ref": "#/components/schemas/AssistantSpeechWordAlignmentTiming",
          "title": "WordAlignmentTiming"
        },
        {
          "$ref": "#/components/schemas/AssistantSpeechWordProgressTiming",
          "title": "WordProgressTiming"
        }
      ],
      "discriminator": {
        "propertyName": "type"
      }
    },
    "timestamp": {
      "type": "number",
      "description": "This is the timestamp of the message."
    },
    "call": {
      "description": "This is the call that the message is associated with.",
      "allOf": [
        {
          "$ref": "#/components/schemas/Call"
        }
      ]
    },
    "customer": {
      "description": "This is the customer that the message is associated with.",
      "allOf": [
        {
          "$ref": "#/components/schemas/CreateCustomerDTO"
        }
      ]
    },
    "assistant": {
      "description": "This is the assistant that the message is associated with.",
      "allOf": [
        {
          "$ref": "#/components/schemas/CreateAssistantDTO"
        }
      ]
    }
  },
  "required": [
    "type",
    "text"
  ]
}