{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://api-evangelist.com/schemas/vapi/vapi-assistant-schema.json",
  "title": "Vapi Assistant",
  "description": "JSON Schema for the Vapi Assistant resource as returned by the Vapi API.",
  "type": "object",
  "properties": {
    "transcriber": {
      "description": "These are the options for the assistant's transcriber.",
      "oneOf": [
        {
          "type": "object",
          "properties": {
            "provider": {
              "type": "string",
              "description": "This is the transcription provider that will be used.",
              "enum": [
                "assembly-ai"
              ]
            },
            "language": {
              "type": "string",
              "description": "This is the language that will be set for the transcription.",
              "enum": [
                "multi",
                "en"
              ]
            },
            "confidenceThreshold": {
              "type": "number",
              "description": "Transcripts below this confidence threshold will be discarded.\n\n@default 0.4"
            },
            "formatTurns": {
              "type": "boolean",
              "description": "This enables formatting of transcripts.\n\n@default true"
            },
            "endOfTurnConfidenceThreshold": {
              "type": "number",
              "description": "This is the end of turn confidence threshold. The minimum confidence that the end of turn is detected.\nNote: Only used if startSpeakingPlan.smartEndpointingPlan is not set.\n@min 0\n@max 1\n@default 0.7"
            },
            "minEndOfTurnSilenceWhenConfident": {
              "type": "number",
              "description": "This is the minimum end of turn silence when confident in milliseconds.\nNote: Only used if startSpeakingPlan.smartEndpointingPlan is not set.\n@default 160"
            },
            "wordFinalizationMaxWaitTime": {
              "type": "number"
            },
            "maxTurnSilence": {
              "type": "number",
              "description": "This is the maximum turn silence time in milliseconds.\nNote: Only used if startSpeakingPlan.smartEndpointingPlan is not set.\n@default 400"
            },
            "vadAssistedEndpointingEnabled": {
              "type": "boolean",
              "description": "Use VAD to assist with endpointing decisions from the transcriber.\nWhen enabled, transcriber endpointing will be buffered if VAD detects the user is still speaking, preventing premature turn-taking.\nWhen disabled, transcriber endpointing will be used immediately regardless of VAD state, allowing for"
            },
            "speechModel": {
              "type": "string",
              "description": "This is the speech model used for the streaming session.\nNote: Keyterms prompting is not supported with multilingual streaming.\n@default 'universal-streaming-english'",
              "enum": [
                "universal-streaming-english",
                "universal-streaming-multilingual"
              ]
            },
            "realtimeUrl": {
              "type": "string",
              "description": "The WebSocket URL that the transcriber connects to."
            },
            "wordBoost": {
              "type": "array",
              "description": "Add up to 2500 characters of custom vocabulary.",
              "items": {
                "type": "object"
              }
            },
            "keytermsPrompt": {
              "type": "array",
              "description": "Keyterms prompting improves recognition accuracy for specific words and phrases.\nCan include up to 100 keyterms, each up to 50 characters.\nCosts an additional $0.04/hour when enabled.",
              "items": {
                "type": "object"
              }
            },
            "endUtteranceSilenceThreshold": {
              "type": "number",
              "description": "The duration of the end utterance silence threshold in milliseconds."
            },
            "disablePartialTranscripts": {
              "type": "boolean",
              "description": "Disable partial transcripts.\nSet to `true` to not receive partial transcripts. Defaults to `false`."
            },
            "fallbackPlan": {
              "description": "This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails."
            }
          },
          "required": [
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "provider": {
              "type": "string",
              "description": "This is the transcription provider that will be used.",
              "enum": [
                "azure"
              ]
            },
            "language": {
              "type": "string",
              "description": "This is the language that will be set for the transcription. The list of languages Azure supports can be found here: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=stt",
              "enum": [
                "af-ZA",
                "am-ET",
                "ar-AE",
                "ar-BH",
                "ar-DZ",
                "ar-EG",
                "ar-IL",
                "ar-IQ",
                "ar-JO",
                "ar-KW"
              ]
            },
            "segmentationStrategy": {
              "type": "string",
              "description": "Controls how phrase boundaries are detected, enabling either simple time/silence heuristics or more advanced semantic segmentation.",
              "enum": [
                "Default",
                "Time",
                "Semantic"
              ]
            },
            "segmentationSilenceTimeoutMs": {
              "type": "number",
              "description": "Duration of detected silence after which the service finalizes a phrase. Configure to adjust sensitivity to pauses in speech."
            },
            "segmentationMaximumTimeMs": {
              "type": "number",
              "description": "Maximum duration a segment can reach before being cut off when using time-based segmentation."
            },
            "fallbackPlan": {
              "description": "This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails."
            }
          },
          "required": [
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "provider": {
              "type": "string",
              "description": "This is the transcription provider that will be used. Use `custom-transcriber` for providers that are not natively supported.",
              "enum": [
                "custom-transcriber"
              ]
            },
            "server": {
              "description": "This is where the transcription request will be sent.\n\nUsage:\n1. Vapi will initiate a websocket connection with `server.url`.\n\n2. Vapi will send an initial text frame with the sample rate. Format:\n```\n    {\n      \"type\": \"start\",\n      \"encoding\": \"linear16\", // 16-bit raw PCM format\n      \"containe"
            },
            "fallbackPlan": {
              "description": "This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails."
            }
          },
          "required": [
            "provider",
            "server"
          ]
        },
        {
          "type": "object",
          "properties": {
            "provider": {
              "type": "string",
              "description": "This is the transcription provider that will be used.",
              "enum": [
                "deepgram"
              ]
            },
            "model": {
              "description": "This is the Deepgram model that will be used. A list of models can be found here: https://developers.deepgram.com/docs/models-languages-overview"
            },
            "language": {
              "type": "string",
              "description": "This is the language that will be set for the transcription. The list of languages Deepgram supports can be found here: https://developers.deepgram.com/docs/models-languages-overview",
              "enum": [
                "ar",
                "az",
                "ba",
                "be",
                "bg",
                "bn",
                "br",
                "bs",
                "ca",
                "cs"
              ]
            },
            "smartFormat": {
              "type": "boolean",
              "description": "This will be use smart format option provided by Deepgram. It's default disabled because it can sometimes format numbers as times but it's getting better."
            },
            "mipOptOut": {
              "type": "boolean",
              "description": "If set to true, this will add mip_opt_out=true as a query parameter of all API requests. See https://developers.deepgram.com/docs/the-deepgram-model-improvement-partnership-program#want-to-opt-out\n\nThis will only be used if you are using your own Deepgram API key.\n\n@default false"
            },
            "numerals": {
              "type": "boolean",
              "description": "If set to true, this will cause deepgram to convert spoken numbers to literal numerals. For example, \"my phone number is nine-seven-two...\" would become \"my phone number is 972...\"\n\n@default false"
            },
            "profanityFilter": {
              "type": "boolean",
              "description": "If set to true, Deepgram will replace profanity in transcripts with surrounding asterisks, e.g. \"f***\".\n\n@default false"
            },
            "redaction": {
              "type": "array",
              "description": "Enables redaction of sensitive information from transcripts.\n\nOptions include:\n- \"pci\": Redacts credit card numbers, expiration dates, and CVV.\n- \"pii\": Redacts personally identifiable information (names, locations, identifying numbers, etc.).\n- \"phi\": Redacts protected health information (medical c",
              "enum": [
                "pci",
                "pii",
                "phi",
                "numbers"
              ],
              "items": {
                "type": "object"
              }
            },
            "confidenceThreshold": {
              "type": "number",
              "description": "Transcripts below this confidence threshold will be discarded.\n\n@default 0.4"
            },
            "eotThreshold": {
              "type": "number",
              "description": "End-of-turn confidence required to finish a turn. Only used with Flux models.\n\n@default 0.7"
            },
            "eotTimeoutMs": {
              "type": "number",
              "description": "A turn will be finished when this much time has passed after speech, regardless of EOT confidence. Only used with Flux models.\n\n@default 5000"
            },
            "languages": {
              "type": "array",
              "description": "Language hints to bias Flux Multilingual (`flux-general-multi`) toward specific languages.\nProvide BCP-47 language codes (e.g. \"en\", \"es\", \"fr\"). Multiple hints can be given for\nmultilingual or code-switching scenarios. Omit for auto-detection. Only used with `flux-general-multi`.",
              "items": {
                "type": "object"
              }
            },
            "keywords": {
              "type": "array",
              "description": "These keywords are passed to the transcription model to help it pick up use-case specific words. Anything that may not be a common word, like your company name, should be added here.",
              "items": {
                "type": "object"
              }
            },
            "keyterm": {
              "type": "array",
              "description": "Keyterm Prompting allows you improve Keyword Recall Rate (KRR) for important keyterms or phrases up to 90%.",
              "items": {
                "type": "object"
              }
            },
            "endpointing": {
              "type": "number",
              "description": "This is the timeout after which Deepgram will send transcription on user silence. You can read in-depth documentation here: https://developers.deepgram.com/docs/endpointing.\n\nHere are the most important bits:\n- Defaults to 10. This is recommended for most use cases to optimize for latency.\n- 10 can "
            },
            "fallbackPlan": {
              "description": "This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails."
            }
          },
          "required": [
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "provider": {
              "type": "string",
              "description": "This is the transcription provider that will be used.",
              "enum": [
                "11labs"
              ]
            },
            "model": {
              "description": "This is the model that will be used for the transcription."
            },
            "language": {
              "type": "string",
              "description": "This is the language that will be used for the transcription.",
              "enum": [
                "aa",
                "ab",
                "ae",
                "af",
                "ak",
                "am",
                "an",
                "ar",
                "as",
                "av"
              ]
            },
            "silenceThresholdSeconds": {
              "type": "number",
              "description": "This is the number of seconds of silence before VAD commits (0.3-3.0)."
            },
            "confidenceThreshold": {
              "type": "number",
              "description": "This is the VAD sensitivity (0.1-0.9, lower indicates more sensitive)."
            },
            "minSpeechDurationMs": {
              "type": "number",
              "description": "This is the minimum speech duration for VAD (50-2000ms)."
            },
            "minSilenceDurationMs": {
              "type": "number",
              "description": "This is the minimum silence duration for VAD (50-2000ms)."
            },
            "fallbackPlan": {
              "description": "This is the plan for transcriber provider fallbacks in the event that the primary transcriber provider fails."
            }
          },
          "required": [
            "provider"
          ]
        }
      ]
    },
    "model": {
      "description": "These are the options for the assistant's LLM.",
      "oneOf": [
        {
          "type": "object",
          "properties": {
            "messages": {
              "type": "array",
              "description": "This is the starting state for the conversation.",
              "items": {
                "type": "object"
              }
            },
            "tools": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "toolIds": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use transient tools, use `tools`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "knowledgeBase": {
              "description": "These are the options for the knowledge base."
            },
            "model": {
              "type": "string",
              "description": "The specific Anthropic/Claude model that will be used.",
              "enum": [
                "claude-3-opus-20240229",
                "claude-3-sonnet-20240229",
                "claude-3-haiku-20240307",
                "claude-3-5-sonnet-20240620",
                "claude-3-5-sonnet-20241022",
                "claude-3-5-haiku-20241022",
                "claude-3-7-sonnet-20250219",
                "claude-opus-4-20250514",
                "claude-opus-4-5-20251101",
                "claude-opus-4-6"
              ]
            },
            "provider": {
              "type": "string",
              "description": "The provider identifier for Anthropic.",
              "enum": [
                "anthropic"
              ]
            },
            "thinking": {
              "description": "Optional configuration for Anthropic's thinking feature.\nOnly applicable for claude-3-7-sonnet-20250219 model.\nIf provided, maxTokens must be greater than thinking.budgetTokens."
            },
            "temperature": {
              "type": "number",
              "description": "This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency."
            },
            "maxTokens": {
              "type": "number",
              "description": "This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250."
            },
            "emotionRecognitionEnabled": {
              "type": "boolean",
              "description": "This determines whether we detect user's emotion while they speak and send it as an additional info to model.\n\nDefault `false` because the model is usually are good at understanding the user's emotion from text.\n\n@default false"
            },
            "numFastTurns": {
              "type": "number",
              "description": "This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. Example, gpt-3.5-turbo if provider is openai.\n\nDefault is 0.\n\n@default 0"
            }
          },
          "required": [
            "model",
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "messages": {
              "type": "array",
              "description": "This is the starting state for the conversation.",
              "items": {
                "type": "object"
              }
            },
            "tools": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "toolIds": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use transient tools, use `tools`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "knowledgeBase": {
              "description": "These are the options for the knowledge base."
            },
            "provider": {
              "type": "string",
              "description": "The provider identifier for Anthropic via AWS Bedrock.",
              "enum": [
                "anthropic-bedrock"
              ]
            },
            "model": {
              "type": "string",
              "description": "The specific Anthropic/Claude model that will be used via Bedrock.",
              "enum": [
                "claude-3-opus-20240229",
                "claude-3-sonnet-20240229",
                "claude-3-haiku-20240307",
                "claude-3-5-sonnet-20240620",
                "claude-3-5-sonnet-20241022",
                "claude-3-5-haiku-20241022",
                "claude-3-7-sonnet-20250219",
                "claude-opus-4-20250514",
                "claude-opus-4-5-20251101",
                "claude-opus-4-6"
              ]
            },
            "thinking": {
              "description": "Optional configuration for Anthropic's thinking feature.\nOnly applicable for claude-3-7-sonnet-20250219 model.\nIf provided, maxTokens must be greater than thinking.budgetTokens."
            },
            "temperature": {
              "type": "number",
              "description": "This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency."
            },
            "maxTokens": {
              "type": "number",
              "description": "This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250."
            },
            "emotionRecognitionEnabled": {
              "type": "boolean",
              "description": "This determines whether we detect user's emotion while they speak and send it as an additional info to model.\n\nDefault `false` because the model is usually are good at understanding the user's emotion from text.\n\n@default false"
            },
            "numFastTurns": {
              "type": "number",
              "description": "This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. Example, gpt-3.5-turbo if provider is openai.\n\nDefault is 0.\n\n@default 0"
            }
          },
          "required": [
            "provider",
            "model"
          ]
        },
        {
          "type": "object",
          "properties": {
            "messages": {
              "type": "array",
              "description": "This is the starting state for the conversation.",
              "items": {
                "type": "object"
              }
            },
            "tools": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "toolIds": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use transient tools, use `tools`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "knowledgeBase": {
              "description": "These are the options for the knowledge base."
            },
            "provider": {
              "type": "string",
              "enum": [
                "anyscale"
              ]
            },
            "model": {
              "type": "string",
              "description": "This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b"
            },
            "temperature": {
              "type": "number",
              "description": "This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency."
            },
            "maxTokens": {
              "type": "number",
              "description": "This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250."
            },
            "emotionRecognitionEnabled": {
              "type": "boolean",
              "description": "This determines whether we detect user's emotion while they speak and send it as an additional info to model.\n\nDefault `false` because the model is usually are good at understanding the user's emotion from text.\n\n@default false"
            },
            "numFastTurns": {
              "type": "number",
              "description": "This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. Example, gpt-3.5-turbo if provider is openai.\n\nDefault is 0.\n\n@default 0"
            }
          },
          "required": [
            "provider",
            "model"
          ]
        },
        {
          "type": "object",
          "properties": {
            "messages": {
              "type": "array",
              "description": "This is the starting state for the conversation.",
              "items": {
                "type": "object"
              }
            },
            "tools": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "toolIds": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use transient tools, use `tools`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "knowledgeBase": {
              "description": "These are the options for the knowledge base."
            },
            "model": {
              "type": "string",
              "description": "This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b",
              "enum": [
                "llama3.1-8b",
                "llama-3.3-70b"
              ]
            },
            "provider": {
              "type": "string",
              "enum": [
                "cerebras"
              ]
            },
            "temperature": {
              "type": "number",
              "description": "This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency."
            },
            "maxTokens": {
              "type": "number",
              "description": "This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250."
            },
            "emotionRecognitionEnabled": {
              "type": "boolean",
              "description": "This determines whether we detect user's emotion while they speak and send it as an additional info to model.\n\nDefault `false` because the model is usually are good at understanding the user's emotion from text.\n\n@default false"
            },
            "numFastTurns": {
              "type": "number",
              "description": "This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. Example, gpt-3.5-turbo if provider is openai.\n\nDefault is 0.\n\n@default 0"
            }
          },
          "required": [
            "model",
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "messages": {
              "type": "array",
              "description": "This is the starting state for the conversation.",
              "items": {
                "type": "object"
              }
            },
            "tools": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "toolIds": {
              "type": "array",
              "description": "These are the tools that the assistant can use during the call. To use transient tools, use `tools`.\n\nBoth `tools` and `toolIds` can be used together.",
              "items": {
                "type": "object"
              }
            },
            "knowledgeBase": {
              "description": "These are the options for the knowledge base."
            },
            "provider": {
              "type": "string",
              "description": "This is the provider that will be used for the model. Any service, including your own server, that is compatible with the OpenAI API can be used.",
              "enum": [
                "custom-llm"
              ]
            },
            "metadataSendMode": {
              "type": "string",
              "description": "This determines whether metadata is sent in requests to the custom provider.\n\n- `off` will not send any metadata. payload will look like `{ messages }`\n- `variable` will send `assistant.metadata` as a variable on the payload. payload will look like `{ messages, metadata }`\n- `destructured` will send",
              "enum": [
                "off",
                "variable",
                "destructured"
              ]
            },
            "headers": {
              "type": "object",
              "description": "Custom headers to send with requests. These headers can override default OpenAI headers except for Authorization (which should be specified using a custom-llm credential)."
            },
            "url": {
              "type": "string",
              "description": "These is the URL we'll use for the OpenAI client's `baseURL`. Ex. https://openrouter.ai/api/v1"
            },
            "wordLevelConfidenceEnabled": {
              "type": "boolean",
              "description": "This determines whether the transcriber's word level confidence is sent in requests to the custom provider. Default is false.\nThis only works for Deepgram transcribers."
            },
            "timeoutSeconds": {
              "type": "number",
              "description": "This sets the timeout for the connection to the custom provider without needing to stream any tokens back. Default is 20 seconds."
            },
            "model": {
              "type": "string",
              "description": "This is the name of the model. Ex. cognitivecomputations/dolphin-mixtral-8x7b"
            },
            "temperature": {
              "type": "number",
              "description": "This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency."
            },
            "maxTokens": {
              "type": "number",
              "description": "This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250."
            },
            "emotionRecognitionEnabled": {
              "type": "boolean",
              "description": "This determines whether we detect user's emotion while they speak and send it as an additional info to model.\n\nDefault `false` because the model is usually are good at understanding the user's emotion from text.\n\n@default false"
            },
            "numFastTurns": {
              "type": "number",
              "description": "This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. Example, gpt-3.5-turbo if provider is openai.\n\nDefault is 0.\n\n@default 0"
            }
          },
          "required": [
            "provider",
            "url",
            "model"
          ]
        }
      ]
    },
    "voice": {
      "description": "These are the options for the assistant's voice.",
      "oneOf": [
        {
          "type": "object",
          "properties": {
            "cachingEnabled": {
              "type": "boolean",
              "description": "This is the flag to toggle voice caching for the assistant."
            },
            "provider": {
              "type": "string",
              "description": "This is the voice provider that will be used.",
              "enum": [
                "azure"
              ]
            },
            "voiceId": {
              "description": "This is the provider-specific ID that will be used."
            },
            "chunkPlan": {
              "description": "This is the plan for chunking the model output before it is sent to the voice provider."
            },
            "speed": {
              "type": "number",
              "description": "This is the speed multiplier that will be used."
            },
            "fallbackPlan": {
              "description": "This is the plan for voice provider fallbacks in the event that the primary voice provider fails."
            }
          },
          "required": [
            "provider",
            "voiceId"
          ]
        },
        {
          "type": "object",
          "properties": {
            "cachingEnabled": {
              "type": "boolean",
              "description": "This is the flag to toggle voice caching for the assistant."
            },
            "provider": {
              "type": "string",
              "description": "This is the voice provider that will be used.",
              "enum": [
                "cartesia"
              ]
            },
            "voiceId": {
              "type": "string",
              "description": "The ID of the particular voice you want to use."
            },
            "model": {
              "type": "string",
              "description": "This is the model that will be used. This is optional and will default to the correct model for the voiceId.",
              "enum": [
                "sonic-3.5",
                "sonic-3.5-2026-05-04",
                "sonic-3",
                "sonic-3-2026-01-12",
                "sonic-3-2025-10-27",
                "sonic-2",
                "sonic-2-2025-06-11",
                "sonic-english",
                "sonic-multilingual",
                "sonic-preview"
              ]
            },
            "language": {
              "type": "string",
              "description": "This is the language that will be used. This is optional and will default to the correct language for the voiceId.",
              "enum": [
                "ar",
                "bg",
                "bn",
                "cs",
                "da",
                "de",
                "el",
                "en",
                "es",
                "fi"
              ]
            },
            "experimentalControls": {
              "description": "Experimental controls for Cartesia voice generation"
            },
            "generationConfig": {
              "description": "Generation config for fine-grained control of sonic-3 voice output (speed, volume, and experimental controls). Only available for sonic-3 model."
            },
            "pronunciationDictId": {
              "type": "string",
              "description": "Pronunciation dictionary ID for sonic-3. Allows custom pronunciations for specific words. Only available for sonic-3 model."
            },
            "chunkPlan": {
              "description": "This is the plan for chunking the model output before it is sent to the voice provider."
            },
            "fallbackPlan": {
              "description": "This is the plan for voice provider fallbacks in the event that the primary voice provider fails."
            }
          },
          "required": [
            "provider",
            "voiceId"
          ]
        },
        {
          "type": "object",
          "properties": {
            "cachingEnabled": {
              "type": "boolean",
              "description": "This is the flag to toggle voice caching for the assistant."
            },
            "provider": {
              "type": "string",
              "description": "This is the voice provider that will be used. Use `custom-voice` for providers that are not natively supported.",
              "enum": [
                "custom-voice"
              ]
            },
            "voiceId": {
              "type": "string",
              "description": "This is the provider-specific ID that will be used. This is passed in the voice request payload to identify the voice to use."
            },
            "chunkPlan": {
              "description": "This is the plan for chunking the model output before it is sent to the voice provider."
            },
            "server": {
              "description": "This is where the voice request will be sent.\n\nRequest Example:\n\nPOST https://{server.url}\nContent-Type: application/json\n\n{\n  \"message\": {\n    \"type\": \"voice-request\",\n    \"text\": \"Hello, world!\",\n    \"sampleRate\": 24000,\n    ...other metadata about the call...\n  }\n}\n\nResponse Expected: 1-channel 1"
            },
            "fallbackPlan": {
              "description": "This is the plan for voice provider fallbacks in the event that the primary voice provider fails."
            }
          },
          "required": [
            "provider",
            "server"
          ]
        },
        {
          "type": "object",
          "properties": {
            "cachingEnabled": {
              "type": "boolean",
              "description": "This is the flag to toggle voice caching for the assistant."
            },
            "provider": {
              "type": "string",
              "description": "This is the voice provider that will be used.",
              "enum": [
                "deepgram"
              ]
            },
            "voiceId": {
              "type": "string",
              "description": "This is the provider-specific ID that will be used.",
              "enum": [
                "asteria",
                "luna",
                "stella",
                "athena",
                "hera",
                "orion",
                "arcas",
                "perseus",
                "angus",
                "orpheus"
              ]
            },
            "model": {
              "type": "string",
              "description": "This is the model that will be used. Defaults to 'aura-2' when not specified.",
              "enum": [
                "aura",
                "aura-2"
              ]
            },
            "mipOptOut": {
              "type": "boolean",
              "description": "If set to true, this will add mip_opt_out=true as a query parameter of all API requests. See https://developers.deepgram.com/docs/the-deepgram-model-improvement-partnership-program#want-to-opt-out\n\nThis will only be used if you are using your own Deepgram API key.\n\n@default false"
            },
            "chunkPlan": {
              "description": "This is the plan for chunking the model output before it is sent to the voice provider."
            },
            "fallbackPlan": {
              "description": "This is the plan for voice provider fallbacks in the event that the primary voice provider fails."
            }
          },
          "required": [
            "provider",
            "voiceId"
          ]
        },
        {
          "type": "object",
          "properties": {
            "cachingEnabled": {
              "type": "boolean",
              "description": "This is the flag to toggle voice caching for the assistant."
            },
            "provider": {
              "type": "string",
              "description": "This is the voice provider that will be used.",
              "enum": [
                "11labs"
              ]
            },
            "voiceId": {
              "description": "This is the provider-specific ID that will be used. Ensure the Voice is present in your 11Labs Voice Library."
            },
            "stability": {
              "type": "number",
              "description": "Defines the stability for voice settings."
            },
            "similarityBoost": {
              "type": "number",
              "description": "Defines the similarity boost for voice settings."
            },
            "style": {
              "type": "number",
              "description": "Defines the style for voice settings."
            },
            "useSpeakerBoost": {
              "type": "boolean",
              "description": "Defines the use speaker boost for voice settings."
            },
            "speed": {
              "type": "number",
              "description": "Defines the speed for voice settings."
            },
            "optimizeStreamingLatency": {
              "type": "number",
              "description": "Defines the optimize streaming latency for voice settings. Defaults to 3."
            },
            "enableSsmlParsing": {
              "type": "boolean",
              "description": "This enables the use of https://elevenlabs.io/docs/speech-synthesis/prompting#pronunciation. Defaults to false to save latency.\n\n@default false"
            },
            "autoMode": {
              "type": "boolean",
              "description": "Defines the auto mode for voice settings. Defaults to false."
            },
            "model": {
              "type": "string",
              "description": "This is the model that will be used. Defaults to 'eleven_turbo_v2' if not specified.",
              "enum": [
                "eleven_multilingual_v2",
                "eleven_turbo_v2",
                "eleven_turbo_v2_5",
                "eleven_flash_v2",
                "eleven_flash_v2_5",
                "eleven_monolingual_v1",
                "eleven_v3"
              ]
            },
            "language": {
              "type": "string",
              "description": "This is the language (ISO 639-1) that is enforced for the model. Currently only Turbo v2.5 supports language enforcement. For other models, an error will be returned if language code is provided."
            },
            "chunkPlan": {
              "description": "This is the plan for chunking the model output before it is sent to the voice provider."
            },
            "pronunciationDictionaryLocators": {
              "type": "array",
              "description": "This is the pronunciation dictionary locators to use.",
              "items": {
                "type": "object"
              }
            },
            "fallbackPlan": {
              "description": "This is the plan for voice provider fallbacks in the event that the primary voice provider fails."
            }
          },
          "required": [
            "provider",
            "voiceId"
          ]
        }
      ]
    },
    "firstMessage": {
      "type": "string",
      "description": "This is the first message that the assistant will say. This can also be a URL to a containerized audio file (mp3, wav, etc.).\n\nIf unspecified, assistant will wait for user to speak and use the model to respond once they speak."
    },
    "firstMessageInterruptionsEnabled": {
      "type": "boolean"
    },
    "firstMessageMode": {
      "type": "string",
      "description": "This is the mode for the first message. Default is 'assistant-speaks-first'.\n\nUse:\n- 'assistant-speaks-first' to have the assistant speak first.\n- 'assistant-waits-for-user' to have the assistant wait for the user to speak first.\n- 'assistant-speaks-first-with-model-generated-message' to have the as",
      "enum": [
        "assistant-speaks-first",
        "assistant-speaks-first-with-model-generated-message",
        "assistant-waits-for-user"
      ]
    },
    "voicemailDetection": {
      "description": "These are the settings to configure or disable voicemail detection. Alternatively, voicemail detection can be configured using the model.tools=[VoicemailTool].\nBy default, voicemail detection is disabled.",
      "oneOf": [
        {
          "type": "string",
          "enum": [
            "off"
          ]
        },
        {
          "type": "object",
          "properties": {
            "beepMaxAwaitSeconds": {
              "type": "number",
              "description": "This is the maximum duration from the start of the call that we will wait for a voicemail beep, before speaking our message\n\n- If we detect a voicemail beep before this, we will speak the message at that point.\n\n- Setting too low a value means that the bot will start speaking its voicemail message t"
            },
            "provider": {
              "type": "string",
              "description": "This is the provider to use for voicemail detection.",
              "enum": [
                "google"
              ]
            },
            "backoffPlan": {
              "description": "This is the backoff plan for the voicemail detection."
            },
            "type": {
              "type": "string",
              "description": "This is the detection type to use for voicemail detection.\n- 'audio': Uses native audio models (default)\n- 'transcript': Uses ASR/transcript-based detection\n@default 'audio' (audio detection)",
              "enum": [
                "audio",
                "transcript"
              ]
            }
          },
          "required": [
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "beepMaxAwaitSeconds": {
              "type": "number",
              "description": "This is the maximum duration from the start of the call that we will wait for a voicemail beep, before speaking our message\n\n- If we detect a voicemail beep before this, we will speak the message at that point.\n\n- Setting too low a value means that the bot will start speaking its voicemail message t"
            },
            "provider": {
              "type": "string",
              "description": "This is the provider to use for voicemail detection.",
              "enum": [
                "openai"
              ]
            },
            "backoffPlan": {
              "description": "This is the backoff plan for the voicemail detection."
            },
            "type": {
              "type": "string",
              "description": "This is the detection type to use for voicemail detection.\n- 'audio': Uses native audio models (default)\n- 'transcript': Uses ASR/transcript-based detection\n@default 'audio' (audio detection)",
              "enum": [
                "audio",
                "transcript"
              ]
            }
          },
          "required": [
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "provider": {
              "type": "string",
              "description": "This is the provider to use for voicemail detection.",
              "enum": [
                "twilio"
              ]
            },
            "voicemailDetectionTypes": {
              "type": "array",
              "description": "These are the AMD messages from Twilio that are considered as voicemail. Default is ['machine_end_beep', 'machine_end_silence'].\n\n@default {Array} ['machine_end_beep', 'machine_end_silence']",
              "items": {
                "type": "string",
                "enum": [
                  "machine_start",
                  "human",
                  "fax",
                  "unknown",
                  "machine_end_beep",
                  "machine_end_silence",
                  "machine_end_other"
                ]
              }
            },
            "enabled": {
              "type": "boolean",
              "description": "This sets whether the assistant should detect voicemail. Defaults to true.\n\n@default true"
            },
            "machineDetectionTimeout": {
              "type": "number",
              "description": "The number of seconds that Twilio should attempt to perform answering machine detection before timing out and returning AnsweredBy as unknown. Default is 30 seconds.\n\nIncreasing this value will provide the engine more time to make a determination. This can be useful when DetectMessageEnd is provided"
            },
            "machineDetectionSpeechThreshold": {
              "type": "number",
              "description": "The number of milliseconds that is used as the measuring stick for the length of the speech activity. Durations lower than this value will be interpreted as a human, longer as a machine. Default is 2400 milliseconds.\n\nIncreasing this value will reduce the chance of a False Machine (detected machine,"
            },
            "machineDetectionSpeechEndThreshold": {
              "type": "number",
              "description": "The number of milliseconds of silence after speech activity at which point the speech activity is considered complete. Default is 1200 milliseconds.\n\nIncreasing this value will typically be used to better address the short voicemail greeting scenarios. For short voicemails, there is typically 1000-2"
            },
            "machineDetectionSilenceTimeout": {
              "type": "number",
              "description": "The number of milliseconds of initial silence after which an unknown AnsweredBy result will be returned. Default is 5000 milliseconds.\n\nIncreasing this value will result in waiting for a longer period of initial silence before returning an 'unknown' AMD result.\n\nDecreasing this value will result in "
            }
          },
          "required": [
            "provider"
          ]
        },
        {
          "type": "object",
          "properties": {
            "beepMaxAwaitSeconds": {
              "type": "number",
              "description": "This is the maximum duration from the start of the call that we will wait for a voicemail beep, before speaking our message\n\n- If we detect a voicemail beep before this, we will speak the message at that point.\n\n- Setting too low a value means that the bot will start speaking its voicemail message t"
            },
            "provider": {
              "type": "string",
              "description": "This is the provider to use for voicemail detection.",
              "enum": [
                "vapi"
              ]
            },
            "backoffPlan": {
              "description": "This is the backoff plan for the voicemail detection."
            },
            "type": {
              "type": "string",
              "description": "This is the detection type to use for voicemail detection.\n- 'audio': Uses native audio models (default)\n- 'transcript': Uses ASR/transcript-based detection\n@default 'audio' (audio detection)",
              "enum": [
                "audio",
                "transcript"
              ]
            }
          },
          "required": [
            "provider"
          ]
        }
      ]
    },
    "clientMessages": {
      "type": "array",
      "description": "These are the messages that will be sent to your Client SDKs. Default is conversation-update,function-call,hang,model-output,speech-update,status-update,transfer-update,transcript,tool-calls,user-interrupted,voice-input,workflow.node.started,assistant.started. You can check the shape of the messages",
      "items": {
        "type": "string",
        "enum": [
          "conversation-update",
          "assistant.speechStarted",
          "assistant.started",
          "function-call",
          "function-call-result",
          "hang",
          "language-changed",
          "metadata",
          "model-output",
          "speech-update",
          "status-update",
          "transfer-update",
          "transcript",
          "tool-calls",
          "user-interrupted",
          "voice-input",
          "workflow.node.started"
        ]
      }
    },
    "serverMessages": {
      "type": "array",
      "description": "These are the messages that will be sent to your Server URL. Default is conversation-update,end-of-call-report,function-call,hang,speech-update,status-update,tool-calls,transfer-destination-request,handoff-destination-request,user-interrupted,assistant.started. You can check the shape of the message",
      "items": {
        "type": "string",
        "enum": [
          "assistant.started",
          "assistant.speechStarted",
          "conversation-update",
          "end-of-call-report",
          "function-call",
          "hang",
          "language-changed",
          "language-change-detected",
          "model-output",
          "phone-call-control",
          "speech-update",
          "status-update",
          "tool-calls",
          "transfer-destination-request",
          "handoff-destination-request",
          "user-interrupted"
        ]
      }
    },
    "maxDurationSeconds": {
      "type": "number",
      "description": "This is the maximum number of seconds that the call will last. When the call reaches this duration, it will be ended.\n\n@default 600 (10 minutes)"
    },
    "backgroundSound": {
      "description": "This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'.\nYou can also provide a custom sound by providing a URL to an audio file.",
      "anyOf": [
        {
          "type": "string",
          "enum": [
            "off",
            "office"
          ]
        },
        {
          "type": "string",
          "format": "uri"
        }
      ]
    },
    "modelOutputInMessagesEnabled": {
      "type": "boolean",
      "description": "This determines whether the model's output is used in conversation history rather than the transcription of assistant's speech.\n\n@default false"
    },
    "transportConfigurations": {
      "type": "array",
      "description": "These are the configurations to be passed to the transport providers of assistant's calls, like Twilio. You can store multiple configurations for different transport providers. For a call, only the configuration matching the call transport provider is used.",
      "items": {}
    },
    "observabilityPlan": {
      "description": "This is the plan for observability of assistant's calls.\n\nCurrently, only Langfuse is supported.",
      "oneOf": [
        {
          "type": "object",
          "properties": {
            "provider": {
              "type": "string",
              "enum": [
                "langfuse"
              ]
            },
            "promptName": {
              "type": "string",
              "description": "The name of a Langfuse prompt to link generations to. This enables tracking which prompt version was used for each generation. https://langfuse.com/docs/prompt-management/features/link-to-traces"
            },
            "promptVersion": {
              "type": "number",
              "description": "The version number of the Langfuse prompt to link generations to. Used together with promptName to identify the exact prompt version. https://langfuse.com/docs/prompt-management/features/link-to-traces"
            },
            "traceName": {
              "type": "string",
              "description": "Custom name for the Langfuse trace. Supports Liquid templates.\n\nAvailable variables:\n- {{ call.id }} - Call UUID\n- {{ call.type }} - 'inboundPhoneCall', 'outboundPhoneCall', 'webCall'\n- {{ assistant.name }} - Assistant name\n- {{ assistant.id }} - Assistant ID\n\nExample: \"{{ assistant.name }} - {{ cal"
            },
            "tags": {
              "type": "array",
              "description": "This is an array of tags to be added to the Langfuse trace. Tags allow you to categorize and filter traces. https://langfuse.com/docs/tracing-features/tags",
              "items": {
                "type": "string"
              }
            },
            "metadata": {
              "type": "object",
              "description": "This is a JSON object that will be added to the Langfuse trace. Traces can be enriched with metadata to better understand your users, application, and experiments. https://langfuse.com/docs/tracing-features/metadata\nBy default it includes the call metadata, assistant metadata, and assistant override"
            }
          },
          "required": [
            "provider",
            "tags"
          ]
        }
      ]
    },
    "credentials": {
      "type": "array",
      "description": "These are dynamic credentials that will be used for the assistant calls. By default, all the credentials are available for use in the call, but you can supply additional credentials using this. Dynamic credentials override existing credentials.",
      "items": {}
    },
    "hooks": {
      "type": "array",
      "description": "This is a set of actions that will be performed on certain events.",
      "items": {}
    },
    "name": {
      "type": "string",
      "description": "This is the name of the assistant.\n\nThis is required when you want to transfer between assistants in a call."
    },
    "voicemailMessage": {
      "type": "string",
      "description": "This is the message that the assistant will say if the call is forwarded to voicemail.\n\nIf unspecified, it will hang up."
    },
    "endCallMessage": {
      "type": "string",
      "description": "This is the message that the assistant will say if it ends the call.\n\nIf unspecified, it will hang up without saying anything."
    },
    "endCallPhrases": {
      "type": "array",
      "description": "This list contains phrases that, if spoken by the assistant, will trigger the call to be hung up. Case insensitive.",
      "items": {
        "type": "string"
      }
    },
    "compliancePlan": {
      "type": "object",
      "properties": {
        "hipaaEnabled": {
          "type": "boolean",
          "description": "When this is enabled, logs, recordings, and transcriptions will be stored in HIPAA-compliant storage. Defaults to false. Only HIPAA-compliant providers will be available for LLM, Voice, and Transcriber respectively. This setting is only honored if the organization is on an Enterprise subscription or"
        },
        "pciEnabled": {
          "type": "boolean",
          "description": "When this is enabled, the user will be restricted to use PCI-compliant providers, and no logs or transcripts are stored.\nAt the end of the call, you will receive an end-of-call-report message to store on your server. Defaults to false."
        },
        "securityFilterPlan": {
          "description": "This is the security filter plan for the assistant. It allows filtering of transcripts for security threats before sending to LLM."
        },
        "recordingConsentPlan": {}
      }
    },
    "metadata": {
      "type": "object",
      "description": "This is for metadata you want to store on the assistant."
    },
    "backgroundSpeechDenoisingPlan": {
      "description": "This enables filtering of noise and background speech while the user is talking.\n\nFeatures:\n- Smart denoising using Krisp\n- Fourier denoising\n\nSmart denoising can be combined with or used independently of Fourier denoising.\n\nOrder of precedence:\n- Smart denoising\n- Fourier denoising",
      "allOf": [
        {
          "type": "object",
          "properties": {
            "smartDenoisingPlan": {
              "description": "Whether smart denoising using Krisp is enabled."
            },
            "fourierDenoisingPlan": {
              "description": "Whether Fourier denoising is enabled. Note that this is experimental and may not work as expected.\n\nThis can be combined with smart denoising, and will be run afterwards."
            }
          }
        }
      ]
    },
    "analysisPlan": {
      "description": "This is the plan for analysis of assistant's calls. Stored in `call.analysis`.",
      "allOf": [
        {
          "type": "object",
          "properties": {
            "minMessagesThreshold": {
              "type": "number",
              "description": "The minimum number of messages required to run the analysis plan.\nIf the number of messages is less than this, analysis will be skipped.\n\n@default 2"
            },
            "summaryPlan": {
              "description": "This is the plan for generating the summary of the call. This outputs to `call.analysis.summary`."
            },
            "structuredDataPlan": {
              "description": "This is the plan for generating the structured data from the call. This outputs to `call.analysis.structuredData`."
            },
            "structuredDataMultiPlan": {
              "type": "array",
              "description": "This is an array of structured data plan catalogs. Each entry includes a `key` and a `plan` for generating the structured data from the call. This outputs to `call.analysis.structuredDataMulti`.",
              "items": {
                "type": "object"
              }
            },
            "successEvaluationPlan": {
              "description": "This is the plan for generating the success evaluation of the call. This outputs to `call.analysis.successEvaluation`."
            },
            "outcomeIds": {
              "type": "array",
              "description": "This is an array of outcome UUIDs to be calculated during analysis.\nThe outcomes will be calculated and stored in `call.analysis.outcomes`.",
              "items": {
                "type": "string",
                "format": "uuid"
              }
            }
          }
        }
      ]
    },
    "artifactPlan": {
      "description": "This is the plan for artifacts generated during assistant's calls. Stored in `call.artifact`.",
      "allOf": [
        {
          "type": "object",
          "properties": {
            "recordingEnabled": {
              "type": "boolean",
              "description": "This determines whether assistant's calls are recorded. Defaults to true.\n\nUsage:\n- If you don't want to record the calls, set this to false.\n- If you want to record the calls when `assistant.hipaaEnabled` (deprecated) or `assistant.compliancePlan.hipaaEnabled` explicitly set this to true and make su"
            },
            "recordingFormat": {
              "type": "string",
              "description": "This determines the format of the recording. Defaults to `wav;l16`.\n\n@default 'wav;l16'",
              "enum": [
                "wav;l16",
                "mp3"
              ]
            },
            "recordingUseCustomStorageEnabled": {
              "type": "boolean",
              "description": "This determines whether to use custom storage (S3 or GCP) for call recordings when storage credentials are configured.\n\nWhen set to false, recordings will be stored on Vapi's storage instead of your custom storage, even if you have custom storage credentials configured.\n\nUsage:\n- Set to false if you"
            },
            "videoRecordingEnabled": {
              "type": "boolean",
              "description": "This determines whether the video is recorded during the call. Defaults to false. Only relevant for `webCall` type.\n\nYou can find the video recording at `call.artifact.videoRecordingUrl` after the call is ended.\n\n@default false"
            },
            "fullMessageHistoryEnabled": {
              "type": "boolean",
              "description": "This determines whether the artifact contains the full message history, even after handoff context engineering. Defaults to false."
            },
            "pcapEnabled": {
              "type": "boolean",
              "description": "This determines whether the SIP packet capture is enabled. Defaults to true. Only relevant for `phone` type calls where phone number's provider is `vapi` or `byo-phone-number`.\n\nYou can find the packet capture at `call.artifact.pcapUrl` after the call is ended.\n\n@default true"
            },
            "pcapS3PathPrefix": {
              "type": "string",
              "description": "This is the path where the SIP packet capture will be uploaded. This is only used if you have provided S3 or GCP credentials on the Provider Credentials page in the Dashboard.\n\nIf credential.s3PathPrefix or credential.bucketPlan.path is set, this will append to it.\n\nUsage:\n- If you want to upload th"
            },
            "pcapUseCustomStorageEnabled": {
              "type": "boolean",
              "description": "This determines whether to use custom storage (S3 or GCP) for SIP packet captures when storage credentials are configured.\n\nWhen set to false, packet captures will be stored on Vapi's storage instead of your custom storage, even if you have custom storage credentials configured.\n\nUsage:\n- Set to fal"
            },
            "loggingEnabled": {
              "type": "boolean",
              "description": "This determines whether the call logs are enabled. Defaults to true.\n\n@default true"
            },
            "loggingUseCustomStorageEnabled": {
              "type": "boolean",
              "description": "This determines whether to use custom storage (S3 or GCP) for call logs when storage credentials are configured.\n\nWhen set to false, logs will be stored on Vapi's storage instead of your custom storage, even if you have custom storage credentials configured.\n\nUsage:\n- Set to false if you have custom"
            },
            "transcriptPlan": {
              "description": "This is the plan for `call.artifact.transcript`. To disable, set `transcriptPlan.enabled` to false."
            },
            "recordingPath": {
              "type": "string",
              "description": "This is the path where the recording will be uploaded. This is only used if you have provided S3 or GCP credentials on the Provider Credentials page in the Dashboard.\n\nIf credential.s3PathPrefix or credential.bucketPlan.path is set, this will append to it.\n\nUsage:\n- If you want to upload the recordi"
            },
            "structuredOutputIds": {
              "type": "array",
              "description": "This is an array of structured output IDs to be calculated during the call.\nThe outputs will be extracted and stored in `call.artifact.structuredOutputs` after the call is ended.",
              "items": {
                "type": "object"
              }
            },
            "structuredOutputs": {
              "type": "array",
              "description": "This is an array of transient structured outputs to be calculated during the call.\nThe outputs will be extracted and stored in `call.artifact.structuredOutputs` after the call is ended.\nUse this to provide inline structured output configurations instead of referencing existing ones via structuredOut",
              "items": {
                "type": "object"
              }
            },
            "scorecardIds": {
              "type": "array",
              "description": "This is an array of scorecard IDs that will be evaluated based on the structured outputs extracted during the call.\nThe scorecards will be evaluated and the results will be stored in `call.artifact.scorecards` after the call has ended.",
              "items": {
                "type": "object"
              }
            },
            "scorecards": {
              "type": "array",
              "description": "This is the array of scorecards that will be evaluated based on the structured outputs extracted during the call.\nThe scorecards will be evaluated and the results will be stored in `call.artifact.scorecards` after the call has ended.",
              "items": {
                "type": "object"
              }
            },
            "loggingPath": {
              "type": "string",
              "description": "This is the path where the call logs will be uploaded. This is only used if you have provided S3 or GCP credentials on the Provider Credentials page in the Dashboard.\n\nIf credential.s3PathPrefix or credential.bucketPlan.path is set, this will append to it.\n\nUsage:\n- If you want to upload the call lo"
            }
          }
        }
      ]
    },
    "startSpeakingPlan": {
      "description": "This is the plan for when the assistant should start talking.\n\nYou should configure this if you're running into these issues:\n- The assistant is too slow to start talking after the customer is done speaking.\n- The assistant is too fast to start talking after the customer is done speaking.\n- The assi",
      "allOf": [
        {
          "type": "object",
          "properties": {
            "waitSeconds": {
              "type": "number",
              "description": "This is how long assistant waits before speaking. Defaults to 0.4.\n\nThis is the minimum it will wait but if there is latency is the pipeline, this minimum will be exceeded. This is intended as a stopgap in case the pipeline is moving too fast.\n\nExample:\n- If model generates tokens and voice generate"
            },
            "smartEndpointingEnabled": {},
            "smartEndpointingPlan": {
              "description": "This is the plan for smart endpointing. Pick between Vapi smart endpointing, LiveKit, or custom endpointing model (or nothing). We strongly recommend using livekit endpointing when working in English. LiveKit endpointing is not supported in other languages, yet.\n\nIf this is set, it will override and"
            },
            "customEndpointingRules": {
              "type": "array",
              "description": "These are the custom endpointing rules to set an endpointing timeout based on a regex on the customer's speech or the assistant's last message.\n\nUsage:\n- If you have yes/no questions like \"are you interested in a loan?\", you can set a shorter timeout.\n- If you have questions where the customer may p",
              "items": {
                "type": "object"
              }
            },
            "transcriptionEndpointingPlan": {
              "description": "This determines how a customer speech is considered done (endpointing) using the transcription of customer's speech.\n\nOnce an endpoint is triggered, the request is sent to `assistant.model`.\n\nNote: This plan is only used if `smartEndpointingPlan` is not set and transcriber does not have built-in end"
            }
          }
        }
      ]
    },
    "stopSpeakingPlan": {
      "description": "This is the plan for when assistant should stop talking on customer interruption.\n\nYou should configure this if you're running into these issues:\n- The assistant is too slow to recognize customer's interruption.\n- The assistant is too fast to recognize customer's interruption.\n- The assistant is get",
      "allOf": [
        {
          "type": "object",
          "properties": {
            "numWords": {
              "type": "number",
              "description": "This is the number of words that the customer has to say before the assistant will stop talking.\n\nWords like \"stop\", \"actually\", \"no\", etc. will always interrupt immediately regardless of this value.\n\nWords like \"okay\", \"yeah\", \"right\" will never interrupt.\n\nWhen set to 0, `voiceSeconds` is used in "
            },
            "voiceSeconds": {
              "type": "number",
              "description": "This is the seconds customer has to speak before the assistant stops talking. This uses the VAD (Voice Activity Detection) spike to determine if the customer has started speaking.\n\nConsiderations:\n- A lower value might be more responsive but could potentially pick up non-speech sounds.\n- A higher va"
            },
            "backoffSeconds": {
              "type": "number",
              "description": "This is the seconds to wait before the assistant will start talking again after being interrupted.\n\nDefaults to 1.\n\n@default 1"
            },
            "acknowledgementPhrases": {
              "type": "array",
              "description": "These are the phrases that will never interrupt the assistant, even if numWords threshold is met.\nThese are typically acknowledgement or backchanneling phrases.",
              "items": {
                "type": "object"
              }
            },
            "interruptionPhrases": {
              "type": "array",
              "description": "These are the phrases that will always interrupt the assistant immediately, regardless of numWords.\nThese are typically phrases indicating disagreement or desire to stop.",
              "items": {
                "type": "object"
              }
            }
          }
        }
      ]
    },
    "monitorPlan": {
      "description": "This is the plan for real-time monitoring of the assistant's calls.\n\nUsage:\n- To enable live listening of the assistant's calls, set `monitorPlan.listenEnabled` to `true`.\n- To enable live control of the assistant's calls, set `monitorPlan.controlEnabled` to `true`.\n- To attach monitors to the assis",
      "allOf": [
        {
          "type": "object",
          "properties": {
            "listenEnabled": {
              "type": "boolean",
              "description": "This determines whether the assistant's calls allow live listening. Defaults to true.\n\nFetch `call.monitor.listenUrl` to get the live listening URL.\n\n@default true"
            },
            "listenAuthenticationEnabled": {
              "type": "boolean",
              "description": "This enables authentication on the `call.monitor.listenUrl`.\n\nIf `listenAuthenticationEnabled` is `true`, the `call.monitor.listenUrl` will require an `Authorization: Bearer <vapi-public-api-key>` header.\n\n@default false"
            },
            "controlEnabled": {
              "type": "boolean",
              "description": "This determines whether the assistant's calls allow live control. Defaults to true.\n\nFetch `call.monitor.controlUrl` to get the live control URL.\n\nTo use, send any control message via a POST request to `call.monitor.controlUrl`. Here are the types of controls supported: https://docs.vapi.ai/api-refe"
            },
            "controlAuthenticationEnabled": {
              "type": "boolean",
              "description": "This enables authentication on the `call.monitor.controlUrl`.\n\nIf `controlAuthenticationEnabled` is `true`, the `call.monitor.controlUrl` will require an `Authorization: Bearer <vapi-public-api-key>` header.\n\n@default false"
            },
            "monitorIds": {
              "type": "array",
              "description": "This the set of monitor ids that are attached to the assistant.\nThe source of truth for the monitor ids is the assistant_monitor join table.\nThis field can be used for transient assistants and to update assistants with new monitor ids.\n\n@default []",
              "items": {
                "type": "object"
              }
            }
          }
        }
      ]
    },
    "credentialIds": {
      "type": "array",
      "description": "These are the credentials that will be used for the assistant calls. By default, all the credentials are available for use in the call but you can provide a subset using this.",
      "items": {
        "type": "string"
      }
    },
    "server": {
      "description": "This is where Vapi will send webhooks. You can find all webhooks available along with their shape in ServerMessage schema.\n\nThe order of precedence is:\n\n1. assistant.server.url\n2. phoneNumber.serverUrl\n3. org.serverUrl",
      "allOf": [
        {
          "type": "object",
          "properties": {
            "timeoutSeconds": {
              "type": "number",
              "description": "This is the timeout in seconds for the request. Defaults to 20 seconds.\n\n@default 20"
            },
            "credentialId": {
              "type": "string",
              "description": "The credential ID for server authentication"
            },
            "staticIpAddressesEnabled": {
              "type": "boolean",
              "description": "If enabled, requests will originate from a static set of IPs owned and managed by Vapi.\n\n@default false"
            },
            "encryptedPaths": {
              "type": "array",
              "description": "This is the paths to encrypt in the request body if credentialId and encryptionPlan are defined.",
              "items": {
                "type": "object"
              }
            },
            "url": {
              "type": "string",
              "description": "This is where the request will be sent."
            },
            "headers": {
              "type": "object",
              "description": "These are the headers to include in the request.\n\nEach key-value pair represents a header name and its value.\n\nNote: Specifying an Authorization header here will override the authorization provided by the `credentialId` (if provided). This is an anti-pattern and should be avoided outside of edge cas"
            },
            "backoffPlan": {
              "description": "This is the backoff plan if the request fails. Defaults to undefined (the request will not be retried).\n\n@default undefined (the request will not be retried)"
            }
          }
        }
      ]
    }
  },
  "required": [
    "id",
    "orgId",
    "createdAt",
    "updatedAt"
  ]
}