{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/FallbackAssemblyAITranscriber",
  "title": "FallbackAssemblyAITranscriber",
  "type": "object",
  "properties": {
    "provider": {
      "type": "string",
      "description": "This is the transcription provider that will be used.",
      "enum": [
        "assembly-ai"
      ]
    },
    "language": {
      "type": "string",
      "description": "This is the language that will be set for the transcription.",
      "enum": [
        "multi",
        "en"
      ]
    },
    "confidenceThreshold": {
      "type": "number",
      "description": "Transcripts below this confidence threshold will be discarded.\n\n@default 0.4",
      "minimum": 0,
      "maximum": 1,
      "example": 0.4
    },
    "formatTurns": {
      "type": "boolean",
      "description": "This enables formatting of transcripts.\n\n@default true",
      "example": true
    },
    "endOfTurnConfidenceThreshold": {
      "type": "number",
      "description": "This is the end of turn confidence threshold. The minimum confidence that the end of turn is detected.\nNote: Only used if startSpeakingPlan.smartEndpointingPlan is not set.\n@min 0\n@max 1\n@default 0.7",
      "minimum": 0,
      "maximum": 1,
      "example": 0.7
    },
    "minEndOfTurnSilenceWhenConfident": {
      "type": "number",
      "description": "This is the minimum end of turn silence when confident in milliseconds.\nNote: Only used if startSpeakingPlan.smartEndpointingPlan is not set.\n@default 160",
      "minimum": 0,
      "example": 160
    },
    "wordFinalizationMaxWaitTime": {
      "type": "number",
      "description": "This property is deprecated.",
      "deprecated": true,
      "minimum": 0,
      "example": 160
    },
    "maxTurnSilence": {
      "type": "number",
      "description": "This is the maximum turn silence time in milliseconds.\nNote: Only used if startSpeakingPlan.smartEndpointingPlan is not set.\n@default 400",
      "minimum": 0,
      "example": 400
    },
    "vadAssistedEndpointingEnabled": {
      "type": "boolean",
      "description": "Use VAD to assist with endpointing decisions from the transcriber.\nWhen enabled, transcriber endpointing will be buffered if VAD detects the user is still speaking, preventing premature turn-taking.\nWhen disabled, transcriber endpointing will be used immediately regardless of VAD state, allowing for quicker but more aggressive turn-taking.\nNote: Only used if startSpeakingPlan.smartEndpointingPlan is not set.\n\n@default true",
      "example": true
    },
    "speechModel": {
      "type": "string",
      "description": "This is the speech model used for the streaming session.\nNote: Keyterms prompting is not supported with multilingual streaming.\n@default 'universal-streaming-english'",
      "enum": [
        "universal-streaming-english",
        "universal-streaming-multilingual"
      ]
    },
    "realtimeUrl": {
      "type": "string",
      "description": "The WebSocket URL that the transcriber connects to."
    },
    "wordBoost": {
      "description": "Add up to 2500 characters of custom vocabulary.",
      "type": "array",
      "items": {
        "type": "string",
        "maxLength": 2500
      }
    },
    "keytermsPrompt": {
      "description": "Keyterms prompting improves recognition accuracy for specific words and phrases.\nCan include up to 100 keyterms, each up to 50 characters.\nCosts an additional $0.04/hour when enabled.",
      "type": "array",
      "maxItems": 100,
      "items": {
        "type": "string",
        "maxLength": 50
      }
    },
    "endUtteranceSilenceThreshold": {
      "type": "number",
      "description": "The duration of the end utterance silence threshold in milliseconds."
    },
    "disablePartialTranscripts": {
      "type": "boolean",
      "description": "Disable partial transcripts.\nSet to `true` to not receive partial transcripts. Defaults to `false`."
    }
  },
  "required": [
    "provider"
  ]
}