{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$comment": "No $id is declared on purpose: draft 2020-12 forbids a non-empty fragment in $id, so this schema is identified by the URI it is retrieved from.",
  "title": "SpeechToTextRequest",
  "type": "object",
  "required": [
    "file"
  ],
  "properties": {
    "file": {
      "type": "string",
      "format": "binary",
      "description": "The audio file to transcribe. Supports formats including MP3, WAV, FLAC, OGG, and M4A."
    },
    "model_id": {
      "type": "string",
      "description": "The identifier of the speech-to-text model to use for transcription."
    },
    "language_code": {
      "type": "string",
      "description": "Language code in ISO 639-1 format to hint the expected language of the audio content."
    },
    "tag_audio_events": {
      "type": "boolean",
      "description": "Whether to tag non-speech audio events such as music, laughter, or applause in the transcription output.",
      "default": false
    },
    "timestamps_granularity": {
      "type": "string",
      "description": "The level of timestamp granularity to include in the response.",
      "enum": [
        "none",
        "word",
        "character"
      ]
    }
  }
}