{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "TranscriptionResponse",
  "type": "object",
  "properties": {
    "text": {
      "type": "string",
      "description": "The transcribed text."
    },
    "task": {
      "type": "string",
      "description": "The task performed, always transcribe."
    },
    "language": {
      "type": "string",
      "description": "The detected or specified language of the audio."
    },
    "duration": {
      "type": "number",
      "description": "The duration of the input audio in seconds."
    },
    "words": {
      "type": "array",
      "description": "Extracted words and their corresponding timestamps. Only present when timestamp_granularities includes word."
    },
    "segments": {
      "type": "array",
      "description": "Segments of the transcribed text and their corresponding details. Only present when timestamp_granularities includes segment or response_format is verbose_json."
    }
  }
}