# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from __future__ import annotations from typing import Any, Mapping, Iterator, Optional, AsyncIterator, cast from typing_extensions import deprecated import httpx from ..types import ( TTSModel, ModelSpeed, InfillModel, SupportedLanguage, tts_infill_params, tts_generate_params, tts_generate_sse_params, ) from .._files import deepcopy_with_paths from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given from .._utils import extract_files, maybe_transform, async_maybe_transform from .._compat import cached_property from ..lib._tts import ( WebSocketContext as WebSocketContext, AsyncWebSocketContext as AsyncWebSocketContext, TTSResourceConnection as TTSResourceConnection, AsyncTTSResourceConnection as AsyncTTSResourceConnection, BackcompatWebSocketTtsOutput as BackcompatWebSocketTtsOutput, TTSResourceConnectionManager as TTSResourceConnectionManager, BackcompatTTSResourceConnection as BackcompatTTSResourceConnection, AsyncTTSResourceConnectionManager as AsyncTTSResourceConnectionManager, AsyncBackcompatTTSResourceConnection as AsyncBackcompatTTSResourceConnection, ) from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( BinaryAPIResponse, AsyncBinaryAPIResponse, StreamedBinaryAPIResponse, AsyncStreamedBinaryAPIResponse, to_raw_response_wrapper, to_streamed_response_wrapper, async_to_raw_response_wrapper, to_custom_raw_response_wrapper, async_to_streamed_response_wrapper, to_custom_streamed_response_wrapper, async_to_custom_raw_response_wrapper, async_to_custom_streamed_response_wrapper, ) from .._streaming import Stream, AsyncStream from .._base_client import make_request_options from ..types.tts_model import TTSModel from ..types.model_speed import ModelSpeed from ..types.infill_model import InfillModel from ..types.tts_sse_event import TTSSSEEvent from ..types.supported_language import SupportedLanguage from ..types.voice_specifier_param import VoiceSpecifierParam from ..types.generation_config_param import GenerationConfigParam from ..types.websocket_connection_options import WebsocketConnectionOptions __all__ = ["TTSResource", "AsyncTTSResource"] class TTSResource(SyncAPIResource): @cached_property def with_raw_response(self) -> TTSResourceWithRawResponse: """ This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/cartesia-ai/cartesia-python#accessing-raw-response-data-eg-headers """ return TTSResourceWithRawResponse(self) @cached_property def with_streaming_response(self) -> TTSResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. For more information, see https://www.github.com/cartesia-ai/cartesia-python#with_streaming_response """ return TTSResourceWithStreamingResponse(self) def generate( self, *, model_id: TTSModel, output_format: tts_generate_params.OutputFormat, transcript: str, voice: VoiceSpecifierParam, generation_config: GenerationConfigParam | Omit = omit, language: Optional[SupportedLanguage] | Omit = omit, pronunciation_dict_id: Optional[str] | Omit = omit, save: Optional[bool] | Omit = omit, speed: ModelSpeed | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> BinaryAPIResponse: """ Text-to-Speech (Bytes). The simplest way to stream generated audio. See [Compare TTS Endpoints](https://docs.cartesia.ai/use-the-api/compare-tts-endpoints) for details. Args: model_id: Text-to-speech models. See [the docs](https://docs.cartesia.ai/build-with-cartesia/tts-models/latest) for all options. generation_config: Configure the various attributes of the generated speech. These are only for `sonic-3` and have no effect on earlier models. See [Volume, Speed, and Emotion in Sonic-3](https://docs.cartesia.ai/build-with-cartesia/sonic-3/volume-speed-emotion) for a guide on this option. language: The language that the given voice should speak the transcript in. For valid options, see [Models](https://docs.cartesia.ai/build-with-cartesia/tts-models). pronunciation_dict_id: The ID of a pronunciation dictionary to use for the generation. Pronunciation dictionaries are supported by `sonic-3` models and newer. save: Whether to save the generated audio file. When true, the response will include a `Cartesia-File-ID` header. speed: Speed setting for the model. Defaults to `normal`. This feature is experimental and may not work for all voices. Influences the speed of the generated speech. Faster speeds may reduce hallucination rate. extra_headers: Send extra headers extra_query: Add additional query parameters to the request extra_body: Add additional JSON properties to the request timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "audio/wav", **(extra_headers or {})} return self._post( "/tts/bytes", body=maybe_transform( { "model_id": model_id, "output_format": output_format, "transcript": transcript, "voice": voice, "generation_config": generation_config, "language": language, "pronunciation_dict_id": pronunciation_dict_id, "save": save, "speed": speed, }, tts_generate_params.TTSGenerateParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=BinaryAPIResponse, ) def generate_sse( self, *, model_id: TTSModel, output_format: tts_generate_sse_params.OutputFormat, transcript: str, voice: VoiceSpecifierParam, add_phoneme_timestamps: Optional[bool] | Omit = omit, add_timestamps: Optional[bool] | Omit = omit, context_id: Optional[str] | Omit = omit, generation_config: GenerationConfigParam | Omit = omit, language: SupportedLanguage | Omit = omit, pronunciation_dict_id: Optional[str] | Omit = omit, speed: ModelSpeed | Omit = omit, use_normalized_timestamps: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Stream[TTSSSEEvent]: """ Text-to-Speech (SSE). Supports: - Streaming - Timestamps - context_id without transcript buffering See [Compare TTS Endpoints](https://docs.cartesia.ai/use-the-api/compare-tts-endpoints) for details. Args: model_id: Text-to-speech models. See [the docs](https://docs.cartesia.ai/build-with-cartesia/tts-models/latest) for all options. add_phoneme_timestamps: Whether to return phoneme-level timestamps. If `false` (default), no phoneme timestamps will be produced. If `true`, the server will return timestamp events containing phoneme-level timing information. add_timestamps: Whether to return word-level timestamps. If `false` (default), no word timestamps will be produced at all. If `true`, the server will return timestamp events containing word-level timing information. context_id: Optional context ID for this request. generation_config: Configure the various attributes of the generated speech. These are only for `sonic-3` and have no effect on earlier models. See [Volume, Speed, and Emotion in Sonic-3](https://docs.cartesia.ai/build-with-cartesia/sonic-3/volume-speed-emotion) for a guide on this option. language: The language that the given voice should speak the transcript in. For valid options, see [Models](https://docs.cartesia.ai/build-with-cartesia/tts-models). pronunciation_dict_id: The ID of a pronunciation dictionary to use for the generation. Pronunciation dictionaries are supported by `sonic-3` models and newer. speed: Speed setting for the model. Defaults to `normal`. This feature is experimental and may not work for all voices. Influences the speed of the generated speech. Faster speeds may reduce hallucination rate. use_normalized_timestamps: Whether to use normalized timestamps (True) or original timestamps (False). extra_headers: Send extra headers extra_query: Add additional query parameters to the request extra_body: Add additional JSON properties to the request timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} return self._post( "/tts/sse", body=maybe_transform( { "model_id": model_id, "output_format": output_format, "transcript": transcript, "voice": voice, "add_phoneme_timestamps": add_phoneme_timestamps, "add_timestamps": add_timestamps, "context_id": context_id, "generation_config": generation_config, "language": language, "pronunciation_dict_id": pronunciation_dict_id, "speed": speed, "use_normalized_timestamps": use_normalized_timestamps, }, tts_generate_sse_params.TTSGenerateSSEParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=cast(Any, TTSSSEEvent), # Union types cannot be passed in as arguments in the type system stream=True, stream_cls=Stream[TTSSSEEvent], ) def infill( self, *, language: str | Omit = omit, left_audio: FileTypes | Omit = omit, model_id: InfillModel | Omit = omit, output_format: tts_infill_params.OutputFormat | Omit = omit, right_audio: FileTypes | Omit = omit, transcript: str | Omit = omit, voice_id: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> BinaryAPIResponse: """ Infill (Bytes). Generate audio that smoothly connects two existing audio segments. This is useful for inserting new speech between existing speech segments while maintaining natural transitions. **The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.** At least one of `left_audio` or `right_audio` must be provided. As with all generative models, there's some inherent variability, but here's some tips we recommend to get the best results from infill: - Use longer infill transcripts - This gives the model more flexibility to adapt to the rest of the audio - Target natural pauses in the audio when deciding where to clip - This means you don't need word-level timestamps to be as precise - Clip right up to the start and end of the audio segment you want infilled, keeping as much silence in the left/right audio segments as possible - This helps the model generate more natural transitions Args: language: The language of the transcript model_id: Infill models. See [the docs](https://docs.cartesia.ai/api-reference/infill/bytes#body-model-id) for all options. transcript: The infill text to generate voice_id: The ID of the voice to use for generating audio extra_headers: Send extra headers extra_query: Add additional query parameters to the request extra_body: Add additional JSON properties to the request timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "audio/wav", **(extra_headers or {})} body = deepcopy_with_paths( { "language": language, "left_audio": left_audio, "model_id": model_id, "output_format": output_format, "right_audio": right_audio, "transcript": transcript, "voice_id": voice_id, }, [["left_audio"], ["right_audio"]], ) files = extract_files(cast(Mapping[str, object], body), paths=[["left_audio"], ["right_audio"]]) # It should be noted that the actual Content-Type header that will be # sent to the server will contain a `boundary` parameter, e.g. # multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/infill/bytes", body=maybe_transform(body, tts_infill_params.TTSInfillParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=BinaryAPIResponse, ) @deprecated("bytes() is deprecated; use .generate() instead") def bytes( self, *, model_id: str, output_format: tts_generate_params.OutputFormat, transcript: str, voice: VoiceSpecifierParam, duration: Optional[float] | Omit = omit, # noqa: ARG002 generation_config: GenerationConfigParam | Omit = omit, language: Optional[SupportedLanguage] | Omit = omit, pronunciation_dict_id: Optional[str] | Omit = omit, save: Optional[bool] | Omit = omit, speed: ModelSpeed | Omit = omit, extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Iterator[bytes]: """ Text-to-speech (Bytes). .. deprecated:: Use :meth:`generate` instead. """ response = self.generate( model_id=model_id, output_format=output_format, transcript=transcript, voice=voice, generation_config=generation_config, language=language, pronunciation_dict_id=pronunciation_dict_id, save=save, speed=speed, extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, ) return response.iter_bytes() sse = generate_sse # alias for backward compatibility def websocket_connect( self, extra_query: Query = {}, extra_headers: Headers = {}, websocket_connection_options: WebsocketConnectionOptions = {}, ) -> TTSResourceConnectionManager: """Text-to-Speech (WebSocket). Supports: - Streaming - Long-lived connections allow for lower latency by reusing a live network connection - Timestamps - Multiple TTS [contexts](https://docs.cartesia.ai/use-the-api/tts-websocket/contexts) over the same connection - [Context flushing](https://docs.cartesia.ai/use-the-api/tts-websocket/context-flushing-and-flush-i-ds) - [Transcript buffering](https://docs.cartesia.ai/use-the-api/tts-websocket/buffering) """ return TTSResourceConnectionManager( client=self._client, extra_query=extra_query, extra_headers=extra_headers, websocket_connection_options=websocket_connection_options, ) def websocket( self, extra_query: Query = {}, extra_headers: Headers = {}, websocket_connection_options: WebsocketConnectionOptions = {}, ) -> BackcompatTTSResourceConnection: """ SDK v2 compatible Text-to-Speech (WebSocket). Supports: - Streaming - Long-lived connections allow for lower latency by reusing a live network connection - Timestamps - Multiple TTS [contexts](https://docs.cartesia.ai/use-the-api/tts-websocket/contexts) over the same connection - [Context flushing](https://docs.cartesia.ai/use-the-api/tts-websocket/context-flushing-and-flush-i-ds) - [Transcript buffering](https://docs.cartesia.ai/use-the-api/tts-websocket/buffering) """ return BackcompatTTSResourceConnection( TTSResourceConnectionManager( client=self._client, extra_query=extra_query, extra_headers=extra_headers, websocket_connection_options=websocket_connection_options, ) ) class AsyncTTSResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncTTSResourceWithRawResponse: """ This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/cartesia-ai/cartesia-python#accessing-raw-response-data-eg-headers """ return AsyncTTSResourceWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncTTSResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. For more information, see https://www.github.com/cartesia-ai/cartesia-python#with_streaming_response """ return AsyncTTSResourceWithStreamingResponse(self) async def generate( self, *, model_id: TTSModel, output_format: tts_generate_params.OutputFormat, transcript: str, voice: VoiceSpecifierParam, generation_config: GenerationConfigParam | Omit = omit, language: Optional[SupportedLanguage] | Omit = omit, pronunciation_dict_id: Optional[str] | Omit = omit, save: Optional[bool] | Omit = omit, speed: ModelSpeed | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncBinaryAPIResponse: """ Text-to-Speech (Bytes). The simplest way to stream generated audio. See [Compare TTS Endpoints](https://docs.cartesia.ai/use-the-api/compare-tts-endpoints) for details. Args: model_id: Text-to-speech models. See [the docs](https://docs.cartesia.ai/build-with-cartesia/tts-models/latest) for all options. generation_config: Configure the various attributes of the generated speech. These are only for `sonic-3` and have no effect on earlier models. See [Volume, Speed, and Emotion in Sonic-3](https://docs.cartesia.ai/build-with-cartesia/sonic-3/volume-speed-emotion) for a guide on this option. language: The language that the given voice should speak the transcript in. For valid options, see [Models](https://docs.cartesia.ai/build-with-cartesia/tts-models). pronunciation_dict_id: The ID of a pronunciation dictionary to use for the generation. Pronunciation dictionaries are supported by `sonic-3` models and newer. save: Whether to save the generated audio file. When true, the response will include a `Cartesia-File-ID` header. speed: Speed setting for the model. Defaults to `normal`. This feature is experimental and may not work for all voices. Influences the speed of the generated speech. Faster speeds may reduce hallucination rate. extra_headers: Send extra headers extra_query: Add additional query parameters to the request extra_body: Add additional JSON properties to the request timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "audio/wav", **(extra_headers or {})} return await self._post( "/tts/bytes", body=await async_maybe_transform( { "model_id": model_id, "output_format": output_format, "transcript": transcript, "voice": voice, "generation_config": generation_config, "language": language, "pronunciation_dict_id": pronunciation_dict_id, "save": save, "speed": speed, }, tts_generate_params.TTSGenerateParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=AsyncBinaryAPIResponse, ) async def generate_sse( self, *, model_id: TTSModel, output_format: tts_generate_sse_params.OutputFormat, transcript: str, voice: VoiceSpecifierParam, add_phoneme_timestamps: Optional[bool] | Omit = omit, add_timestamps: Optional[bool] | Omit = omit, context_id: Optional[str] | Omit = omit, generation_config: GenerationConfigParam | Omit = omit, language: SupportedLanguage | Omit = omit, pronunciation_dict_id: Optional[str] | Omit = omit, speed: ModelSpeed | Omit = omit, use_normalized_timestamps: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncStream[TTSSSEEvent]: """ Text-to-Speech (SSE). Supports: - Streaming - Timestamps - context_id without transcript buffering See [Compare TTS Endpoints](https://docs.cartesia.ai/use-the-api/compare-tts-endpoints) for details. Args: model_id: Text-to-speech models. See [the docs](https://docs.cartesia.ai/build-with-cartesia/tts-models/latest) for all options. add_phoneme_timestamps: Whether to return phoneme-level timestamps. If `false` (default), no phoneme timestamps will be produced. If `true`, the server will return timestamp events containing phoneme-level timing information. add_timestamps: Whether to return word-level timestamps. If `false` (default), no word timestamps will be produced at all. If `true`, the server will return timestamp events containing word-level timing information. context_id: Optional context ID for this request. generation_config: Configure the various attributes of the generated speech. These are only for `sonic-3` and have no effect on earlier models. See [Volume, Speed, and Emotion in Sonic-3](https://docs.cartesia.ai/build-with-cartesia/sonic-3/volume-speed-emotion) for a guide on this option. language: The language that the given voice should speak the transcript in. For valid options, see [Models](https://docs.cartesia.ai/build-with-cartesia/tts-models). pronunciation_dict_id: The ID of a pronunciation dictionary to use for the generation. Pronunciation dictionaries are supported by `sonic-3` models and newer. speed: Speed setting for the model. Defaults to `normal`. This feature is experimental and may not work for all voices. Influences the speed of the generated speech. Faster speeds may reduce hallucination rate. use_normalized_timestamps: Whether to use normalized timestamps (True) or original timestamps (False). extra_headers: Send extra headers extra_query: Add additional query parameters to the request extra_body: Add additional JSON properties to the request timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} return await self._post( "/tts/sse", body=await async_maybe_transform( { "model_id": model_id, "output_format": output_format, "transcript": transcript, "voice": voice, "add_phoneme_timestamps": add_phoneme_timestamps, "add_timestamps": add_timestamps, "context_id": context_id, "generation_config": generation_config, "language": language, "pronunciation_dict_id": pronunciation_dict_id, "speed": speed, "use_normalized_timestamps": use_normalized_timestamps, }, tts_generate_sse_params.TTSGenerateSSEParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=cast(Any, TTSSSEEvent), # Union types cannot be passed in as arguments in the type system stream=True, stream_cls=AsyncStream[TTSSSEEvent], ) async def infill( self, *, language: str | Omit = omit, left_audio: FileTypes | Omit = omit, model_id: InfillModel | Omit = omit, output_format: tts_infill_params.OutputFormat | Omit = omit, right_audio: FileTypes | Omit = omit, transcript: str | Omit = omit, voice_id: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncBinaryAPIResponse: """ Infill (Bytes). Generate audio that smoothly connects two existing audio segments. This is useful for inserting new speech between existing speech segments while maintaining natural transitions. **The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.** At least one of `left_audio` or `right_audio` must be provided. As with all generative models, there's some inherent variability, but here's some tips we recommend to get the best results from infill: - Use longer infill transcripts - This gives the model more flexibility to adapt to the rest of the audio - Target natural pauses in the audio when deciding where to clip - This means you don't need word-level timestamps to be as precise - Clip right up to the start and end of the audio segment you want infilled, keeping as much silence in the left/right audio segments as possible - This helps the model generate more natural transitions Args: language: The language of the transcript model_id: Infill models. See [the docs](https://docs.cartesia.ai/api-reference/infill/bytes#body-model-id) for all options. transcript: The infill text to generate voice_id: The ID of the voice to use for generating audio extra_headers: Send extra headers extra_query: Add additional query parameters to the request extra_body: Add additional JSON properties to the request timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "audio/wav", **(extra_headers or {})} body = deepcopy_with_paths( { "language": language, "left_audio": left_audio, "model_id": model_id, "output_format": output_format, "right_audio": right_audio, "transcript": transcript, "voice_id": voice_id, }, [["left_audio"], ["right_audio"]], ) files = extract_files(cast(Mapping[str, object], body), paths=[["left_audio"], ["right_audio"]]) # It should be noted that the actual Content-Type header that will be # sent to the server will contain a `boundary` parameter, e.g. # multipart/form-data; boundary=---abc-- extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/infill/bytes", body=await async_maybe_transform(body, tts_infill_params.TTSInfillParams), files=files, options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=AsyncBinaryAPIResponse, ) @deprecated("bytes() is deprecated; use .generate() instead") async def bytes( self, *, model_id: str, output_format: tts_generate_params.OutputFormat, transcript: str, voice: VoiceSpecifierParam, duration: Optional[float] | Omit = omit, # noqa: ARG002 generation_config: GenerationConfigParam | Omit = omit, language: Optional[SupportedLanguage] | Omit = omit, pronunciation_dict_id: Optional[str] | Omit = omit, save: Optional[bool] | Omit = omit, speed: ModelSpeed | Omit = omit, extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncIterator[bytes]: """ Text-to-Speech (Bytes). .. deprecated:: Use :meth:`generate` instead. """ response = await self.generate( model_id=model_id, output_format=output_format, transcript=transcript, voice=voice, generation_config=generation_config, language=language, pronunciation_dict_id=pronunciation_dict_id, save=save, speed=speed, extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, ) return response.iter_bytes() sse = generate_sse # Alias for backward compatibility def websocket_connect( self, extra_query: Query = {}, extra_headers: Headers = {}, websocket_connection_options: WebsocketConnectionOptions = {}, ) -> AsyncTTSResourceConnectionManager: """Text-to-Speech (WebSocket). Supports: - Streaming - Long-lived connections allow for lower latency by reusing a live network connection - Timestamps - Multiple TTS [contexts](https://docs.cartesia.ai/use-the-api/tts-websocket/contexts) over the same connection - [Context flushing](https://docs.cartesia.ai/use-the-api/tts-websocket/context-flushing-and-flush-i-ds) - [Transcript buffering](https://docs.cartesia.ai/use-the-api/tts-websocket/buffering) """ return AsyncTTSResourceConnectionManager( client=self._client, extra_query=extra_query, extra_headers=extra_headers, websocket_connection_options=websocket_connection_options, ) async def websocket( self, extra_query: Query = {}, extra_headers: Headers = {}, websocket_connection_options: WebsocketConnectionOptions = {}, ) -> AsyncBackcompatTTSResourceConnection: """ SDK v2 compatible Text-to-Speech (WebSocket). Supports: - Streaming - Long-lived connections allow for lower latency by reusing a live network connection - Timestamps - Multiple TTS [contexts](https://docs.cartesia.ai/use-the-api/tts-websocket/contexts) over the same connection - [Context flushing](https://docs.cartesia.ai/use-the-api/tts-websocket/context-flushing-and-flush-i-ds) - [Transcript buffering](https://docs.cartesia.ai/use-the-api/tts-websocket/buffering) """ return AsyncBackcompatTTSResourceConnection( AsyncTTSResourceConnectionManager( client=self._client, extra_query=extra_query, extra_headers=extra_headers, websocket_connection_options=websocket_connection_options, ) ) class TTSResourceWithRawResponse: def __init__(self, tts: TTSResource) -> None: self._tts = tts self.generate = to_custom_raw_response_wrapper( tts.generate, BinaryAPIResponse, ) self.generate_sse = to_raw_response_wrapper( tts.generate_sse, ) self.infill = to_custom_raw_response_wrapper( tts.infill, BinaryAPIResponse, ) self.sse = self.generate_sse # Alias for backward compatibility class AsyncTTSResourceWithRawResponse: def __init__(self, tts: AsyncTTSResource) -> None: self._tts = tts self.generate = async_to_custom_raw_response_wrapper( tts.generate, AsyncBinaryAPIResponse, ) self.generate_sse = async_to_raw_response_wrapper( tts.generate_sse, ) self.infill = async_to_custom_raw_response_wrapper( tts.infill, AsyncBinaryAPIResponse, ) self.sse = self.generate_sse # Alias for backward compatibility class TTSResourceWithStreamingResponse: def __init__(self, tts: TTSResource) -> None: self._tts = tts self.generate = to_custom_streamed_response_wrapper( tts.generate, StreamedBinaryAPIResponse, ) self.generate_sse = to_streamed_response_wrapper( tts.generate_sse, ) self.infill = to_custom_streamed_response_wrapper( tts.infill, StreamedBinaryAPIResponse, ) self.sse = self.generate_sse # Alias for backward compatibility class AsyncTTSResourceWithStreamingResponse: def __init__(self, tts: AsyncTTSResource) -> None: self._tts = tts self.generate = async_to_custom_streamed_response_wrapper( tts.generate, AsyncStreamedBinaryAPIResponse, ) self.generate_sse = async_to_streamed_response_wrapper( tts.generate_sse, ) self.infill = async_to_custom_streamed_response_wrapper( tts.infill, AsyncStreamedBinaryAPIResponse, ) self.sse = self.generate_sse # Alias for backward compatibility