openapi: 3.1.0
info:
  title: Together APIs
  description: The Together REST API. Please see https://docs.together.ai for more details.
  version: 2.0.0
  termsOfService: https://www.together.ai/terms-of-service
  contact:
    name: Together Support
    url: https://www.together.ai/contact
  license:
    name: MIT
    url: https://github.com/togethercomputer/openapi/blob/main/LICENSE
servers:
  - url: https://api.together.ai/v1
security:
  - bearerAuth: []
paths:
  /deployments:
    get:
      description: Get a list of all deployments in your project
      summary: Get the list of deployments
      tags:
        - Deployments
      responses:
        '200':
          description: List of deployments
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeploymentListResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            deployments = client.beta.jig.list()
            print(deployments)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployments = await client.beta.jig.list();
            console.log(deployments);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployments = await client.beta.jig.list();
            console.log(deployments);
        - lang: Shell
          label: cURL
          source: |
            curl -X GET \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/deployments
    post:
      description: Create a new deployment with specified configuration
      summary: Create a new deployment
      tags:
        - Deployments
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            deployment = client.beta.jig.deploy(
              name="my-deployment",
              gpu_type="h100-80gb",
              image="registry.together.ai/proj_abcdefg1234567890/my-image:latest"
            )
            print(deployment)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.deploy({
              name: "my-deployment",
              gpu_type: "h100-80gb",
              image: "registry.together.ai/proj_abcdefg1234567890/my-image:latest"
            });
            console.log(deployment);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.deploy({
              name: "my-deployment",
              gpu_type: "h100-80gb",
              image: "registry.together.ai/proj_abcdefg1234567890/my-image:latest"
            });
            console.log(deployment);
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateDeploymentRequest'
        description: Deployment configuration
        required: true
      responses:
        '200':
          description: Deployment created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeploymentResponseItem'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
  /deployments/{id}:
    delete:
      description: Delete an existing deployment
      summary: Delete a deployment
      tags:
        - Deployments
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Deployment ID or name
            type: string
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            deployment = client.beta.jig.destroy("my-deployment")
            print(deployment)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.destroy("my-deployment");
            console.log(deployment);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.destroy("my-deployment");
            console.log(deployment);
        - lang: Shell
          label: cURL
          source: |
            curl -X DELETE \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/deployments/my-deployment
      responses:
        '200':
          description: Deployment deleted successfully
          content:
            application/json:
              schema:
                type: object
        '404':
          description: Deployment not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
    get:
      description: Retrieve details of a specific deployment by its ID or name
      summary: Get a deployment by ID or name
      tags:
        - Deployments
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Deployment ID or name
            type: string
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            deployment = client.beta.jig.retrieve("my-deployment")
            print(deployment)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.retrieve("my-deployment");
            console.log(deployment);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.retrieve("my-deployment");
            console.log(deployment);
      responses:
        '200':
          description: Deployment details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeploymentResponseItem'
        '404':
          description: Deployment not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
    patch:
      description: Update an existing deployment configuration
      summary: Update a deployment
      tags:
        - Deployments
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            deployment = client.beta.jig.update("my-deployment", gpu_count=2)
            print(deployment)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const deployment = await client.beta.jig.update("my-deployment", {
            gpu_count: 2 });

            console.log(deployment);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const deployment = await client.beta.jig.update("my-deployment", {
            gpu_count: 2 });

            console.log(deployment);
        - lang: Shell
          label: cURL
          source: |
            curl -X PATCH \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  --data '{ "gpu_count": 2 }' \
                  https://api.together.ai/v1/deployments/my-deployment
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Deployment ID or name
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateDeploymentRequest'
        description: Updated deployment configuration
        required: true
      responses:
        '200':
          description: Deployment updated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeploymentResponseItem'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                type: object
        '404':
          description: Deployment not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
  /deployments/{id}/logs:
    get:
      description: Retrieve logs from a deployment, optionally filtered by replica ID.
      summary: Get logs for a deployment
      tags:
        - Deployments
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            deployment = client.beta.jig.logs("my-deployment")
            print(deployment)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.logs("my-deployment");
            console.log(deployment);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const deployment = await client.beta.jig.logs("my-deployment");
            console.log(deployment);
        - lang: Shell
          label: cURL
          source: |
            curl -X GET \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/deployments/my-deployment/logs
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Deployment ID or name
            type: string
        - name: replica_id
          in: query
          required: false
          schema:
            description: Replica ID to filter logs
            type: string
      responses:
        '200':
          description: Deployment logs
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeploymentLogs'
        '404':
          description: Deployment not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
  /deployments/secrets:
    get:
      description: Retrieve all secrets in your project
      summary: Get the list of project secrets
      tags:
        - Secrets
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            secrets = client.beta.jig.secrets.list()
            print(secrets)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const secrets = await client.beta.jig.secrets.list();
            console.log(secrets);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const secrets = await client.beta.jig.secrets.list();
            console.log(secrets);
      responses:
        '200':
          description: List of secrets
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListSecretsResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
    post:
      description: Create a new secret to store sensitive configuration values
      summary: Create a new secret
      tags:
        - Secrets
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            client.beta.jig.secrets.create(name="my-secret", value="my-value")
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            await client.beta.jig.secrets.create({ name: "my-secret", value:
            "my-value" });
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            await client.beta.jig.secrets.create({ name: "my-secret", value:
            "my-value" });
        - lang: Shell
          label: cURL
          source: |
            curl -X POST \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  --data '{ "name": "my-secret", "value": "my-value" }' \
                  https://api.together.ai/v1/deployments/secrets
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateSecretRequest'
        description: Secret configuration
        required: true
      responses:
        '200':
          description: Secret created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SecretResponseItem'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
  /deployments/secrets/{id}:
    delete:
      description: Delete an existing secret
      summary: Delete a secret
      tags:
        - Secrets
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            client.beta.jig.secrets.delete("my-secret")
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            await client.beta.jig.secrets.delete("my-secret");
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            await client.beta.jig.secrets.delete("my-secret");
        - lang: Shell
          label: cURL
          source: |
            curl -X DELETE \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/deployments/secrets/my-secret
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Secret ID or name
            type: string
      responses:
        '200':
          description: Secret deleted successfully
          content:
            application/json:
              schema:
                type: object
        '404':
          description: Secret not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
    get:
      description: Retrieve details of a specific secret by its ID or name
      summary: Get a secret by ID or name
      tags:
        - Secrets
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            secret = client.beta.jig.secrets.retrieve("my-secret")
            print(secret)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const secret = await client.beta.jig.secrets.retrieve("my-secret");
            console.log(secret);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const secret = await client.beta.jig.secrets.retrieve("my-secret");
            console.log(secret);
        - lang: Shell
          label: cURL
          source: |
            curl -X GET \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/deployments/secrets/my-secret
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Secret ID or name
            type: string
      responses:
        '200':
          description: Secret details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SecretResponseItem'
        '404':
          description: Secret not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
    patch:
      description: Update an existing secret's value or metadata
      summary: Update a secret
      tags:
        - Secrets
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            client.beta.jig.secrets.update("my-secret", value="my-new-value")
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            await client.beta.jig.secrets.update("my-secret", { value:
            "my-new-value" });
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            await client.beta.jig.secrets.update("my-secret", { value:
            "my-new-value" });
        - lang: Shell
          label: cURL
          source: |
            curl -X PATCH \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  --data '{ "value": "my-new-value" }' \
                  https://api.together.ai/v1/deployments/secrets/my-secret
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Secret ID or name
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateSecretRequest'
        description: Updated secret configuration
        required: true
      responses:
        '200':
          description: Secret updated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SecretResponseItem'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                type: object
        '404':
          description: Secret not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
  /deployments/storage/{filename}:
    get:
      description: Download a file by redirecting to a signed URL
      summary: Download a file
      tags:
        - DeploymentsStorage
      parameters:
        - name: filename
          in: path
          required: true
          schema:
            description: Filename
            type: string
      responses:
        '307':
          description: Redirect to signed download URL
          content:
            application/json:
              schema:
                type: string
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                additionalProperties:
                  type: string
                type: object
        '404':
          description: File not found
          content:
            application/json:
              schema:
                additionalProperties:
                  type: string
                type: object
        '500':
          description: Internal error
          content:
            application/json:
              schema:
                additionalProperties:
                  type: string
                type: object
  /deployments/storage/volumes:
    get:
      description: Retrieve all volumes in your project
      summary: Get the list of project volumes
      tags:
        - DeploymentsVolumes
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            volumes = client.beta.jig.storage.volumes.list()
            print(volumes)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volumes = await client.beta.jig.storage.volumes.list();
            console.log(volumes);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volumes = await client.beta.jig.storage.volumes.list();
            console.log(volumes);
        - lang: Shell
          label: cURL
          source: |
            curl -X GET \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/deployments/storage/volumes
      responses:
        '200':
          description: List of volumes
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListVolumesResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
    post:
      description: Create a new volume to preload files in deployments
      summary: Create a new volume
      tags:
        - DeploymentsVolumes
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            volumes = client.beta.jig.volumes.create(name="my-volume")
            print(volumes)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const volumes = await client.beta.jig.volumes.create({ name:
            "my-volume" });

            console.log(volumes);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const volumes = await client.beta.jig.volumes.create({ name:
            "my-volume" });

            console.log(volumes);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  --data '{ "name": "my-volume" }' \
                  https://api.together.ai/v1/deployments/storage/volumes
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateVolumeRequest'
        description: Volume configuration
        required: true
      responses:
        '200':
          description: Volume created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VolumeResponseItem'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
  /deployments/storage/volumes/{id}:
    delete:
      description: Delete an existing volume
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Volume ID or name
            type: string
      responses:
        '200':
          description: Volume deleted successfully
          content:
            application/json:
              schema:
                type: object
        '404':
          description: Volume not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
      summary: Delete a volume
      tags:
        - Volumes
    get:
      description: Retrieve details of a specific volume by its ID or name
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Volume ID or name
            type: string
      responses:
        '200':
          description: Volume details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VolumeResponseItem'
        '404':
          description: Volume not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
      summary: Get a volume by ID or name
      tags:
        - Volumes
    patch:
      description: Update an existing volume's configuration or contents
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Volume ID or name.
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateVolumeRequest'
        description: Updated volume configuration
        required: true
      responses:
        '200':
          description: Volume updated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VolumeResponseItem'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                type: object
        '404':
          description: Volume not found
          content:
            application/json:
              schema:
                type: object
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
      summary: Update a volume
      tags:
        - Volumes
  /voices:
    get:
      tags:
        - Voices
      summary: Fetch available voices for each model
      description: Fetch available voices for each model
      operationId: fetchVoices
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListVoicesResponse'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.audio.voices.list()

            print(response.data)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.audio.voices.list()

            print(response.data)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.audio.voices.list()

            console.log(response.data);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.audio.voices.list()

            console.log(response.data);
  /videos/{id}:
    get:
      tags:
        - Video
      summary: Fetch video metadata
      description: Fetch video metadata
      servers:
        - url: https://api.together.ai/v2
      operationId: retrieveVideo
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.videos.retrieve(video_id)

            print(response.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.videos.retrieve(video_id)

            print(response.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.videos.retrieve(videoId);

            console.log(response.status);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.videos.retrieve(videoId);

            console.log(response.status);
      parameters:
        - in: path
          name: id
          required: true
          schema:
            description: Identifier of video from create response.
            type: string
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VideoJob'
        '400':
          description: Invalid request parameters.
        '404':
          description: Video ID not found.
  /videos:
    post:
      tags:
        - Video
      summary: Create video
      description: Create a video
      operationId: createVideo
      servers:
        - url: https://api.together.ai/v2
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.videos.create(
                model="together/video-model",
                prompt="A cartoon of an astronaut riding a horse on the moon"
            )

            print(response.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.videos.create(
                model="together/video-model",
                prompt="A cartoon of an astronaut riding a horse on the moon"
            )

            print(response.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.videos.create({
              model: "together/video-model",
              prompt: "A cartoon of an astronaut riding a horse on the moon",
            });

            console.log(response.id);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.videos.create({
              model: "together/video-model",
              prompt: "A cartoon of an astronaut riding a horse on the moon",
            });

            console.log(response.id);
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateVideoBody'
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VideoJob'
  /chat/completions:
    post:
      tags:
        - Chat
      summary: Create chat completion
      description: >-
        Generate a model response for a given chat conversation. Supports single
        queries and multi-turn conversations with system, user, and assistant
        messages.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.chat.completions.create(
                model="Qwen/Qwen3.5-9B",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": "What are some fun things to do in New York?"},
                ],
                reasoning={"enabled": False}
            )
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.chat.completions.create(
                model="Qwen/Qwen3.5-9B",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": "What are some fun things to do in New York?"},
                ],
                reasoning={"enabled": False}
            )

            print(response.choices[0].message.content)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.chat.completions.create({
              model: "Qwen/Qwen3.5-9B",
              messages: [
                { role: "system", content: "You are a helpful assistant." },
                { role: "user", "content": "What are some fun things to do in New York?" },
              ],
              reasoning: { enabled: false },
            });

            console.log(response.choices[0].message?.content);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.chat.completions.create({
              model: "Qwen/Qwen3.5-9B",
              messages: [
                { role: "system", content: "You are a helpful assistant." },
                { role: "user", "content": "What are some fun things to do in New York?" },
              ],
              reasoning: { enabled: false },
            });

            console.log(response.choices[0].message?.content);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/chat/completions" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "Qwen/Qwen3.5-9B",
                   "messages": [
                     {"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": "What are some fun things to do in New York?"}
                   ],
                   "reasoning": {"enabled": false}
                 }'
      operationId: chat-completions
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ChatCompletionStream'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '503':
          description: Overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
  /completions:
    post:
      tags:
        - Completion
      summary: Create completion
      description: >-
        Generate text completions for a given prompt using a language, code, or
        image model.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.completions.create(
                model="Qwen/Qwen3.5-9B",
                prompt="The largest city in France is",
                max_tokens=1
            )

            print(response.choices[0].text)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.completions.create(
                model="Qwen/Qwen3.5-9B",
                prompt="The largest city in France is",
                max_tokens=1
            )

            print(response.choices[0].text)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.completions.create({
              model: "Qwen/Qwen3.5-9B",
              prompt: "The largest city in France is",
              max_tokens: 1,
            });

            console.log(response.choices[0].text);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.completions.create({
              model: "Qwen/Qwen3.5-9B",
              prompt: "The largest city in France is",
              max_tokens: 1
            });

            console.log(response.choices[0].text);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/completions" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "Qwen/Qwen3.5-9B",
                   "prompt": "The largest city in France is",
                   "max_tokens": 1
                 }'
      operationId: completions
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/CompletionStream'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '503':
          description: Overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
  /embeddings:
    post:
      tags:
        - Embeddings
      summary: Create embedding
      description: >-
        Generate vector embeddings for one or more text inputs. Returns
        numerical arrays representing semantic meaning, useful for search,
        classification, and retrieval.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.embeddings.create(
                model="BAAI/bge-large-en-v1.5",
                input="New York City",
            )

            print(response.data[0].embedding)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.embeddings.create(
                model="BAAI/bge-large-en-v1.5",
                input="New York City",
            )

            print(response.data[0].embedding)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.embeddings.create({
              model: "BAAI/bge-large-en-v1.5",
              input: "New York City",
            });

            console.log(response.data[0].embedding);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.embeddings.create({
              model: "BAAI/bge-large-en-v1.5",
              input: "New York City",
            });

            console.log(response.data[0].embedding);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/embeddings" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "BAAI/bge-large-en-v1.5",
                   "input": "New York City"
                 }'
      operationId: embeddings
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingsRequest'
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingsResponse'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '503':
          description: Overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
  /models:
    get:
      tags:
        - Models
      summary: List all models
      description: Lists all of Together's open-source models
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            models = client.models.list()

            for model in models:
                print(model.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            models = client.models.list()

            for model in models:
                print(model.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const models = await client.models.list();

            for (const model of models) {
              console.log(model.id);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const models = await client.models.list();

            for (const model of models) {
              console.log(model.id);
            }
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/models" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      operationId: models
      parameters:
        - name: dedicated
          in: query
          schema:
            description: Filter models to only return dedicated models
            type: boolean
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelInfoList'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
    post:
      tags:
        - Models
      summary: Upload a custom model or adapter
      description: Upload a custom model or adapter from Hugging Face or S3
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.models.upload(
                model_name="My-Fine-Tuned-Model",
                model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
            )

            print(response.data.job_id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.models.upload(
                "My-Fine-Tuned-Model",
                "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
            )

            print(response.job_id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.models.upload({
              model_name: "My-Fine-Tuned-Model",
              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
            })

            console.log(response);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.models.upload({
              model_name: "My-Fine-Tuned-Model",
              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
            })

            console.log(response);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/models" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                    "model_name": "My-Fine-Tuned-Model",
                    "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz"
                  }'
      operationId: uploadModel
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ModelUploadRequest'
      responses:
        '200':
          description: Model / adapter upload job created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelUploadSuccessResponse'
  /jobs/{jobId}:
    get:
      tags:
        - Jobs
      summary: Get job status
      description: Get the status of a specific job
      operationId: getJob
      parameters:
        - name: jobId
          in: path
          required: true
          schema:
            example: job-a15dad11-8d8e-4007-97c5-a211304de284
            description: The ID of the job to retrieve
            type: string
      responses:
        '200':
          description: Job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobInfoSuccessResponse'
  /jobs:
    get:
      tags:
        - Jobs
      summary: List all jobs
      description: List all jobs and their statuses
      operationId: listJobs
      responses:
        '200':
          description: Jobs retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobsInfoSuccessResponse'
  /images/generations:
    post:
      tags:
        - Images
      summary: Create image
      description: Use an image model to generate an image for a given prompt.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.images.generate(
                model="black-forest-labs/FLUX.1-schnell",
                steps=4,
                prompt="A cartoon of an astronaut riding a horse on the moon",
            )

            print(response.data[0].url)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.images.generate(
                model="black-forest-labs/FLUX.1-schnell",
                steps=4,
                prompt="A cartoon of an astronaut riding a horse on the moon",
            )

            print(response.data[0].url)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.images.generate({
              model: "black-forest-labs/FLUX.1-schnell",
              prompt: "A cartoon of an astronaut riding a horse on the moon",
            });

            console.log(response.data[0].url);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.images.generate({
              model: "black-forest-labs/FLUX.1-schnell",
              prompt: "A cartoon of an astronaut riding a horse on the moon",
            });

            console.log(response.data[0].url);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/images/generations" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "black-forest-labs/FLUX.1-schnell",
                   "prompt": "A cartoon of an astronaut riding a horse on the moon"
                 }'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - prompt
                - model
              properties:
                prompt:
                  type: string
                  description: >-
                    A description of the desired images. Maximum length varies
                    by model.
                  example: cat floating in space, cinematic
                model:
                  type: string
                  description: >
                    The model to use for image generation.<br> <br> [See all of
                    Together AI's image
                    models](https://docs.together.ai/docs/serverless-models#image-models)
                  example: black-forest-labs/FLUX.1-schnell
                  anyOf:
                    - type: string
                      enum:
                        - black-forest-labs/FLUX.1-schnell-Free
                        - black-forest-labs/FLUX.1-schnell
                        - black-forest-labs/FLUX.1.1-pro
                    - type: string
                steps:
                  type: integer
                  default: 20
                  description: Number of generation steps.
                image_url:
                  type: string
                  description: URL of an image to use for image models that support it.
                seed:
                  type: integer
                  description: >-
                    Seed used for generation. Can be used to reproduce image
                    generations.
                'n':
                  type: integer
                  default: 1
                  description: Number of image results to generate.
                height:
                  type: integer
                  default: 1024
                  description: Height of the image to generate in number of pixels.
                width:
                  type: integer
                  default: 1024
                  description: Width of the image to generate in number of pixels.
                negative_prompt:
                  type: string
                  description: The prompt or prompts not to guide the image generation.
                response_format:
                  type: string
                  description: >-
                    Format of the image response. Can be either a base64 string
                    or a URL.
                  enum:
                    - base64
                    - url
                guidance_scale:
                  type: number
                  description: >-
                    Adjusts the alignment of the generated image with the input
                    prompt. Higher values (e.g., 8-10) make the output more
                    faithful to the prompt, while lower values (e.g., 1-5)
                    encourage more creative freedom.
                  default: 3.5
                output_format:
                  type: string
                  description: >-
                    The format of the image response. Can be either be `jpeg` or
                    `png`. Defaults to `jpeg`.
                  default: jpeg
                  enum:
                    - jpeg
                    - png
                image_loras:
                  description: >-
                    An array of objects that define LoRAs (Low-Rank Adaptations)
                    to influence the generated image.
                  type: array
                  items:
                    type: object
                    required:
                      - path
                      - scale
                    properties:
                      path:
                        type: string
                        description: >-
                          The URL of the LoRA to apply (e.g.
                          https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA).
                      scale:
                        type: number
                        description: >-
                          The strength of the LoRA's influence. Most LoRA's
                          recommend a value of 1.
                reference_images:
                  description: >-
                    An array of image URLs that guide the overall appearance and
                    style of the generated image. These reference images
                    influence the visual characteristics consistently across the
                    generation.
                  type: array
                  items:
                    type: string
                    description: URL of a reference image to guide the image generation.
                disable_safety_checker:
                  type: boolean
                  description: If true, disables the safety checker for image generation.
      responses:
        '200':
          description: Image generated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ImageResponse'
  /files:
    get:
      tags:
        - Files
      summary: List all files
      description: List the metadata for all uploaded data files.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.files.list()

            for file in response.data:
                print(file.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.files.list()

            for file in response.data:
                print(file.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.files.list();

            for (const file of response.data) {
              console.log(file.id);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.files.list();

            for (const file of response.data) {
              console.log(file.id);
            }
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/files" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      responses:
        '200':
          description: List of files
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileList'
  /files/{id}:
    get:
      tags:
        - Files
      summary: Retrieve file metadata
      description: Retrieve the metadata for a single uploaded data file.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            file = client.files.retrieve(id="file-id")

            print(file)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            file = client.files.retrieve(id="file-id")

            print(file)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const file = await client.files.retrieve("file-id");

            console.log(file);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const file = await client.files.retrieve("file-id");

            console.log(file);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/files/ID" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the file to retrieve
            type: string
      responses:
        '200':
          description: File retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileResponse'
    delete:
      tags:
        - Files
      summary: Delete a file
      description: Delete a previously uploaded data file.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.files.delete(id="file-id")

            print(response)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.files.delete(id="file-id")

            print(response)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.files.delete("file-id");

            console.log(response);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.files.delete("file-id");

            console.log(response);
        - lang: Shell
          label: cURL
          source: |
            curl -X "DELETE" "https://api.together.ai/v1/files/file-id" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the file to delete
            type: string
      responses:
        '200':
          description: File deleted successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileDeleteResponse'
  /files/{id}/content:
    get:
      tags:
        - Files
      summary: Get file contents
      description: Get the contents of a single uploaded data file.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: >
            # Docs for v1 can be found by changing the above selector ^

            from together import Together

            import os


            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )


            with client.files.with_streaming_response.content(id="file-id") as
            response:
              for line in response.iter_lines():
                print(line)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            file = client.files.retrieve_content(id="file-id")

            print(file.filename)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.files.content("file-id");
            const content = await response.text();

            console.log(content);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.files.content("file-id");
            const content = await response.text();

            console.log(content);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/files/file-id/content" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the file to get the content of
            type: string
      responses:
        '200':
          description: File content retrieved successfully
          content:
            text/plain:
              schema:
                type: string
                format: binary
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /files/upload:
    post:
      tags:
        - Files
      summary: Upload a file
      description: Upload a file with specified purpose, file name, and file type.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            current_dir = os.path.dirname(os.path.abspath(__file__))
            file_path = os.path.join(current_dir, "data.jsonl")
            file = client.files.upload(file=file_path)

            print(file.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            current_dir = os.path.dirname(os.path.abspath(__file__))
            file_path = os.path.join(current_dir, "data.jsonl")
            file = client.files.upload(file=file_path)

            print(file.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import { upload } from "together-ai/lib/upload"
            import path from "path";
            import { fileURLToPath } from "url";

            const __filename = fileURLToPath(import.meta.url);
            const __dirname = path.dirname(__filename);
            const filepath = path.join(__dirname, "data.jsonl");
            const file = await upload(filepath);

            console.log(file.id);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import { upload } from "together-ai/lib/upload"
            import path from "path";
            import { fileURLToPath } from "url";

            const __filename = fileURLToPath(import.meta.url);
            const __dirname = path.dirname(__filename);
            const filepath = path.join(__dirname, "data.jsonl");
            const file = await upload(filepath);

            console.log(file.id);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/files/upload" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -F "file=@/path/to/data.jsonl" \
                 -F "file_name=data.jsonl" \
                 -F "purpose=fine-tune"
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required:
                - purpose
                - file_name
                - file
              properties:
                purpose:
                  $ref: '#/components/schemas/FilePurpose'
                file_name:
                  type: string
                  description: The name of the file being uploaded
                  example: dataset.csv
                file_type:
                  $ref: '#/components/schemas/FileType'
                file:
                  type: string
                  format: binary
                  description: The content of the file being uploaded
      responses:
        '200':
          description: File uploaded successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileResponse'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /fine-tunes:
    post:
      tags:
        - Fine-tuning
      summary: Create job
      description: Create a fine-tuning job with the provided model and training data.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.create(
                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
                training_file="file-id"
            )

            print(response)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.create(
                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
                training_file="file-id"
            )

            print(response)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.create({
              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
              training_file: "file-id",
            });

            console.log(response);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.create({
              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
              training_file: "file-id",
            });

            console.log(response);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/fine-tunes" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
                   "training_file": "file-id"
                 }'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - training_file
                - model
              properties:
                training_file:
                  type: string
                  description: File-ID of a training file uploaded to the Together API
                validation_file:
                  type: string
                  description: File-ID of a validation file uploaded to the Together API
                packing:
                  type: boolean
                  default: true
                  description: Whether to use sequence packing for training.
                max_seq_length:
                  type: integer
                  description: Maximum sequence length to use for training.
                model:
                  type: string
                  description: Name of the base model to run fine-tune job on
                n_epochs:
                  type: integer
                  default: 1
                  description: >-
                    Number of complete passes through the training dataset
                    (higher values may improve results but increase cost and
                    risk of overfitting)
                n_checkpoints:
                  type: integer
                  default: 1
                  description: >-
                    Number of intermediate model versions saved during training
                    for evaluation
                n_evals:
                  type: integer
                  default: 0
                  description: >-
                    Number of evaluations to be run on a given validation set
                    during training
                batch_size:
                  oneOf:
                    - type: integer
                    - type: string
                      enum:
                        - max
                  default: max
                  description: >-
                    Number of training examples processed together (larger
                    batches use more memory but may train faster). Defaults to
                    "max". We use training optimizations like packing, so the
                    effective batch size may be different than the value you
                    set.
                learning_rate:
                  type: number
                  format: float
                  default: 0.00001
                  description: >-
                    Controls how quickly the model adapts to new information
                    (too high may cause instability, too low may slow
                    convergence)
                lr_scheduler:
                  type: object
                  default: none
                  $ref: '#/components/schemas/LRScheduler'
                  description: >-
                    The learning rate scheduler to use. It specifies how the
                    learning rate is adjusted during training.
                warmup_ratio:
                  type: number
                  format: float
                  default: 0
                  description: >-
                    The percent of steps at the start of training to linearly
                    increase the learning rate.
                max_grad_norm:
                  type: number
                  format: float
                  default: 1
                  description: >-
                    Max gradient norm to be used for gradient clipping. Set to 0
                    to disable.
                weight_decay:
                  type: number
                  format: float
                  default: 0
                  description: Weight decay. Regularization parameter for the optimizer.
                random_seed:
                  type: integer
                  nullable: true
                  description: >
                    Random seed for reproducible training. When set, the same
                    seed produces the same run (e.g. data shuffle, init). If
                    omitted or null, the server applies its default seed (e.g.
                    42).
                suffix:
                  type: string
                  description: Suffix that will be added to your fine-tuned model name
                wandb_api_key:
                  type: string
                  description: >-
                    Integration key for tracking experiments and model metrics
                    on W&B platform
                wandb_base_url:
                  type: string
                  description: The base URL of a dedicated Weights & Biases instance.
                wandb_project_name:
                  type: string
                  description: >-
                    The Weights & Biases project for your run. If not specified,
                    will use `together` as the project name.
                wandb_name:
                  type: string
                  description: The Weights & Biases name for your run.
                wandb_entity:
                  type: string
                  description: The Weights & Biases entity for your run.
                train_on_inputs:
                  oneOf:
                    - type: boolean
                    - type: string
                      enum:
                        - auto
                  type: boolean
                  default: auto
                  description: >-
                    Whether to mask the user messages in conversational data or
                    prompts in instruction data.
                  deprecated: true
                training_method:
                  type: object
                  oneOf:
                    - $ref: '#/components/schemas/TrainingMethodSFT'
                    - $ref: '#/components/schemas/TrainingMethodDPO'
                  description: >-
                    The training method to use. 'sft' for Supervised Fine-Tuning
                    or 'dpo' for Direct Preference Optimization.
                training_type:
                  type: object
                  default: null
                  nullable: true
                  anyOf:
                    - $ref: '#/components/schemas/FullTrainingType'
                    - $ref: '#/components/schemas/LoRATrainingType'
                  description: >-
                    The training type to use. If not provided, the job will
                    default to LoRA training type.
                multimodal_params:
                  $ref: '#/components/schemas/MultimodalParams'
                from_checkpoint:
                  type: string
                  description: >-
                    The checkpoint identifier to continue training from a
                    previous fine-tuning job. Format is `{$JOB_ID}` or
                    `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
                    `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional;
                    without it, the final checkpoint will be used.
                from_hf_model:
                  type: string
                  description: >-
                    The Hugging Face Hub repo to start training from. Should be
                    as close as possible to the base model (specified by the
                    `model` argument) in terms of architecture and size.
                hf_model_revision:
                  type: string
                  description: >-
                    The revision of the Hugging Face Hub model to continue
                    training from. E.g., hf_model_revision=main (default, used
                    if the argument is not provided) or
                    hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e'
                    (specific commit).
                hf_api_token:
                  type: string
                  description: The API token for the Hugging Face Hub.
                hf_output_repo_name:
                  type: string
                  description: >-
                    The name of the Hugging Face repository to upload the
                    fine-tuned model to.
      responses:
        '200':
          description: Fine-tuning job initiated successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneResponseTruncated'
    get:
      tags:
        - Fine-tuning
      summary: List all jobs
      description: >-
        List the metadata for all fine-tuning jobs. Returns a list of
        FinetuneResponseTruncated objects.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.list()

            for fine_tune in response.data:
                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.list()

            for fine_tune in response.data:
                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.list();

            for (const fineTune of response.data) {
              console.log(fineTune.id, fineTune.status);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.list();

            for (const fineTune of response.data) {
              console.log(fineTune.id, fineTune.status);
            }
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/fine-tunes" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      responses:
        '200':
          description: List of fine-tune jobs
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneTruncatedList'
  /fine-tunes/estimate-price:
    post:
      tags:
        - Fine-tuning
      summary: Estimate price
      description: Estimate the price of a fine-tuning job.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - training_file
              properties:
                training_file:
                  type: string
                  description: File-ID of a training file uploaded to the Together API
                validation_file:
                  type: string
                  description: File-ID of a validation file uploaded to the Together API
                model:
                  type: string
                  description: Name of the base model to run fine-tune job on
                n_epochs:
                  type: integer
                  default: 1
                  description: >-
                    Number of complete passes through the training dataset
                    (higher values may improve results but increase cost and
                    risk of overfitting)
                n_evals:
                  type: integer
                  default: 0
                  description: >-
                    Number of evaluations to be run on a given validation set
                    during training
                training_method:
                  type: object
                  oneOf:
                    - $ref: '#/components/schemas/TrainingMethodSFT'
                    - $ref: '#/components/schemas/TrainingMethodDPO'
                  description: >-
                    The training method to use. 'sft' for Supervised Fine-Tuning
                    or 'dpo' for Direct Preference Optimization.
                training_type:
                  type: object
                  default: null
                  nullable: true
                  oneOf:
                    - $ref: '#/components/schemas/FullTrainingType'
                    - $ref: '#/components/schemas/LoRATrainingType'
                  description: >-
                    The training type to use. If not provided, the job will
                    default to LoRA training type.
                from_checkpoint:
                  type: string
                  description: >-
                    The checkpoint identifier to continue training from a
                    previous fine-tuning job. Format is `{$JOB_ID}` or
                    `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
                    `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional;
                    without it, the final checkpoint will be used.
      responses:
        '200':
          description: Price estimated successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  estimated_total_price:
                    type: number
                    description: The price of the fine-tuning job
                  allowed_to_proceed:
                    type: boolean
                    description: >-
                      Whether the user is allowed to proceed with the
                      fine-tuning job
                    example: true
                  user_limit:
                    type: number
                    description: The user's credit limit in dollars
                  estimated_train_token_count:
                    type: number
                    description: The estimated number of tokens to be trained
                  estimated_eval_token_count:
                    type: number
                    description: The estimated number of tokens for evaluation
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /fine-tunes/{id}:
    get:
      tags:
        - Fine-tuning
      summary: List job
      description: List the metadata for a single fine-tuning job.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            fine_tune = client.fine_tuning.retrieve(id="ft-id")

            print(fine_tune)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            fine_tune = client.fine_tuning.retrieve(id="ft-id")

            print(fine_tune)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const fineTune = await client.fineTuning.retrieve("ft-id");

            console.log(fineTune);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const fineTune = await client.fineTuning.retrieve("ft-id");

            console.log(fineTune);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/fine-tunes/ft-id" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the job to retrieve
            type: string
      responses:
        '200':
          description: Fine-tune job details retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneResponse'
    delete:
      tags:
        - Fine-tuning
      summary: Delete a fine-tune job
      description: Delete a fine-tuning job.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.delete(id="ft-id")

            print(response)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.delete(id="ft-id")

            print(response)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.delete("ft-id");

            console.log(response);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.delete("ft-id");

            console.log(response);
        - lang: Shell
          label: cURL
          source: >
            curl -X "DELETE"
            "https://api.together.ai/v1/fine-tunes/ft-id?force=false" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the fine-tune job to delete
            type: string
        - name: force
          deprecated: true
          in: query
          schema:
            description: Deprecated and unused parameter.
            type: boolean
            default: false
      responses:
        '200':
          description: Fine-tune job deleted successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneDeleteResponse'
        '404':
          description: Fine-tune job not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /fine-tunes/{id}/events:
    get:
      tags:
        - Fine-tuning
      summary: List job events
      description: List the events for a single fine-tuning job.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.list_events(id="ft-id")

            for event in response.data:
                print(event)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            events = client.fine_tuning.list_events(id="ft-id")

            print(events)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const events = await client.fineTuning.listEvents("ft-id");

            console.log(events);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const events = await client.fineTuning.listEvents("ft-id");

            console.log(events);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/fine-tunes/ft-id/events" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the fine-tune job to list events for
            type: string
      responses:
        '200':
          description: List of fine-tune events
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneListEvents'
  /fine-tunes/{id}/checkpoints:
    get:
      tags:
        - Fine-tuning
      summary: List checkpoints
      description: List the checkpoints for a single fine-tuning job.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")

            print(checkpoints)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")

            print(checkpoints)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";


            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });


            const checkpoints = await
            client.fineTuning.listCheckpoints("ft-id");


            console.log(checkpoints);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";


            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });


            const checkpoints = await
            client.fineTuning.listCheckpoints("ft-id");


            console.log(checkpoints);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/fine-tunes/ft-id/checkpoints" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the fine-tune job to list checkpoints for
            type: string
      responses:
        '200':
          description: List of fine-tune checkpoints
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneListCheckpoints'
  /finetune/download:
    get:
      tags:
        - Fine-tuning
      summary: Download model
      description: Receive a compressed fine-tuned model or checkpoint.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: >
            # Docs for v1 can be found by changing the above selector ^

            from together import Together

            import os


            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )


            # Using `with_streaming_response` gives you control to do what you
            want with the response.

            stream =
            client.fine_tuning.with_streaming_response.content(ft_id="ft-id")


            with stream as response:
                for line in response.iter_lines():
                    print(line)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            # This will download the content to a location on disk
            response = client.fine_tuning.download(id="ft-id")

            print(response)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.content({
              ft_id: "ft-id",
            });

            console.log(await response.blob());
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.content({
              ft_id: "ft-id",
            });

            console.log(await response.blob());
        - lang: Shell
          label: cURL
          source: >
            curl
            "https://api.together.ai/v1/finetune/download?ft_id=ft-id&checkpoint=merged"
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - in: query
          name: ft_id
          required: true
          schema:
            description: Fine-tune ID to download. A string that starts with `ft-`.
            type: string
        - in: query
          name: checkpoint_step
          required: false
          schema:
            description: >-
              Specifies step number for checkpoint to download. Ignores
              `checkpoint` value if set.
            type: integer
        - in: query
          name: checkpoint
          schema:
            description: >-
              Specifies checkpoint type to download - `merged` vs `adapter`.
              This field is required if the checkpoint_step is not set.
            type: string
            enum:
              - merged
              - adapter
              - model_output_path
      responses:
        '200':
          description: Successfully downloaded the fine-tuned model or checkpoint.
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
        '400':
          description: Invalid request parameters.
        '404':
          description: Fine-tune ID not found.
  /fine-tunes/{id}/cancel:
    post:
      tags:
        - Fine-tuning
      summary: Cancel job
      description: >-
        Cancel a currently running fine-tuning job. Returns a
        FinetuneResponseTruncated object.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.cancel(id="ft-id")

            print(response)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.fine_tuning.cancel(id="ft-id")

            print(response)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.cancel("ft-id");

            console.log(response);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.fineTuning.cancel("ft-id");

            console.log(response);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/fine-tunes/ft-id/cancel" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Fine-tune ID to cancel. A string that starts with `ft-`.
            type: string
      responses:
        '200':
          description: Successfully cancelled the fine-tuning job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FinetuneResponseTruncated'
        '400':
          description: Invalid request parameters.
        '404':
          description: Fine-tune ID not found.
  /fine-tunes/{id}/metrics:
    get:
      tags:
        - Fine-tuning
      summary: Get metrics
      description: >
        Retrieves recorded training metrics for a fine-tuning job in
        chronological order. All filter fields are optional — omit the body or
        send `{}` to retrieve all metrics.
      x-codeSamples:
        - lang: Shell
          label: cURL
          source: |
            curl -X GET "https://api.together.ai/v1/fine-tunes/ft-id/metrics" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "global_step_from": 0,
                   "global_step_to": 500
                 }'
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: Fine-tune job ID. A string that starts with `ft-`.
            type: string
      requestBody:
        required: false
        content:
          application/json:
            schema:
              type: object
              properties:
                global_step_from:
                  type: integer
                  format: int64
                  description: Return only metrics with global_step >= this value.
                  example: 0
                global_step_to:
                  type: integer
                  format: int64
                  description: Return only metrics with global_step <= this value.
                  example: 500
                logged_at_from:
                  type: string
                  format: date-time
                  description: >-
                    Return only metrics logged at or after this ISO-8601
                    timestamp.
                  example: '2024-01-01T00:00:00Z'
                logged_at_to:
                  type: string
                  format: date-time
                  description: >-
                    Return only metrics logged at or before this ISO-8601
                    timestamp.
                  example: '2024-01-01T12:00:00Z'
                resolution:
                  type: integer
                  format: int64
                  description: Number of (uniformly sampled) train metrics to return.
                  example: 100
      responses:
        '200':
          description: List of metrics snapshots in chronological order.
          content:
            application/json:
              schema:
                type: object
                properties:
                  metrics:
                    type: array
                    items:
                      type: object
                      additionalProperties:
                        type: number
                      description: A flat dictionary of scalar metric values.
                example:
                  metrics:
                    - train/loss: 0.5
                      train/learning_rate: 0.0001
                      train/global_step: 7
                    - train/loss: 0.45
                      train/learning_rate: 0.00009
                      train/global_step: 14
        '400':
          description: Invalid request — bad JSON body or missing job ID.
        '404':
          description: Fine-tune job not found.
        '500':
          description: Internal server error — failed to retrieve metrics.
  /fine-tunes/models/supported:
    get:
      tags:
        - Fine-tuning
      summary: List supported models
      description: List models supported for fine-tuning.
      x-codeSamples:
        - lang: Shell
          label: cURL (list all)
          source: |
            curl "https://api.together.ai/v1/fine-tunes/models/supported" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY"
      responses:
        '200':
          description: List of supported models.
          content:
            application/json:
              schema:
                type: object
                required:
                  - models
                properties:
                  models:
                    type: array
                    items:
                      type: string
                    description: List of supported model names.
  /fine-tunes/models/limits:
    get:
      tags:
        - Fine-tuning
      summary: Get model limits
      description: Get model limits for a specific fine-tuning model.
      x-codeSamples:
        - lang: Shell
          label: cURL
          source: >
            curl
            "https://api.together.ai/v1/fine-tunes/models/limits?model_name=meta-llama/Meta-Llama-3.1-8B-Instruct-Reference"
            \
                 -H "Authorization: Bearer $TOGETHER_API_KEY"
      parameters:
        - in: query
          name: model_name
          schema:
            type: string
            description: The model name to get limits for.
          required: true
      responses:
        '200':
          description: Model limits.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FineTuneModelLimits'
        '404':
          description: Model not found or not supported for fine-tuning.
          content:
            application/json:
              schema:
                type: object
                properties:
                  message:
                    type: string
                    description: Error message explaining the model is not available.
  /rerank:
    post:
      tags:
        - Rerank
      summary: Create a rerank request
      description: >-
        Rerank a list of documents by relevance to a query. Returns a relevance
        score and ordering index for each document.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            documents = [
                {
                    "title": "Llama",
                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
                },
                {
                    "title": "Panda",
                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
                },
                {
                    "title": "Guanaco",
                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
                },
                {
                    "title": "Wild Bactrian camel",
                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
                }
            ]

            response = client.rerank.create(
                model="Salesforce/Llama-Rank-v1",
                query="What animals can I find near Peru?",
                documents=documents,
            )

            for result in response.results:
                print(f"Rank: {result.index + 1}")
                print(f"Title: {documents[result.index]['title']}")
                print(f"Text: {documents[result.index]['text']}")
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            documents = [
                {
                    "title": "Llama",
                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
                },
                {
                    "title": "Panda",
                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
                },
                {
                    "title": "Guanaco",
                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
                },
                {
                    "title": "Wild Bactrian camel",
                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
                }
            ]

            response = client.rerank.create(
                model="Salesforce/Llama-Rank-v1",
                query="What animals can I find near Peru?",
                documents=documents,
            )

            for result in response.results:
                print(f"Rank: {result.index + 1}")
                print(f"Title: {documents[result.index]['title']}")
                print(f"Text: {documents[result.index]['text']}")
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const documents = [{
              "title": "Llama",
              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
            },
            {
              "title": "Panda",
              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
            },
            {
              "title": "Guanaco",
              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
            },
            {
              "title": "Wild Bactrian camel",
              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
            }];

            const response = await client.rerank.create({
              model: "Salesforce/Llama-Rank-v1",
              query: "What animals can I find near Peru?",
              documents,
            });

            for (const result of response.results) {
              console.log(`Rank: ${result.index + 1}`);
              console.log(`Title: ${documents[result.index].title}`);
              console.log(`Text: ${documents[result.index].text}`);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const documents = [{
              "title": "Llama",
              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
            },
            {
              "title": "Panda",
              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
            },
            {
              "title": "Guanaco",
              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
            },
            {
              "title": "Wild Bactrian camel",
              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
            }];

            const response = await client.rerank.create({
              model: "Salesforce/Llama-Rank-v1",
              query: "What animals can I find near Peru?",
              documents,
            });

            for (const result of response.results) {
              console.log(`Rank: ${result.index + 1}`);
              console.log(`Title: ${documents[result.index].title}`);
              console.log(`Text: ${documents[result.index].text}`);
            }
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/rerank" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "Salesforce/Llama-Rank-v1",
                   "query": "What animals can I find near Peru?",
                   "documents": [{
                      "title": "Llama",
                      "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
                    },
                    {
                      "title": "Panda",
                      "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
                    },
                    {
                      "title": "Guanaco",
                      "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
                    },
                    {
                      "title": "Wild Bactrian camel",
                      "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
                    }]
                 }'
      operationId: rerank
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RerankRequest'
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RerankResponse'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: NotFound
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '503':
          description: Overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '504':
          description: Timeout
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      deprecated: false
  /audio/speech:
    post:
      tags:
        - Audio
      summary: Create audio generation request
      description: Generate audio from input text
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.audio.speech.with_streaming_response.create(
                model="cartesia/sonic-2",
                input="The quick brown fox jumps over the lazy dog.",
                voice="laidback woman",
            )

            with response as stream:
              stream.stream_to_file("audio.wav")
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.audio.speech.create(
                model="cartesia/sonic-2",
                input="The quick brown fox jumps over the lazy dog.",
                voice="laidback woman",
            )

            response.stream_to_file("audio.wav")
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            import { createWriteStream } from "fs";
            import { join } from "path";
            import { pipeline } from "stream/promises";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.audio.speech.create({
              model: "cartesia/sonic-2",
              input: "The quick brown fox jumps over the lazy dog.",
              voice: "laidback woman",
            });

            const filepath = join(process.cwd(), "audio.wav");
            const writeStream = createWriteStream(filepath);

            if (response.body) {
              await pipeline(response.body, writeStream);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            import { createWriteStream } from "fs";
            import { join } from "path";
            import { pipeline } from "stream/promises";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.audio.speech.create({
              model: "cartesia/sonic-2",
              input: "The quick brown fox jumps over the lazy dog.",
              voice: "laidback woman",
            });

            const filepath = join(process.cwd(), "audio.wav");
            const writeStream = createWriteStream(filepath);

            if (response.body) {
              await pipeline(response.body, writeStream);
            }
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/audio/speech" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "cartesia/sonic-2",
                   "input": "The quick brown fox jumps over the lazy dog.",
                   "voice": "laidback woman"
                 }' \
                 --output audio.wav
      operationId: audio-speech
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AudioSpeechRequest'
      responses:
        '200':
          description: OK
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
            audio/wav:
              schema:
                type: string
                format: binary
            audio/mpeg:
              schema:
                type: string
                format: binary
            text/event-stream:
              schema:
                $ref: '#/components/schemas/AudioSpeechStreamResponse'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /audio/speech/websocket:
    get:
      tags:
        - Audio
      summary: Real-time text-to-speech via WebSocket
      description: >
        Establishes a WebSocket connection for real-time text-to-speech
        generation. This endpoint uses WebSocket protocol
        (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional
        streaming communication.


        **Connection Setup:**

        - Protocol: WebSocket (wss://)

        - Authentication: Pass API key as Bearer token in Authorization header

        - Parameters: Sent as query parameters (model, voice,
        max_partial_length, language)


        **Client Events:**

        - `tts_session.updated`: Update session parameters like voice. The
        `session` object also accepts an `extra_params` field for additional
        model-specific parameters that fine-tune speech generation behavior,
        such as `pronunciation_dict` (a list of pronunciation rules for specific
        characters or symbols, where each entry uses the format
        `"<source>/<replacement>"` (e.g., `["omg/oh my god"]`) to override how
        the model pronounces matching tokens).
          ```json
          {
            "type": "tts_session.updated",
            "session": {
              "voice": "tara",
              "extra_params": {
                "pronunciation_dict": ["omg/oh my god"]
              }
            }
          }
          ```
        - `input_text_buffer.append`: Send text chunks for TTS generation
          ```json
          {
            "type": "input_text_buffer.append",
            "text": "Hello, this is a test."
          }
          ```
        - `input_text_buffer.clear`: Clear the buffered text
          ```json
          {
            "type": "input_text_buffer.clear"
          }
          ```
        - `input_text_buffer.commit`: Signal end of text input and process
        remaining text
          ```json
          {
            "type": "input_text_buffer.commit"
          }
          ```

        **Server Events:**

        - `session.created`: Initial session confirmation (sent first)
          ```json
          {
            "event_id": "evt_123456",
            "type": "session.created",
            "session": {
              "id": "session-id",
              "object": "realtime.tts.session",
              "modalities": ["text", "audio"],
              "model": "hexgrad/Kokoro-82M",
              "voice": "tara"
            }
          }
          ```
        - `conversation.item.input_text.received`: Acknowledgment that text was
        received
          ```json
          {
            "type": "conversation.item.input_text.received",
            "text": "Hello, this is a test."
          }
          ```
        - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded
        data
          ```json
          {
            "type": "conversation.item.audio_output.delta",
            "item_id": "tts_1",
            "delta": "<base64_encoded_audio_chunk>"
          }
          ```
        - `conversation.item.audio_output.done`: Audio generation complete for
        an item
          ```json
          {
            "type": "conversation.item.audio_output.done",
            "item_id": "tts_1"
          }
          ```
        - `conversation.item.tts.failed`: Error occurred
          ```json
          {
            "type": "conversation.item.tts.failed",
            "error": {
              "message": "Error description",
              "type": "invalid_request_error",
              "param": null,
              "code": "invalid_api_key"
            }
          }
          ```

        **Text Processing:**

        - Partial text (no sentence ending) is held in buffer until:
          - We believe that the text is complete enough to be processed for TTS generation
          - The partial text exceeds `max_partial_length` characters (default: 250)
          - The `input_text_buffer.commit` event is received

        **Audio Format:**

        - Format: Raw PCM (s16le, mono)

        - Sample Rate: 24000 Hz

        - Encoding: Base64 (per delta event)

        - Delivered via `conversation.item.audio_output.delta` events


        **Error Codes:**

        - `invalid_api_key`: Invalid API key provided (401)

        - `missing_api_key`: Authorization header missing (401)

        - `model_not_available`: Invalid or unavailable model (400)

        - Invalid text format errors (400)
      operationId: realtime-tts
      x-codeSamples:
        - lang: Python
          label: Python WebSocket Client
          source: |
            import asyncio
            import websockets
            import json
            import base64
            import os

            async def generate_speech():
                api_key = os.environ.get("TOGETHER_API_KEY")
                url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=af_heart"

                headers = {
                    "Authorization": f"Bearer {api_key}"
                }

                async with websockets.connect(url, additional_headers=headers) as ws:
                    # Wait for session created
                    session_msg = await ws.recv()
                    session_data = json.loads(session_msg)
                    if session_data.get("type") != "session.created":
                        print(f"Failed to start session: {session_data}")
                        return
                    print(f"Session created: {session_data['session']['id']}")

                    # Send text for TTS
                    text_chunks = [
                        "Hello, this is a test.",
                        "This is the second sentence.",
                        "And this is the final one."
                    ]

                    async def send_text():
                        for chunk in text_chunks:
                            await ws.send(json.dumps({
                                "type": "input_text_buffer.append",
                                "text": chunk
                            }))
                            await asyncio.sleep(0.5)  # Simulate typing

                        # Commit to process any remaining text
                        await ws.send(json.dumps({
                            "type": "input_text_buffer.commit"
                        }))

                    async def receive_audio():
                        audio_data = bytearray()
                        async for message in ws:
                            data = json.loads(message)

                            if data["type"] == "conversation.item.input_text.received":
                                print(f"Text received: {data['text']}")
                            elif data["type"] == "conversation.item.audio_output.delta":
                                # Decode base64 audio chunk
                                audio_chunk = base64.b64decode(data['delta'])
                                audio_data.extend(audio_chunk)
                                print(f"Received audio chunk for item {data['item_id']}")
                            elif data["type"] == "conversation.item.audio_output.done":
                                print(f"Audio generation complete for item {data['item_id']}")
                            elif data["type"] == "conversation.item.tts.failed":
                                error = data.get("error", {})
                                print(f"Error: {error.get('message')}")
                                break

                        # Save the raw PCM samples to a file
                        with open("output.pcm", "wb") as f:
                            f.write(audio_data)
                        print("Audio saved to output.pcm")

                    # Run send and receive concurrently
                    await asyncio.gather(send_text(), receive_audio())

            asyncio.run(generate_speech())
        - lang: JavaScript
          label: Node.js WebSocket Client
          source: >
            import WebSocket from 'ws';

            import fs from 'fs';


            const apiKey = process.env.TOGETHER_API_KEY;

            const url =
            'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=af_heart';


            const ws = new WebSocket(url, {
              headers: {
                'Authorization': `Bearer ${apiKey}`
              }
            });


            const audioData = [];


            ws.on('open', () => {
              console.log('WebSocket connection established!');
            });


            ws.on('message', (data) => {
              const message = JSON.parse(data.toString());

              if (message.type === 'session.created') {
                console.log(`Session created: ${message.session.id}`);

                // Send text chunks
                const textChunks = [
                  "Hello, this is a test.",
                  "This is the second sentence.",
                  "And this is the final one."
                ];

                textChunks.forEach((text, index) => {
                  setTimeout(() => {
                    ws.send(JSON.stringify({
                      type: 'input_text_buffer.append',
                      text: text
                    }));
                  }, index * 500);
                });

                // Commit after all chunks
                setTimeout(() => {
                  ws.send(JSON.stringify({
                    type: 'input_text_buffer.commit'
                  }));
                }, textChunks.length * 500 + 100);

              } else if (message.type === 'conversation.item.input_text.received') {
                console.log(`Text received: ${message.text}`);
              } else if (message.type === 'conversation.item.audio_output.delta') {
                // Decode base64 audio chunk
                const audioChunk = Buffer.from(message.delta, 'base64');
                audioData.push(audioChunk);
                console.log(`Received audio chunk for item ${message.item_id}`);
              } else if (message.type === 'conversation.item.audio_output.done') {
                console.log(`Audio generation complete for item ${message.item_id}`);
              } else if (message.type === 'conversation.item.tts.failed') {
                const errorMessage = message.error?.message ?? 'Unknown error';
                console.error(`Error: ${errorMessage}`);
                ws.close();
              }
            });


            ws.on('close', () => {
              // Save the raw PCM samples to a file
              if (audioData.length > 0) {
                const completeAudio = Buffer.concat(audioData);
                fs.writeFileSync('output.pcm', completeAudio);
                console.log('Audio saved to output.pcm');
              }
            });


            ws.on('error', (error) => {
              console.error('WebSocket error:', error);
            });
      parameters:
        - in: query
          name: model
          required: false
          schema:
            description: >-
              The TTS model to use for speech generation. Can also be set via
              `tts_session.updated` event.
            type: string
            enum:
              - hexgrad/Kokoro-82M
              - cartesia/sonic-english
            default: hexgrad/Kokoro-82M
        - in: query
          name: voice
          required: false
          schema:
            type: string
            description: >
              The voice to use for speech generation. Default is 'tara'.

              Available voices vary by model. Can also be updated via
              `tts_session.updated` event.
        - in: query
          name: max_partial_length
          required: false
          schema:
            type: integer
            default: 250
            description: >
              Maximum number of characters in partial text before forcing TTS
              generation

              even without a sentence ending. Helps reduce latency for long text
              without punctuation.
        - in: query
          name: language
          required: false
          schema:
            type: string
            default: en
            example: en
            description: >
              Language or locale of input text. Accepts ISO 639-1 language codes
              (e.g., `en`, `fr`, `es`, `zh`) as well as locale codes for
              region-specific variants. Locale codes must be lowercase (e.g.,
              `zh-hk` for Cantonese). Can also be set via `tts_session.updated`
              event.
      responses:
        '101':
          description: |
            Switching Protocols - WebSocket connection established successfully.

            Error message format:
            ```json
            {
              "type": "conversation.item.tts.failed",
              "error": {
                "message": "Error description",
                "type": "invalid_request_error",
                "param": null,
                "code": "error_code"
              }
            }
            ```
  /audio/transcriptions:
    post:
      tags:
        - Audio
      summary: Create audio transcription request
      description: Transcribes audio into text
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            file = open("audio.wav", "rb")

            response = client.audio.transcriptions.create(
                model="openai/whisper-large-v3",
                file=file,
            )

            print(response.text)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            file = open("audio.wav", "rb")

            response = client.audio.transcriptions.create(
                model="openai/whisper-large-v3",
                file=file,
            )

            print(response.text)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            import { readFileSync } from "fs";

            import { join } from "path";


            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });


            const audioFilePath = join(process.cwd(), "audio.wav");

            const audioBuffer = readFileSync(audioFilePath);

            const audioFile = new File([audioBuffer], "audio.wav", { type:
            "audio/wav" });


            const response = await client.audio.transcriptions.create({
              model: "openai/whisper-large-v3",
              file: audioFile,
            });


            console.log(response.text);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            import { readFileSync } from "fs";

            import { join } from "path";


            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });


            const audioFilePath = join(process.cwd(), "audio.wav");

            const audioBuffer = readFileSync(audioFilePath);

            const audioFile = new File([audioBuffer], "audio.wav", { type:
            "audio/wav" });


            const response = await client.audio.transcriptions.create({
              model: "openai/whisper-large-v3",
              file: audioFile,
            });


            console.log(response.text);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/audio/transcriptions" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -F "file=@audio.wav" \
                 -F "model=openai/whisper-large-v3"
      operationId: audio-transcriptions
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/AudioTranscriptionRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AudioTranscriptionResponse'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /audio/translations:
    post:
      tags:
        - Audio
      summary: Create audio translation request
      description: Translates audio into English
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            file = open("audio.wav", "rb")

            response = client.audio.translations.create(
                model="openai/whisper-large-v3",
                file=file,
                language="es",
            )

            print(response.text)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            file = open("audio.wav", "rb")

            response = client.audio.translations.create(
                model="openai/whisper-large-v3",
                file=file,
                language="es",
            )

            print(response.text)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            import { readFileSync } from "fs";

            import { join } from "path";


            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });


            const audioFilePath = join(process.cwd(), "audio.wav");

            const audioBuffer = readFileSync(audioFilePath);

            const audioFile = new File([audioBuffer], "audio.wav", { type:
            "audio/wav" });


            const response = await client.audio.translations.create({
              model: "openai/whisper-large-v3",
              file: audioFile,
              language: "es"
            });


            console.log(response.text);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            import { readFileSync } from "fs";

            import { join } from "path";


            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });


            const audioFilePath = join(process.cwd(), "audio.wav");

            const audioBuffer = readFileSync(audioFilePath);

            const audioFile = new File([audioBuffer], "audio.wav", { type:
            "audio/wav" });


            const response = await client.audio.translations.create({
              model: "openai/whisper-large-v3",
              file: audioFile,
              language: "es"
            });


            console.log(response.text);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/audio/translations" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -F "file=@audio.wav" \
                 -F "model=openai/whisper-large-v3" \
                 -F "language=es"
      operationId: audio-translations
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/AudioTranslationRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AudioTranslationResponse'
        '400':
          description: BadRequest
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '429':
          description: RateLimit
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /compute/clusters:
    get:
      tags:
        - GPUClusterService
      summary: List all GPU clusters.
      description: List all GPU clusters.
      operationId: GPUClusterService_List
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClusters'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.beta.clusters.list()

            print(response.clusters)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.beta.clusters.list();

            console.log(response.clusters);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.beta.clusters.list();

            console.log(response.clusters);
    post:
      tags:
        - GPUClusterService
      summary: Create GPU Cluster
      description: >
        Create an Instant Cluster on Together's high-performance GPU clusters.

        With features like on-demand scaling, long-lived resizable
        high-bandwidth shared DC-local storage,

        Kubernetes and Slurm cluster flavors, a REST API, and Terraform support,

        you can run workloads flexibly without complex infrastructure
        management.
      operationId: GPUClusterService_Create
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GPUClusterCreateRequest'
        required: true
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClusterInfo'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together

            client = Together()

            response = client.beta.clusters.create(
              cluster_name="my-gpu-cluster",
              region="us-central-8",
              gpu_type="H100_SXM",
              num_gpus=8,
              driver_version="CUDA_12_6_560",
              billint_type="ON_DEMAND",
            )

            print(response.cluster_id)
        - lang: TypeScript
          label: Together AI SDK (v2)
          source: |
            import Together from "together-ai";

            const client = new Together();

            const response = await client.beta.clusters.create({
              cluster_name: "my-gpu-cluster",
              region: "us-central-8",
              gpu_type: "H100_SXM",
              num_gpus: 8,
              driver_version: "CUDA_12_6_560",
              billint_type: "ON_DEMAND",
            });

            console.log(response.cluster_id)
        - lang: JavaScript
          label: Together AI SDK (v2)
          source: |
            import Together from "together-ai";

            const client = new Together();

            const response = await client.beta.clusters.create({
              cluster_name: "my-gpu-cluster",
              region: "us-central-8",
              gpu_type: "H100_SXM",
              num_gpus: 8,
              driver_version: "CUDA_12_6_560",
              billint_type: "ON_DEMAND",
            });

            console.log(response.cluster_id)
        - lang: Shell
          label: CLI
          source: |
            curl -X POST \
                  -H "Authorization Bearer $TOGETHER_API_KEY" \
                  --data '{ "region": "us-west-2", "gpu_type": "H100_SXM", "num_gpus": 8, "cluster_name": "my-gpu-cluster", "driver_version": "CUDA_12_6_560" }' \
                  https://api.together.ai/v1/compute/clusters
  /compute/clusters/{cluster_id}:
    get:
      tags:
        - GPUClusterService
      summary: Get GPU cluster by cluster ID
      description: Retrieve information about a specific GPU cluster.
      operationId: GPUClusterService_Get
      parameters:
        - name: cluster_id
          in: path
          required: true
          schema:
            description: The ID of the cluster to retrieve
            type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClusterInfo'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            cluster = client.beta.clusters.retrieve("cluster_id")
            print(cluster)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const cluster = await client.beta.clusters.retrieve("cluster_id");
            console.log(cluster);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const cluster = await client.beta.clusters.retrieve("cluster_id");
            console.log(cluster);
    put:
      tags:
        - GPUClusterService
      summary: Update a GPU Cluster.
      description: Update the configuration of an existing GPU cluster.
      operationId: GPUClusterService_Update
      parameters:
        - name: cluster_id
          in: path
          required: true
          schema:
            description: The ID of the cluster to update
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GPUClusterUpdateRequest'
        required: true
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClusterInfo'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: >
            from together import Together

            client = Together()


            cluster = client.beta.clusters.update("cluster_id",
            cluster_type="KUBERNETES", num_gpus=24)

            print(cluster)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const cluster = await client.beta.clusters.update({
              cluster_id: "cluster_id",
              cluster_type: "kubernetes",
              num_gpus: 24,
            })
            console.log(cluster)
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const cluster = await client.beta.clusters.update({
              cluster_id: "cluster_id",
              cluster_type: "kubernetes",
              num_gpus: 24,
            })
            console.log(cluster)
        - lang: Shell
          label: cURL
          source: |
            curl -X PUT \
                  -H "Authorization Bearer $TOGETHER_API_KEY" \
                  --data '{ "cluster_id": "cluster id", "cluster_type": "kubernetes", "num_gpus": 24 }' \
                  https://api.together.ai/v1/compute/clusters
    delete:
      tags:
        - GPUClusterService
      summary: Delete GPU cluster by cluster ID
      description: Delete a GPU cluster by cluster ID.
      operationId: GPUClusterService_Delete
      parameters:
        - name: cluster_id
          in: path
          required: true
          schema:
            description: The ID of the cluster to delete
            type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: object
                required:
                  - cluster_id
                properties:
                  cluster_id:
                    type: string
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            cluster = client.beta.clusters.delete("cluster_id")
            print(cluster)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const cluster = await client.beta.clusters.delete("cluster_id");
            console.log(cluster);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const cluster = await client.beta.clusters.delete("cluster_id");
            console.log(cluster);
        - lang: Shell
          label: cURL
          source: |
            curl -X DELETE \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/compute/clusters/${CLUSTER_ID}
  /compute/regions:
    get:
      tags:
        - RegionService
      summary: List regions and corresponding supported driver versions
      operationId: RegionService_List
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RegionListResponse'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            regions = client.beta.clusters.list_regions()
            print(regions)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const regions = await client.beta.clusters.list_regions();
            console.log(regions);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const regions = await client.beta.clusters.list_regions();
            console.log(regions);
        - lang: Shell
          label: cURL
          source: |
            curl -X GET \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/compute/regions
  /compute/clusters/storage/volumes:
    get:
      tags:
        - SharedVolumeService
      summary: List all shared volumes.
      description: List all shared volumes.
      operationId: SharedVolumeService_List
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClustersSharedVolumes'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            volumes = client.beta.clusters.storage.list()
            print(volumes)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volumes = await client.beta.clusters.storage.list();
            console.log(volumes);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volumes = await client.beta.clusters.storage.list();
            console.log(volumes);
        - lang: Shell
          label: cURL
          source: |
            curl -X GET \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/compute/clusters/storages
    put:
      tags:
        - SharedVolumeService
      summary: Update a shared volume.
      description: |
        Update the configuration of an existing shared volume.
      operationId: SharedVolumeService_Update
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GPUClustersSharedVolumeUpdateRequest'
        required: true
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClustersSharedVolume'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            volume = client.beta.clusters.storage.update(
              volume_id="12345-67890-12345-67890",
              size_tib=3
            )
            print(volume)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volume = await client.beta.clusters.storage.update({
              volume_id: "12345-67890-12345-67890",
              size_tib: 3
            });
            console.log(volume);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volume = await client.beta.clusters.storage.update({
              volume_id: "12345-67890-12345-67890",
              size_tib: 3
            });
            console.log(volume);
        - lang: Shell
          label: cURL
          source: |
            curl -X PUT \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  --data '{ "volume_id": "12345-67890-12345-67890", "size_tib": 3}' \
                  https://api.together.ai/v1/compute/clusters/storage/volumes
    post:
      tags:
        - SharedVolumeService
      summary: Create a shared volume.
      description: >
        Instant Clusters supports long-lived, resizable in-DC shared storage
        with user data persistence.

        You can dynamically create and attach volumes to your cluster at cluster
        creation time, and resize as your data grows.

        All shared storage is backed by multi-NIC bare metal paths, ensuring
        high-throughput and low-latency performance for shared storage.
      operationId: SharedVolumeService_Create
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GPUClustersSharedVolumeCreateRequest'
        required: true
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClustersSharedVolume'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            volume = client.beta.clusters.storage.create(
              volume_name="my-shared-volume",
              size_tib=2,
              region="us-west-2"
            )
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volume = await client.beta.clusters.storage.create({
              volume_name: "my-shared-volume",
              size_tib: 2,
              region: "us-west-2"
            });
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
            const client = new Together();

            const volume = await client.beta.clusters.storage.create({
              volume_name: "my-shared-volume",
              size_tib: 2,
              region: "us-west-2"
            });
        - lang: Shell
          label: cURL
          source: |
            curl -X POST \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  --data '{ "volume_name": "my-shared-volume", "size_tib": 2, "region": "us-west-2" }' \
                  https://api.together.ai/v1/compute/clusters/storage/volumes
  /compute/clusters/storage/volumes/{volume_id}:
    get:
      tags:
        - SharedVolumeService
      summary: Get shared volume by volume Id.
      description: Retrieve information about a specific shared volume.
      operationId: SharedVolumeService_Get
      parameters:
        - name: volume_id
          in: path
          required: true
          schema:
            description: The ID of the volume to retrieve
            type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClustersSharedVolume'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            volume = client.beta.clusters.storage.retrieve("volume_id")
            print(volume)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const volume = await
            client.beta.clusters.storage.retrieve("volume_id");

            console.log(volume);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const volume = await
            client.beta.clusters.storage.retrieve("volume_id");

            console.log(volume);
        - lang: Shell
          label: cURL
          source: |
            curl -X GET \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/compute/clusters/storage/volumes/${VOLUME_ID}
    delete:
      tags:
        - SharedVolumeService
      summary: Delete shared volume by volume id.
      description: >
        Delete a shared volume. Note that if this volume is attached to a
        cluster, deleting will fail.
      operationId: SharedVolumeService_Delete
      parameters:
        - name: volume_id
          in: path
          required: true
          schema:
            description: The ID of the volume to delete
            type: string
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GPUClustersSharedVolumeDeleteResponse'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            client = Together()

            volume = client.beta.clusters.storage.delete("volume_id")
            print(volume)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const volume = await
            client.beta.clusters.storage.delete("volume_id");

            console.log(volume);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: >
            import Together from "together-ai";

            const client = new Together();


            const volume = await
            client.beta.clusters.storage.delete("volume_id");

            console.log(volume);
        - lang: Shell
          label: cURL
          source: |
            curl -X DELETE \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  https://api.together.ai/v1/compute/clusters/storage/volumes/${VOLUME_ID}
  /clusters/availability-zones:
    get:
      tags:
        - endpoints
      summary: List all available availability zones.
      description: List all available availability zones.
      operationId: availabilityZones
      responses:
        '200':
          description: Success
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListAvailibilityZonesResponse'
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.endpoints.list_avzones()

            print(response.avzones)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.endpoints.listAvzones();

            console.log(response.avzones);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.endpoints.listAvzones();

            console.log(response.avzones);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/clusters/availability-zones" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
  /endpoints:
    get:
      tags:
        - Endpoints
      summary: List all endpoints, can be filtered by type
      description: >-
        Returns a list of all endpoints associated with your account. You can
        filter the results by type (dedicated or serverless).
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.endpoints.list()

            for endpoint in response.data:
                print(endpoint.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoints = client.endpoints.list()

            for endpoint in endpoints:
                print(endpoint.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoints = await client.endpoints.list();

            for (const endpoint of endpoints.data) {
              console.log(endpoint);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoints = await client.endpoints.list();

            for (const endpoint of endpoints.data) {
              console.log(endpoint);
            }
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/endpoints" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      operationId: listEndpoints
      parameters:
        - name: type
          in: query
          required: false
          schema:
            description: Filter endpoints by type
            type: string
            enum:
              - dedicated
              - serverless
        - name: usage_type
          in: query
          required: false
          schema:
            type: string
            enum:
              - on-demand
              - reserved
            description: Filter endpoints by usage type
        - name: mine
          in: query
          required: false
          schema:
            type: boolean
            description: If true, return only endpoints owned by the caller
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                type: object
                required:
                  - object
                  - data
                properties:
                  object:
                    description: The object type, which is always `list`.
                    const: list
                  data:
                    type: array
                    items:
                      $ref: '#/components/schemas/ListEndpoint'
                example:
                  object: list
                  data:
                    - object: endpoint
                      id: endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e
                      name: allenai/OLMo-7B
                      model: allenai/OLMo-7B
                      type: serverless
                      owner: together
                      state: STARTED
                      created_at: '2024-02-28T21:34:35.444Z'
        '403':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
    post:
      tags:
        - Endpoints
      summary: Create a dedicated endpoint, it will start automatically
      description: >-
        Creates a new dedicated endpoint for serving models. The endpoint will
        automatically start after creation. You can deploy any supported model
        on hardware configurations that meet the model's requirements.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.create(
                model="Qwen/Qwen3.5-9B-FP8",
                hardware="1x_nvidia_a100_80gb_sxm",
                autoscaling={
                  "min_replicas": 2,
                  "max_replicas": 5,
                }
            )

            print(endpoint.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.create(
                model="Qwen/Qwen3.5-9B-FP8",
                hardware="1x_nvidia_a100_80gb_sxm",
                min_replicas=2,
                max_replicas=5,
            )

            print(endpoint.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.create({
              model: "Qwen/Qwen3.5-9B-FP8",
              hardware: "1x_nvidia_a100_80gb_sxm",
              autoscaling: {
                max_replicas: 5,
                min_replicas: 2,
              }
            });

            console.log(endpoint.id);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.create({
              model: "Qwen/Qwen3.5-9B-FP8",
              hardware: "1x_nvidia_a100_80gb_sxm",
              autoscaling: {
                max_replicas: 5,
                min_replicas: 2,
              }
            });

            console.log(endpoint.id);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/endpoints" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "model": "Qwen/Qwen3.5-9B-FP8",
                   "hardware": "1x_nvidia_a100_80gb_sxm",
                   "autoscaling": {
                     "max_replicas": 5,
                     "min_replicas": 2
                   }
                 }'
      operationId: createEndpoint
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateEndpointRequest'
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DedicatedEndpoint'
        '403':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /endpoints/{endpointId}:
    get:
      tags:
        - Endpoints
      summary: Get endpoint by ID
      description: >-
        Retrieves details about a specific endpoint, including its current
        state, configuration, and scaling settings.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.retrieve("endpoint-id")

            print(endpoint.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.get("endpoint-id")

            print(endpoint.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.retrieve("endpoint-id");

            console.log(endpoint);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.retrieve("endpoint-id");

            console.log(endpoint);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/endpoints/endpoint-id" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      operationId: getEndpoint
      parameters:
        - name: endpointId
          in: path
          required: true
          schema:
            type: string
            description: The ID of the endpoint to retrieve
            example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DedicatedEndpoint'
        '403':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: Not Found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
    patch:
      tags:
        - Endpoints
      summary: >-
        Update endpoint, this can also be used to start or stop a dedicated
        endpoint
      description: >-
        Updates an existing endpoint's configuration. You can modify the display
        name, autoscaling settings, or change the endpoint's state (start/stop).
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.update(
                "endpoint-id",
                state="STOPPED"
            )

            print(endpoint)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.update(
                endpoint_id="endpoint-id",
                state="STOPPED"
            )

            print(endpoint)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.update("endpoint-id", {
              state: "STOPPED"
            });

            console.log(endpoint);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.update("endpoint-id", {
              state: "STOPPED"
            });

            console.log(endpoint);
        - lang: Shell
          label: cURL
          source: |
            curl -X PATCH "https://api.together.ai/v1/endpoints/endpoint-id" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "state": "STOPPED"
                 }'
      operationId: updateEndpoint
      parameters:
        - name: endpointId
          in: path
          required: true
          schema:
            type: string
            description: The ID of the endpoint to update
            example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                display_name:
                  type: string
                  description: A human-readable name for the endpoint
                  example: My Llama3 70b endpoint
                state:
                  type: string
                  description: The desired state of the endpoint
                  enum:
                    - STARTED
                    - STOPPED
                  example: STARTED
                autoscaling:
                  $ref: '#/components/schemas/Autoscaling'
                  description: New autoscaling configuration for the endpoint
                inactive_timeout:
                  type: integer
                  description: >-
                    The number of minutes of inactivity after which the endpoint
                    will be automatically stopped. Set to 0 to disable automatic
                    timeout.
                  nullable: true
                  example: 60
      responses:
        '200':
          description: '200'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DedicatedEndpoint'
        '403':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: Not Found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
    delete:
      tags:
        - Endpoints
      summary: Delete endpoint
      description: Permanently deletes an endpoint. This action cannot be undone.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.delete("endpoint-id")

            print(endpoint)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            endpoint = client.endpoints.delete(
                endpoint_id="endpoint-id",
            )

            print(endpoint)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.delete("endpoint-id");

            console.log(endpoint);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const endpoint = await client.endpoints.delete("endpoint-id");

            console.log(endpoint);
        - lang: Shell
          label: cURL
          source: >
            curl -X "DELETE" "https://api.together.ai/v1/endpoints/endpoint-id"
            \
                 -H "Authorization: Bearer $TOGETHER_API_KEY"
      operationId: deleteEndpoint
      parameters:
        - name: endpointId
          in: path
          required: true
          schema:
            type: string
            description: The ID of the endpoint to delete
            example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
      responses:
        '204':
          description: No Content - Endpoint successfully deleted
        '403':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '404':
          description: Not Found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /hardware:
    get:
      tags:
        - Hardware
      summary: List available hardware configurations
      description: >
        Returns a list of available hardware configurations for deploying
        models. When a model parameter is provided, it returns only hardware
        configurations compatible with that model, including their current
        availability status.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.endpoints.list_hardware()

            for hardware in response.data:
                print(hardware.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.endpoints.list_hardware()

            for hardware in response:
                print(hardware.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const hardware = await client.endpoints.list_hardware();

            console.log(hardware);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const hardware = await client.endpoints.list_hardware();

            console.log(hardware);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/hardware" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      operationId: listHardware
      parameters:
        - name: model
          in: query
          required: false
          schema:
            type: string
            description: >
              Filter hardware configurations by model compatibility. When
              provided,

              the response includes availability status for each compatible
              configuration.

              [See all of Together AI's dedicated
              models](https://docs.together.ai/docs/dedicated-models)
            example: deepseek-ai/DeepSeek-R1
      responses:
        '200':
          description: List of available hardware configurations
          content:
            application/json:
              schema:
                type: object
                required:
                  - object
                  - data
                properties:
                  object:
                    description: The object type, which is always `list`.
                    const: list
                  data:
                    type: array
                    items:
                      $ref: '#/components/schemas/HardwareWithStatus'
        '403':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Internal error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /tci/execute:
    post:
      tags:
        - Code Interpreter
      callbacks: {}
      description: >
        Executes the given code snippet and returns the output. Without a
        session_id, a new session will be created to run the code. If you do
        pass in a valid session_id, the code will be run in that session. This
        is useful for running multiple code snippets in the same environment,
        because dependencies and similar things are persisted

        between calls to the same session.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.code_interpreter.execute(
                code="print('Hello world!')",
                language="python",
            )

            print(response.data.outputs[0].data);
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.code_interpreter.run(
                code="print('Hello world!')",
                language="python",
            )

            print(response.data.outputs[0].data);
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.codeInterpreter.execute({
              code: "print('Hello world!')",
              language: "python"
            });

            console.log(response.data?.outputs?.[0]?.data);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.codeInterpreter.execute({
              code: "print('Hello world!')",
              language: "python"
            });

            console.log(response.data?.outputs?.[0]?.data);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/tci/execute" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "code": "print(\'Hello world!\')",
                   "language": "python"
                 }'
      operationId: tci/execute
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ExecuteRequest'
        description: Execute Request
        required: false
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExecuteResponse'
          description: Execute Response
  /tci/sessions:
    get:
      tags:
        - Code Interpreter
      callbacks: {}
      description: |
        Lists all your currently active sessions.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.code_interpreter.sessions.list()

            for session in response.data.sessions:
                print(session.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            # together v1 does not support this method
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.codeInterpreter.sessions.list();

            for (const session of response.data?.sessions) {
              console.log(session.id);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.codeInterpreter.sessions.list();

            for (const session of response.data?.sessions) {
              console.log(session.id);
            }
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/tci/sessions" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      operationId: sessions/list
      parameters: []
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionListResponse'
          description: List Response
  /batches:
    get:
      tags:
        - Batches
      summary: List batch jobs
      description: List all batch jobs for the authenticated user
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            batches = client.batches.list()

            for batch in batches:
                print(batch.id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            batches = client.batches.list_batches()

            for batch in batches:
                print(batch.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batches = await client.batches.list();

            console.log(batches);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batches = await client.batches.list();

            console.log(batches);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/batches" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      security:
        - bearerAuth: []
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/BatchJob'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
    post:
      tags:
        - Batches
      summary: Create a batch job
      description: Create a new batch job with the given input file and endpoint
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: >
            # Docs for v1 can be found by changing the above selector ^

            from together import Together

            import os


            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )


            batch = client.batches.create(input_file_id="file_id",
            endpoint="/v1/chat/completions")


            print(batch.job)
        - lang: Python
          label: Together AI SDK (v1)
          source: >
            from together import Together

            import os


            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )


            batch = client.batches.create_batch("file_id",
            endpoint="/v1/chat/completions")


            print(batch.id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batch = await client.batches.create({
              endpoint: "/v1/chat/completions",
              input_file_id: "file-id",
            });

            console.log(batch);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batch = await client.batches.create({
              endpoint: "/v1/chat/completions",
              input_file_id: "file-id",
            });

            console.log(batch);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/batches" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json" \
                 -d '{
                   "endpoint": "/v1/chat/completions",
                   "input_file_id": "file-id"
                 }'
      security:
        - bearerAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateBatchRequest'
      responses:
        '201':
          description: Job created (potentially with warnings)
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchJobWithWarning'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '429':
          description: Too Many Requests
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
  /batches/{id}:
    get:
      tags:
        - Batches
      summary: Get a batch job
      description: Get details of a batch job by ID
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            batch = client.batches.retrieve("batch_id")

            print(batch)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            batch = client.batches.get_batch("batch_id")

            print(batch)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batch = await client.batches.retrieve("batch-id");

            console.log(batch);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batch = await client.batches.retrieve("batch-id");

            console.log(batch);
        - lang: Shell
          label: cURL
          source: |
            curl "https://api.together.ai/v1/batches/ID" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      security:
        - bearerAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: string
            description: The ID of the batch job to retrieve
            example: batch_job_abc123def456
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchJob'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '404':
          description: Not Found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
  /batches/{id}/cancel:
    post:
      tags:
        - Batches
      summary: Cancel a batch job
      description: Cancel a batch job by ID
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            batch = client.batches.cancel("batch_id")

            print(batch)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            batch = client.batches.cancel("batch_id")

            print(batch)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batch = await client.batches.cancel("batch-id");

            console.log(batch);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const batch = await client.batches.cancel("batch-id");

            console.log(batch);
        - lang: Shell
          label: cURL
          source: |
            curl -X POST "https://api.together.ai/v1/batches/ID/cancel" \
                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
                 -H "Content-Type: application/json"
      security:
        - bearerAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: string
            description: The ID of the batch job to cancel
            example: batch_job_abc123def456
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchJob'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '404':
          description: Not Found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchErrorResponse'
  /evaluation:
    post:
      tags:
        - evaluation
      summary: Create an evaluation job
      operationId: createEvaluationJob
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.evals.create(
                type="classify",
                parameters=ParametersEvaluationClassifyParameters(
                    judge=ParametersEvaluationClassifyParametersJudge(
                        model="openai/gpt-oss-120b",
                        model_source="serverless",
                        system_template="You are an expert evaluator...",
                    ),
                    input_data_file_path="file-abc123",
                    labels=["good", "bad"],
                    pass_labels=["good"],
                    model_to_evaluate="Qwen/Qwen3.5-9B"
                )
            )

            print(response.workflow_id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.evaluation.create(
                type="classify",
                judge_model_name="openai/gpt-oss-120b",
                judge_system_template="You are an expert evaluator...",
                input_data_file_path="file-abc123",
                labels=["good", "bad"],
                pass_labels=["good"],
                model_to_evaluate="Qwen/Qwen3.5-9B"
            )

            print(response.workflow_id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.create({
              type: 'classify',
              parameters: {
                judge: {
                  model: 'openai/gpt-oss-120b',
                  model_source: 'serverless',
                  system_template: 'You are an expert evaluator...',
                },
                input_data_file_path: 'file-abc123',
                labels: ['good', 'bad'],
                pass_labels: ['good'],
                model_to_evaluate: 'Qwen/Qwen3.5-9B',
              },
            });

            console.log(response.workflow_id);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.create({
              type: 'classify',
              parameters: {
                judge: {
                  model: 'openai/gpt-oss-120b',
                  model_source: 'serverless',
                  system_template: 'You are an expert evaluator...',
                },
                input_data_file_path: 'file-abc123',
                labels: ['good', 'bad'],
                pass_labels: ['good'],
                model_to_evaluate: 'Qwen/Qwen3.5-9B',
              },
            });

            console.log(response.workflow_id);
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluationTypedRequest'
      responses:
        '200':
          description: Evaluation job created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvaluationResponse'
        '400':
          description: Invalid request format
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Failed to create evaluation job
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
    get:
      tags:
        - evaluation
      summary: Get all evaluation jobs
      operationId: getAllEvaluationJobs
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.evals.list()

            for job in response:
                print(job.workflow_id)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            jobs = client.evaluation.list()

            for job in jobs:
                print(job.workflow_id)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.list();

            for (const job of response) {
              console.log(job.workflow_id);
            }
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.list();

            for (const job of response) {
              console.log(job.workflow_id);
            }
      parameters:
        - name: status
          in: query
          required: false
          schema:
            type: string
            description: Filter evaluation jobs by status
        - name: limit
          in: query
          required: false
          schema:
            type: integer
            default: 10
            description: Limit the number of results
      responses:
        '200':
          description: evaluation jobs retrieved successfully
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/EvaluationJob'
        '400':
          description: Invalid request format
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Error retrieving jobs from manager
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /evaluation/model-list:
    get:
      tags:
        - evaluation
      summary: Get model list
      operationId: getModelList
      parameters:
        - name: model_source
          in: query
          required: false
          schema:
            type: string
            description: Filter models by source
            default: all
      responses:
        '200':
          description: Model list retrieved successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  model_list:
                    type: array
                    items:
                      type: string
                      description: The name of the model
        '400':
          description: Invalid request format
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Error retrieving model list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /evaluation/{id}:
    get:
      tags:
        - evaluation
      summary: Get evaluation job details
      operationId: getEvaluationJobDetails
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.evals.retrieve('eval_id')

            print(response)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.evaluation.retrieve('eval_id')

            print(response)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.retrieve('eval_id');

            console.log(response);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.retrieve('eval_id');

            console.log(response);
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the evaluation job to retrieve
            type: string
      responses:
        '200':
          description: Evaluation job details retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvaluationJob'
        '404':
          description: Evaluation job not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Failed to get evaluation job
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /evaluation/{id}/status:
    get:
      tags:
        - evaluation
      summary: Get evaluation job status and results
      operationId: getEvaluationJobStatusAndResults
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            # Docs for v1 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.evals.status('eval_id')

            print(response.status)
            print(response.results)
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

            response = client.evaluation.status('eval_id')

            print(response.status)
            print(response.results)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.status('eval_id');

            console.log(response.status);
            console.log(response.results);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

            const response = await client.evals.status('eval_id');

            console.log(response.status);
            console.log(response.results);
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: The ID of the evaluation job to get the status of
            type: string
      responses:
        '200':
          description: Evaluation job status and results retrieved successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    description: The status of the evaluation job
                    enum:
                      - completed
                      - error
                      - user_error
                      - running
                      - queued
                      - pending
                  results:
                    description: The results of the evaluation job
                    oneOf:
                      - $ref: '#/components/schemas/EvaluationClassifyResults'
                      - $ref: '#/components/schemas/EvaluationScoreResults'
                      - $ref: '#/components/schemas/EvaluationCompareResults'
        '404':
          description: Evaluation job not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
        '500':
          description: Failed to get evaluation job
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /realtime:
    get:
      tags:
        - Audio
      summary: Real-time audio transcription via WebSocket
      description: >
        Establishes a WebSocket connection for real-time audio transcription.
        This endpoint uses WebSocket protocol
        (wss://api.together.ai/v1/realtime) for bidirectional streaming
        communication.


        **Connection Setup:**

        - Protocol: WebSocket (wss://)

        - Authentication: Pass API key as Bearer token in Authorization header

        - Parameters: Sent as query parameters (model, input_audio_format)


        **Client Events:**

        - `input_audio_buffer.append`: Send audio chunks as base64-encoded data
          ```json
          {
            "type": "input_audio_buffer.append",
            "audio": "<base64_encoded_audio_chunk>"
          }
          ```
        - `input_audio_buffer.commit`: Signal end of audio stream. When VAD is
        enabled, the server automatically detects speech boundaries and emits
        `completed` events. When VAD is disabled, you must send `commit` to
        trigger transcription of the buffered audio.
          ```json
          {
            "type": "input_audio_buffer.commit"
          }
          ```
        - `transcription_session.updated`: Update session configuration,
        including Voice Activity Detection (VAD) parameters. Send this after
        receiving `session.created`. Can also be sent at any time during the
        session to change VAD settings.
          ```json
          {
            "type": "transcription_session.updated",
            "session": {
              "turn_detection": {
                "type": "server_vad",
                "threshold": 0.3,
                "min_silence_duration_ms": 500,
                "min_speech_duration_ms": 250,
                "max_speech_duration_s": 5.0,
                "speech_pad_ms": 250
              }
            }
          }
          ```
          To disable VAD entirely (manual commit mode), set `turn_detection` to `null`:
          ```json
          {
            "type": "transcription_session.updated",
            "session": {
              "turn_detection": null
            }
          }
          ```

        **Voice Activity Detection (VAD)**


        VAD controls how the server automatically detects speech segments in the
        audio stream. When enabled (the default), the server uses Silero VAD to
        identify speech regions and emits transcription events as each segment
        completes. When disabled, you must manually call
        `input_audio_buffer.commit` to trigger transcription.


        VAD can be configured in two ways:

        1. **Query parameters** at connection time:
        `turn_detection=server_vad&threshold=0.3&min_silence_duration_ms=500`

        2. **Session message** after connection: Send
        `transcription_session.updated` with a `turn_detection` object (see
        above)


        To disable VAD at connection time, use `turn_detection=none` as a query
        parameter.


        **VAD Parameters:**


        All parameters are optional — omitted fields use their defaults.


        | Parameter | Type | Default | Description |

        |-----------|------|---------|-------------|

        | `type` | string | `server_vad` | VAD mode. Use `server_vad` to enable,
        or set `turn_detection` to `null` to disable. |

        | `threshold` | float | `0.3` | Speech probability threshold (0.0–1.0).
        Audio frames with probability above this value are classified as speech.
        Lower values detect more speech but may increase false positives. For
        low-SNR audio (e.g., 8kHz phone calls), values of 0.01–0.2 may work
        better. |

        | `min_silence_duration_ms` | int | `500` | Minimum silence duration in
        milliseconds before ending a speech segment. Higher values merge nearby
        speech bursts into single segments. For phone calls with mid-sentence
        pauses, 2000–5000ms prevents over-segmentation. |

        | `min_speech_duration_ms` | int | `250` | Minimum speech segment
        duration in milliseconds. Segments shorter than this are discarded.
        Filters out brief noise bursts or clicks. |

        | `max_speech_duration_s` | float | `5.0` | Maximum speech segment
        duration in seconds. Segments longer than this are force-split at the
        longest internal silence gap. Useful for continuous speech without
        natural pauses. |

        | `speech_pad_ms` | int | `250` | Padding in milliseconds added to the
        start and end of each detected segment. Prevents clipping speech edges.
        When padding would cause adjacent segments to overlap, the gap is split
        at the midpoint instead. |


        **Server Events:**

        - `session.created`: Initial session confirmation (sent first)
          ```json
          {
            "type": "session.created",
            "session": {
              "id": "session-id",
              "object": "realtime.session",
              "modalities": ["audio"],
              "model": "openai/whisper-large-v3"
            }
          }
          ```
        - `transcription_session.updated`: Confirms session configuration was
        applied. Sent in response to a client `transcription_session.updated`
        message.
          ```json
          {
            "type": "transcription_session.updated",
            "session": {
              "turn_detection": {
                "type": "server_vad",
                "threshold": 0.3,
                "min_silence_duration_ms": 500,
                "min_speech_duration_ms": 250,
                "max_speech_duration_s": 5.0,
                "speech_pad_ms": 250
              }
            }
          }
          ```
        - `conversation.item.input_audio_transcription.delta`: Partial
        transcription results
          ```json
          {
            "type": "conversation.item.input_audio_transcription.delta",
            "delta": "The quick brown"
          }
          ```
        - `conversation.item.input_audio_transcription.completed`: Final
        transcription
          ```json
          {
            "type": "conversation.item.input_audio_transcription.completed",
            "transcript": "The quick brown fox jumps over the lazy dog"
          }
          ```
        - `conversation.item.input_audio_transcription.failed`: Error occurred
          ```json
          {
            "type": "conversation.item.input_audio_transcription.failed",
            "error": {
              "message": "Error description",
              "type": "invalid_request_error",
              "param": null,
              "code": "invalid_api_key"
            }
          }
          ```

        **Error Codes:**

        - `invalid_api_key`: Invalid API key provided (401)

        - `missing_api_key`: Authorization header missing (401)

        - `model_not_available`: Invalid or unavailable model (400)

        - Unsupported audio format errors (400)
      operationId: realtime-transcription
      x-codeSamples:
        - lang: Python
          label: Python WebSocket Client
          source: |
            import asyncio
            import websockets
            import json
            import base64
            import os

            async def transcribe_audio():
                api_key = os.environ.get("TOGETHER_API_KEY")
                url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000"

                headers = {
                    "Authorization": f"Bearer {api_key}"
                }

                async with websockets.connect(url, additional_headers=headers) as ws:
                    # Read audio file
                    with open("audio.wav", "rb") as f:
                        audio_data = f.read()

                    # Send audio in chunks with delay to simulate real-time
                    chunk_size = 8192
                    bytes_per_second = 16000 * 2  # 16kHz * 2 bytes (16-bit)
                    delay_per_chunk = chunk_size / bytes_per_second

                    for i in range(0, len(audio_data), chunk_size):
                        chunk = audio_data[i:i+chunk_size]
                        base64_chunk = base64.b64encode(chunk).decode('utf-8')
                        await ws.send(json.dumps({
                            "type": "input_audio_buffer.append",
                            "audio": base64_chunk
                        }))
                        # Simulate real-time streaming
                        if i + chunk_size < len(audio_data):
                            await asyncio.sleep(delay_per_chunk)

                    # Commit the audio buffer
                    await ws.send(json.dumps({
                        "type": "input_audio_buffer.commit"
                    }))

                    # Receive transcription results
                    async for message in ws:
                        data = json.loads(message)
                        if data["type"] == "conversation.item.input_audio_transcription.delta":
                            print(f"Partial: {data['delta']}")
                        elif data["type"] == "conversation.item.input_audio_transcription.completed":
                            print(f"Final: {data['transcript']}")
                            break
                        elif data["type"] == "conversation.item.input_audio_transcription.failed":
                            error = data.get("error", {})
                            print(f"Error: {error.get('message')}")
                            break

            asyncio.run(transcribe_audio())
        - lang: JavaScript
          label: Node.js WebSocket Client
          source: >
            import WebSocket from 'ws';

            import fs from 'fs';


            const apiKey = process.env.TOGETHER_API_KEY;

            const url =
            'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000';


            const ws = new WebSocket(url, {
              headers: {
                'Authorization': `Bearer ${apiKey}`
              }
            });


            ws.on('open', async () => {
              console.log('WebSocket connection established!');

              // Read audio file
              const audioData = fs.readFileSync('audio.wav');

              // Send audio in chunks with delay to simulate real-time
              const chunkSize = 8192;
              const bytesPerSecond = 16000 * 2;  // 16kHz * 2 bytes (16-bit)
              const delayPerChunk = (chunkSize / bytesPerSecond) * 1000;  // Convert to ms

              for (let i = 0; i < audioData.length; i += chunkSize) {
                const chunk = audioData.slice(i, i + chunkSize);
                const base64Chunk = chunk.toString('base64');
                ws.send(JSON.stringify({
                  type: 'input_audio_buffer.append',
                  audio: base64Chunk
                }));

                // Simulate real-time streaming
                if (i + chunkSize < audioData.length) {
                  await new Promise(resolve => setTimeout(resolve, delayPerChunk));
                }
              }

              // Commit audio buffer
              ws.send(JSON.stringify({
                type: 'input_audio_buffer.commit'
              }));
            });


            ws.on('message', (data) => {
              const message = JSON.parse(data.toString());

              if (message.type === 'conversation.item.input_audio_transcription.delta') {
                console.log(`Partial: ${message.delta}`);
              } else if (message.type === 'conversation.item.input_audio_transcription.completed') {
                console.log(`Final: ${message.transcript}`);
                ws.close();
              } else if (message.type === 'conversation.item.input_audio_transcription.failed') {
                const errorMessage = message.error?.message ?? message.message ?? 'Unknown error';
                console.error(`Error: ${errorMessage}`);
                ws.close();
              }
            });


            ws.on('error', (error) => {
              console.error('WebSocket error:', error);
            });
      parameters:
        - in: query
          name: model
          required: true
          schema:
            type: string
            description: The Whisper model to use for transcription
        - in: query
          name: input_audio_format
          required: true
          schema:
            type: string
            enum:
              - pcm_s16le_16000
            default: pcm_s16le_16000
          description: >-
            Audio format specification. Currently supports 16-bit PCM at 16kHz
            sample rate.
      responses:
        '101':
          description: |
            Switching Protocols - WebSocket connection established successfully.

            Error message format:
            ```json
            {
              "type": "conversation.item.input_audio_transcription.failed",
              "error": {
                "message": "Error description",
                "type": "invalid_request_error",
                "param": null,
                "code": "error_code"
              }
            }
            ```
  /queue/cancel:
    post:
      description: |
        Cancel a pending job. Only jobs in pending status can be canceled.
        Running jobs cannot be stopped. Returns the job status after the
        attempt. If the job is not pending, returns 409 with the current status
        unchanged.
      operationId: cancelQueueJob
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QueueCancelRequest'
        description: Cancel request
        required: true
      responses:
        '200':
          description: Successfully canceled
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueCancelResponse'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '404':
          description: Request not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '409':
          description: Job could not be canceled (already completed/failed)
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueCancelResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
      summary: Cancel a queued job
      tags:
        - Queue
  /queue/metrics:
    get:
      description: >-
        Get the current queue statistics for a model, including pending and
        running job counts.
      operationId: getQueueMetrics
      parameters:
        - name: model
          in: query
          required: true
          schema:
            description: Model name to get metrics for
            type: string
      responses:
        '200':
          description: Queue metrics
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueMetricsResponse'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
      summary: Get queue metrics
      tags:
        - Queue
  /queue/status:
    get:
      description: >-
        Poll the current status of a previously submitted job. Provide the
        request_id and model as query parameters.
      operationId: getQueueJobStatus
      parameters:
        - name: request_id
          in: query
          required: true
          schema:
            description: Request ID returned from the submit endpoint
            type: string
        - name: model
          in: query
          required: true
          schema:
            description: Model name the job was submitted to
            type: string
      responses:
        '200':
          description: Status information
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueJobStatusResponse'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '404':
          description: Request not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
      summary: Get job status
      tags:
        - Queue
  /queue/submit:
    post:
      description: |
        Submit a new job to the queue for asynchronous processing. Jobs are
        processed in strict priority order (higher priority first, FIFO within
        the same priority). Returns a request ID that can be used to poll status
        or cancel the job.
      operationId: submitQueueJob
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QueueJobRequest'
        description: Job request
        required: true
      responses:
        '200':
          description: Successfully queued request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueJobResponse'
        '400':
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueueError'
      summary: Submit a queued job
      tags:
        - Queue
  /rl/training-sessions:
    get:
      summary: List training sessions
      description: Lists all training sessions.
      operationId: listTrainingSessions
      tags:
        - RL
      responses:
        '200':
          description: List of training sessions
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.TrainingSessionsListResponse'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: status
          in: query
          required: false
          schema:
            description: Status of the training sessions to list
            $ref: '#/components/schemas/RL.TrainingSessionStatus'
        - name: limit
          in: query
          required: false
          schema:
            description: Maximum number of sessions to return (1-100)
            type: integer
            format: int32
            default: '20'
        - name: after
          in: query
          required: false
          schema:
            description: >-
              Cursor for pagination (ID of the last session from the previous
              page)
            type: string
    post:
      summary: Create training session
      description: Creates a training session and returns its details.
      operationId: startTrainingSession
      tags:
        - RL
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.StartTrainingSessionRequest'
        required: true
      responses:
        '200':
          description: Training session details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.TrainingSession'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
  /rl/training-sessions/{session_id}:
    get:
      summary: Get training session
      description: Gets a training session by its ID and returns its details.
      operationId: getTrainingSession
      tags:
        - RL
      responses:
        '200':
          description: Training session details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.TrainingSession'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: ID of the training session
            type: string
  /rl/training-sessions/{session_id}/stop:
    post:
      summary: Stop training session
      description: Stops a training session.
      operationId: stopTrainingSession
      tags:
        - RL
      responses:
        '200':
          description: Training session details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.TrainingSession'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: ID of the training session
            type: string
  /rl/training-sessions/{session_id}/operations/forward-backward/{operation_id}:
    get:
      summary: Get forward-backward operation
      description: Retrieves the current status and result of a forward-backward operation.
      operationId: getForwardBackwardOperation
      tags:
        - RL
      responses:
        '200':
          description: Forward-backward operation details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.ForwardBackwardOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
        - name: operation_id
          in: path
          required: true
          schema:
            description: Operation ID
            type: string
  /rl/training-sessions/{session_id}/operations/optim-step/{operation_id}:
    get:
      summary: Get optim-step operation
      description: Retrieves the current status and result of an optim-step operation.
      operationId: getOptimStepOperation
      tags:
        - RL
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.OptimStepOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
        - name: operation_id
          in: path
          required: true
          schema:
            description: Operation ID
            type: string
  /rl/training-sessions/{session_id}/operations/sample/{operation_id}:
    get:
      summary: Get sample operation
      description: Retrieves the current status and result of a sample operation.
      operationId: GetSample
      tags:
        - RL
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.SampleOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
        - name: operation_id
          in: path
          required: true
          schema:
            description: Operation ID
            type: string
  /rl/training-sessions/{session_id}/operations/forward-backward:
    post:
      summary: Forward-backward pass
      description: >-
        Submits a forward-backward pass operation that will asynchronously
        compute gradients via backpropagation.
      operationId: forwardBackward
      tags:
        - RL
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.ForwardBackwardBody'
        required: true
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.ForwardBackwardOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
  /rl/training-sessions/{session_id}/operations/optim-step:
    post:
      summary: Optimizer step
      description: >-
        Submits an optimizer step operation that will asynchronously apply
        accumulated gradients to update model parameters.
      operationId: OptimStep
      tags:
        - RL
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.OptimStepBody'
        required: true
      responses:
        '200':
          description: Optimizer step operation details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.OptimStepOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
  /rl/training-sessions/{session_id}/operations/sample:
    post:
      summary: Sample
      description: >-
        Submits a sample operation that will asynchronously generate text
        completions with logprobs.
      operationId: Sample
      tags:
        - RL
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.SampleBody'
        required: true
      responses:
        '200':
          description: ''
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.SampleOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
  /rl/training-sessions/{session_id}/operations/inference-checkpoint:
    post:
      summary: Create inference checkpoint
      description: >-
        Submits an operation that will asynchronously save the current LoRA
        adapter as an inference checkpoint and upload it to object storage.
      operationId: createInferenceCheckpoint
      tags:
        - RL
      responses:
        '200':
          description: Inference checkpoint operation details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.InferenceCheckpointOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
  /rl/training-sessions/{session_id}/operations/inference-checkpoint/{operation_id}:
    get:
      summary: Get inference checkpoint operation
      description: >-
        Retrieves the current status and result of an inference checkpoint
        operation.
      operationId: getInferenceCheckpointOperation
      tags:
        - RL
      responses:
        '200':
          description: Inference checkpoint operation details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.InferenceCheckpointOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
        - name: operation_id
          in: path
          required: true
          schema:
            description: Operation ID
            type: string
  /rl/training-sessions/{session_id}/operations/training-checkpoint:
    post:
      summary: Save training checkpoint
      description: >-
        Submits an operation that will asynchronously save the full training
        state (adapter + optimizer + step).
      operationId: createTrainingCheckpoint
      tags:
        - RL
      responses:
        '200':
          description: Save training checkpoint operation details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.TrainingCheckpointOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
  /rl/training-sessions/{session_id}/operations/training-checkpoint/{operation_id}:
    get:
      summary: Get save training checkpoint operation
      description: >-
        Retrieves the current status and result of a save training checkpoint
        operation.
      operationId: getTrainingCheckpointOperation
      tags:
        - RL
      responses:
        '200':
          description: Save training checkpoint operation details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.TrainingCheckpointOperation'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: session_id
          in: path
          required: true
          schema:
            description: Training session ID
            type: string
        - name: operation_id
          in: path
          required: true
          schema:
            description: Operation ID
            type: string
  /rl/checkpoints/{id}/download:
    get:
      summary: Download checkpoint
      description: >-
        Returns presigned URLs for downloading a checkpoint's model files. Only
        inference checkpoints support downloading.
      operationId: downloadCheckpoint
      tags:
        - RL
      responses:
        '200':
          description: Checkpoint download URLs
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RL.CheckpointDownloadResponse'
        default:
          description: An unexpected error response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
      parameters:
        - name: id
          in: path
          required: true
          schema:
            description: ID of the checkpoint
            type: string
        - name: variant
          in: query
          required: true
          schema:
            description: >-
              Checkpoint variant to download: merged (full model) or adapter
              (LoRA weights only)
            $ref: '#/components/schemas/RL.CheckpointVariant'
components:
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      x-bearer-format: bearer
      x-default: default
  schemas:
    RL.OptimStepBody:
      type: object
      properties:
        learning_rate:
          description: Learning rate for this step.
          type: number
          format: float
          example: 0.0001
          default: '0.0001'
        adamw_params:
          $ref: '#/components/schemas/RL.AdamWOptimizerParams'
    RL.SampleBody:
      type: object
      required:
        - prompts
      properties:
        prompts:
          description: Input prompts as tokenized chunks
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.ModelInput'
        sampling_params:
          $ref: '#/components/schemas/RL.SamplingParams'
          description: Optional sampling parameters
        num_samples:
          type: integer
          format: int64
          example: 1
          default: '1'
          description: Number of completions to generate per prompt
    RL.ForwardBackwardBody:
      type: object
      required:
        - samples
        - loss
      properties:
        samples:
          description: Batch of training samples to process
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.TrainingSample'
        loss:
          description: Loss function configuration
          $ref: '#/components/schemas/RL.LossConfig'
    RL.ModelInput:
      type: object
      required:
        - chunks
      properties:
        chunks:
          description: Input chunks for the model
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.InputChunk'
    RL.SamplingParams:
      type: object
      properties:
        max_tokens:
          type: integer
          format: int32
          example: 100
          default: '100'
          description: Maximum number of tokens to generate per completion
        temperature:
          type: number
          example: 1
          format: float
          default: '1.0'
          description: Sampling temperature
        top_p:
          type: number
          example: 1
          format: float
          default: '1.0'
          description: Nucleus sampling probability threshold
        top_k:
          type: integer
          format: int32
          example: -1
          default: '-1'
          description: Top-k sampling limit
        stop:
          type: array
          example:
            - |+

            - END
          items:
            type: string
          description: Generation stops when any of these strings is produced
        seed:
          oneOf:
            - type: string
            - type: integer
          example: '42'
          description: Random seed for reproducibility
    RL.InputChunk:
      type: object
      properties:
        encoded_text:
          $ref: '#/components/schemas/RL.EncodedText'
    RL.ListMeta:
      type: object
      description: Pagination metadata for list responses
      properties:
        limit:
          type: integer
          format: int32
          example: 20
          description: Maximum number of items returned per page
        has_more:
          type: boolean
          example: true
          description: Whether more items exist beyond this page
        next_cursor:
          type: string
          example: 123e4567-e89b-12d3-a456-426614174000
          description: >-
            Cursor to use as the 'after' parameter for the next page. Empty when
            has_more is false.
    RL.EncodedText:
      type: object
      required:
        - tokens
      properties:
        tokens:
          description: Pre-tokenized text input
          type: array
          example:
            - 123
            - 456
            - 789
          items:
            oneOf:
              - type: string
              - type: integer
    RL.DType:
      type: string
      enum:
        - D_TYPE_UNSPECIFIED
        - D_TYPE_INT64
        - D_TYPE_FLOAT32
        - D_TYPE_BFLOAT16
      default: D_TYPE_UNSPECIFIED
    RL.LossType:
      type: string
      enum:
        - LOSS_TYPE_UNSPECIFIED
        - LOSS_TYPE_CROSS_ENTROPY
        - LOSS_TYPE_GRPO
      default: LOSS_TYPE_UNSPECIFIED
    RL.CrossEntropyLossParams:
      type: object
      description: Cross-entropy loss parameters (currently empty).
      properties: {}
    RL.GRPOLossParams:
      type: object
      properties:
        clip_low:
          type: number
          format: float
          example: 0.2
          default: '0.2'
          description: Lower clip bound for importance ratio
        clip_high:
          type: number
          format: float
          example: 0.28
          default: '0.28'
          description: Upper clip bound for importance ratio
        beta:
          type: number
          format: float
          example: 0.1
          default: '0.0'
          description: KL penalty coefficient
        agg_type:
          $ref: '#/components/schemas/RL.GRPOLossAggregationType'
          example: GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
          default: GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
          description: Aggregation type for loss computation
    RL.GRPOLossAggregationType:
      type: string
      enum:
        - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
        - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
        - GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN
        - GRPO_LOSS_AGGREGATION_TYPE_SEQUENCE_MEAN
      default: GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
    RL.LossConfig:
      type: object
      required:
        - type
      properties:
        type:
          $ref: '#/components/schemas/RL.LossType'
          description: Type of loss function to use
          example: LOSS_TYPE_GRPO
        cross_entropy_params:
          $ref: '#/components/schemas/RL.CrossEntropyLossParams'
        grpo_params:
          $ref: '#/components/schemas/RL.GRPOLossParams'
    RL.LossTargetTokens:
      type: object
      required:
        - data
      properties:
        data:
          description: Integer array of target tokens
          type: array
          example:
            - 123
            - 456
            - 789
          items:
            oneOf:
              - type: string
              - type: integer
        dtype:
          description: Data type of the integer array
          $ref: '#/components/schemas/RL.DType'
          example: D_TYPE_INT64
    RL.LossMask:
      type: object
      required:
        - data
      description: Per-token loss mask (1=compute loss, 0=ignore)
      properties:
        data:
          description: Integer array of per-token mask values (0s and 1s)
          type: array
          example:
            - 0
            - 0
            - 1
          items:
            oneOf:
              - type: string
              - type: integer
        dtype:
          description: Data type of the integer array (must be D_TYPE_INT64)
          $ref: '#/components/schemas/RL.DType'
          example: D_TYPE_INT64
    RL.LossAdvantages:
      type: object
      required:
        - data
      properties:
        data:
          description: Float array of per-token advantages
          type: array
          example:
            - 0.5
            - 0.5
          items:
            type: number
            format: float
        dtype:
          description: Data type of the float array (D_TYPE_FLOAT32 or D_TYPE_BFLOAT16)
          $ref: '#/components/schemas/RL.DType'
          example: D_TYPE_FLOAT32
    RL.LossLogprobs:
      type: object
      required:
        - data
      properties:
        data:
          description: Float array of per-token log probabilities
          type: array
          example:
            - -1.2
            - -0.8
          items:
            type: number
            format: float
        dtype:
          description: Data type of the float array (D_TYPE_FLOAT32 or D_TYPE_BFLOAT16)
          $ref: '#/components/schemas/RL.DType'
          example: D_TYPE_FLOAT32
    RL.GRPOLossInputs:
      type: object
      required:
        - advantages
        - generator_logprobs
      properties:
        advantages:
          $ref: '#/components/schemas/RL.LossAdvantages'
          description: Per-token advantages for GRPO
        generator_logprobs:
          $ref: '#/components/schemas/RL.LossLogprobs'
          description: Generator log probabilities for GRPO
        reference_logprobs:
          $ref: '#/components/schemas/RL.LossLogprobs'
          description: Reference model log probabilities (required if beta > 0)
    RL.LossInputs:
      type: object
      properties:
        target_tokens:
          $ref: '#/components/schemas/RL.LossTargetTokens'
          description: Target tokens for loss computation
        loss_mask:
          $ref: '#/components/schemas/RL.LossMask'
          description: Per-token loss mask (1=compute loss, 0=ignore)
        grpo_inputs:
          $ref: '#/components/schemas/RL.GRPOLossInputs'
      required:
        - target_tokens
    RL.TrainingSample:
      type: object
      required:
        - model_input
        - loss_inputs
      properties:
        model_input:
          description: Model input
          $ref: '#/components/schemas/RL.ModelInput'
        loss_inputs:
          description: Loss function inputs
          $ref: '#/components/schemas/RL.LossInputs'
    RL.OptimStepOperation:
      type: object
      description: Async optimizer step operation
      required:
        - id
        - status
      properties:
        id:
          type: string
          example: 550e8400-e29b-41d4-a716-446655440000
          description: Operation ID
        status:
          $ref: '#/components/schemas/RL.TrainingOperationStatus'
          description: Operation status
        output:
          $ref: '#/components/schemas/RL.OptimStepResult'
          description: Result on success
        error:
          $ref: '#/components/schemas/RL.TrainingOperationError'
          description: Error details on failure
    RL.SampleOperation:
      type: object
      description: Async sample operation
      required:
        - id
        - status
      properties:
        id:
          type: string
          example: 550e8400-e29b-41d4-a716-446655440000
          description: Operation ID
        status:
          $ref: '#/components/schemas/RL.TrainingOperationStatus'
          example: TRAINING_OPERATION_STATUS_PENDING
          description: Operation status
        output:
          $ref: '#/components/schemas/RL.SampleResult'
          description: Result on success
        error:
          $ref: '#/components/schemas/RL.TrainingOperationError'
          description: Error details on failure
    RL.OptimStepResult:
      type: object
      description: Result of an optimizer step operation
      required:
        - step
      properties:
        step:
          description: Step number
          oneOf:
            - type: string
            - type: integer
          example: '100'
    RL.SampleResult:
      type: object
      description: Result of a sample operation
      required:
        - rollouts
      properties:
        rollouts:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.SampleRollout'
          description: Completions grouped by prompt
    RL.SampleRollout:
      type: object
      description: Completions generated for a single prompt
      required:
        - sequences
      properties:
        sequences:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.SampleSequence'
          description: Completions generated for one prompt
    RL.SampleSequence:
      type: object
      description: A single generated completion sequence with tokens and logprobs
      required:
        - tokens
      properties:
        tokens:
          type: array
          example:
            - '123'
            - '456'
            - '789'
          items:
            oneOf:
              - type: string
              - type: integer
          description: Generated token IDs
        logprobs:
          type: array
          example:
            - -0.5
            - -1.2
            - -0.3
          items:
            type: number
            format: double
          description: Log probabilities for each generated token
        stop_reason:
          type: string
          example: length
          description: Reason for stopping generation
    RL.ForwardBackwardOperation:
      type: object
      description: Async forward-backward pass operation
      required:
        - id
        - status
      properties:
        id:
          type: string
          example: 550e8400-e29b-41d4-a716-446655440000
          description: Operation ID
        status:
          $ref: '#/components/schemas/RL.TrainingOperationStatus'
          example: TRAINING_OPERATION_STATUS_PENDING
          description: Operation status
        output:
          $ref: '#/components/schemas/RL.ForwardBackwardResult'
          description: Result on success
        error:
          $ref: '#/components/schemas/RL.TrainingOperationError'
          description: Error details on failure
    RL.ForwardBackwardResult:
      type: object
      description: Result of a forward-backward pass operation
      required:
        - loss
      properties:
        loss:
          type: number
          format: double
          example: 2.345
          description: Loss value
        metrics:
          type: object
          description: Loss-specific metrics (e.g., KL divergence, clip fraction for GRPO)
          example:
            loss/clip/high_fraction: 0.1
            loss/kl_ref/mean: 0.05
          additionalProperties:
            type: number
            format: double
    RL.TrainingOperationError:
      type: object
      description: Error details for a failed training operation
      properties:
        code:
          description: Application error code
          $ref: '#/components/schemas/RL.TrainingOperationErrorCode'
          example: TRAINING_OPERATION_ERROR_CODE_TIMEOUT
        message:
          description: Human-readable error message
          type: string
          example: Operation timed out
    RL.TrainingOperationErrorCode:
      type: string
      enum:
        - TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED
        - TRAINING_OPERATION_ERROR_CODE_RESOURCE_EXHAUSTED
        - TRAINING_OPERATION_ERROR_CODE_TIMEOUT
        - TRAINING_OPERATION_ERROR_CODE_INTERNAL_ERROR
        - TRAINING_OPERATION_ERROR_CODE_SESSION_NOT_ACTIVE
        - TRAINING_OPERATION_ERROR_CODE_INVALID_INPUT
      default: TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED
    RL.TrainingOperationStatus:
      type: string
      enum:
        - TRAINING_OPERATION_STATUS_UNSPECIFIED
        - TRAINING_OPERATION_STATUS_PENDING
        - TRAINING_OPERATION_STATUS_RUNNING
        - TRAINING_OPERATION_STATUS_COMPLETED
        - TRAINING_OPERATION_STATUS_FAILED
      default: TRAINING_OPERATION_STATUS_UNSPECIFIED
    RL.StartTrainingSessionRequest:
      type: object
      required:
        - base_model
      properties:
        base_model:
          description: Base model to use for the training session
          type: string
          example: meta-llama/Meta-Llama-3-8B-Instruct
        resume_from_checkpoint_id:
          description: Checkpoint ID to resume from
          type: string
          example: 123e4567-e89b-12d3-a456-426614174000
        type:
          $ref: '#/components/schemas/RL.SessionType'
          default: SESSION_TYPE_TRAINER_AND_GENERATOR
          example: SESSION_TYPE_TRAINER_ONLY
          description: >-
            Type of the training session. Defaults to TRAINER_AND_GENERATOR when
            unspecified. TRAINER_ONLY provisions only the trainer and rejects
            sample requests.
        lora_config:
          $ref: '#/components/schemas/RL.LoraConfig'
    RL.SessionType:
      description: >-
        Type of a training session. TRAINER_AND_GENERATOR provisions both
        trainer and generator; TRAINER_ONLY provisions only the trainer and
        rejects generator-dependent operations such as sample.
      default: SESSION_TYPE_UNSPECIFIED
      type: string
      enum:
        - SESSION_TYPE_UNSPECIFIED
        - SESSION_TYPE_TRAINER_AND_GENERATOR
        - SESSION_TYPE_TRAINER_ONLY
    RL.TrainingSessionStatus:
      description: Status of the training session
      default: TRAINING_SESSION_STATUS_UNSPECIFIED
      type: string
      enum:
        - TRAINING_SESSION_STATUS_UNSPECIFIED
        - TRAINING_SESSION_STATUS_CREATING
        - TRAINING_SESSION_STATUS_RUNNING
        - TRAINING_SESSION_STATUS_STOPPED
        - TRAINING_SESSION_STATUS_STOPPING
        - TRAINING_SESSION_STATUS_ERROR
        - TRAINING_SESSION_STATUS_EXPIRED
    RL.TrainingSession:
      type: object
      description: A training session and its current state
      required:
        - id
        - status
        - base_model
        - inference_checkpoints
        - training_checkpoints
        - step
        - created_at
        - updated_at
        - lora_config
        - type
      properties:
        id:
          type: string
          example: 123e4567-e89b-12d3-a456-426614174000
          description: ID of the training session
        status:
          $ref: '#/components/schemas/RL.TrainingSessionStatus'
          example: TRAINING_SESSION_STATUS_RUNNING
          description: Status of the training session
        base_model:
          type: string
          example: meta-llama/Meta-Llama-3-8B-Instruct
          description: Base model used for the training session
        inference_checkpoints:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.InferenceCheckpoint'
          description: List of saved inference checkpoints for this session
        training_checkpoints:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.TrainingCheckpoint'
          description: List of saved training checkpoints for this session
        resume_from_checkpoint_id:
          type: string
          example: 123e4567-e89b-12d3-a456-426614174000
          description: Checkpoint ID this session was resumed from
        step:
          description: Current training step
          oneOf:
            - type: string
            - type: integer
          example: '100'
          default: '0'
        created_at:
          type: string
          format: date-time
          example: '2026-01-02T00:00:00Z'
          description: Timestamp when the training session was created
        updated_at:
          type: string
          format: date-time
          example: '2026-01-02T00:00:05Z'
          description: Timestamp when the training session was last updated
        lora_config:
          $ref: '#/components/schemas/RL.LoraConfig'
        type:
          $ref: '#/components/schemas/RL.SessionType'
          example: SESSION_TYPE_TRAINER_AND_GENERATOR
          description: >-
            Type of the training session. TRAINER_AND_GENERATOR provisions both
            trainer and generator; TRAINER_ONLY provisions only the trainer and
            rejects generator-dependent operations such as sample.
    RL.TrainingSessionsListResponse:
      type: object
      description: Paginated list of training sessions
      properties:
        data:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.TrainingSession'
          description: List of training sessions
        meta:
          $ref: '#/components/schemas/RL.ListMeta'
          description: Pagination metadata
    RL.LoraConfig:
      type: object
      description: LoRA adapter configuration
      properties:
        rank:
          type: integer
          format: int64
          example: 8
          default: '8'
          description: Rank of the LoRA adapter
        alpha:
          type: integer
          format: int64
          example: 16
          default: '16'
          description: Alpha of the LoRA adapter
        dropout:
          type: number
          format: float
          example: 0.05
          default: '0.05'
          description: Dropout of the LoRA adapter
    RL.AdamWOptimizerParams:
      description: AdamW optimizer parameters
      type: object
      properties:
        beta1:
          description: First moment decay rate
          type: number
          format: float
          example: 0.9
          default: '0.9'
        beta2:
          description: Second moment decay rate
          type: number
          format: float
          example: 0.95
          default: '0.95'
        eps:
          description: Epsilon for numerical stability
          type: number
          format: float
          example: 1.e-8
          default: '1e-8'
        weight_decay:
          description: Weight decay coefficient
          type: number
          format: float
          example: 0.1
          default: '0.1'
    RL.InferenceCheckpointOperation:
      type: object
      description: Async inference checkpoint operation
      required:
        - id
        - status
      properties:
        id:
          type: string
          example: 550e8400-e29b-41d4-a716-446655440000
          description: Operation ID
        status:
          $ref: '#/components/schemas/RL.TrainingOperationStatus'
          example: TRAINING_OPERATION_STATUS_PENDING
          description: Operation status
        output:
          $ref: '#/components/schemas/RL.InferenceCheckpointResult'
          description: Result on success
        error:
          $ref: '#/components/schemas/RL.TrainingOperationError'
          description: Error details on failure
    RL.InferenceCheckpointResult:
      type: object
      description: Result of an inference checkpoint operation
      required:
        - model_name
      properties:
        model_name:
          type: string
          example: username/Meta-Llama-3-8B-rl-step-42-20260216
          description: Registered model name for downloading the checkpoint
    RL.InferenceCheckpoint:
      type: object
      description: Saved inference checkpoint
      required:
        - id
        - step
        - created_at
      properties:
        id:
          type: string
          example: 123e4567-e89b-12d3-a456-426614174000
          description: Unique identifier for the checkpoint
        step:
          oneOf:
            - type: string
            - type: integer
          example: '42'
          description: Training step at time of save
        created_at:
          type: string
          format: date-time
          example: '2026-01-02T00:00:00Z'
          description: Timestamp when the checkpoint was created
        registration:
          $ref: '#/components/schemas/RL.InferenceCheckpointRegistration'
          description: Model registration details
    RL.InferenceCheckpointRegistration:
      type: object
      description: Model registration details for an inference checkpoint
      required:
        - model_name
        - registered_at
      properties:
        model_name:
          type: string
          example: username/Meta-Llama-3-8B-rl-step-42-20260216
          description: Registered model name for downloading the checkpoint
        registered_at:
          type: string
          format: date-time
          example: '2026-01-02T00:00:00Z'
          description: Timestamp when the model was registered
    RL.TrainingCheckpoint:
      type: object
      description: Saved training checkpoint
      required:
        - id
        - step
        - created_at
      properties:
        id:
          type: string
          example: 123e4567-e89b-12d3-a456-426614174000
          description: Unique identifier for the checkpoint
        step:
          oneOf:
            - type: string
            - type: integer
          example: '42'
          description: Training step at time of save
        created_at:
          type: string
          format: date-time
          example: '2026-01-02T00:00:00Z'
          description: Timestamp when the checkpoint was created
    RL.TrainingCheckpointResult:
      type: object
      description: Result of a save training checkpoint operation
      required:
        - checkpoint_id
      properties:
        checkpoint_id:
          type: string
          example: 550e8400-e29b-41d4-a716-446655440000
          description: ID of the saved training checkpoint (use for resume via Start)
    RL.TrainingCheckpointOperation:
      type: object
      description: Async save training checkpoint operation
      required:
        - id
        - status
      properties:
        id:
          type: string
          example: 550e8400-e29b-41d4-a716-446655440000
          description: Operation ID
        status:
          $ref: '#/components/schemas/RL.TrainingOperationStatus'
          example: TRAINING_OPERATION_STATUS_PENDING
          description: Operation status
        output:
          $ref: '#/components/schemas/RL.TrainingCheckpointResult'
          description: Result on success
        error:
          $ref: '#/components/schemas/RL.TrainingOperationError'
          description: Error details on failure
    RL.CheckpointVariant:
      type: string
      enum:
        - CHECKPOINT_VARIANT_UNSPECIFIED
        - CHECKPOINT_VARIANT_MERGED
        - CHECKPOINT_VARIANT_ADAPTER
      default: CHECKPOINT_VARIANT_UNSPECIFIED
      description: 'Checkpoint variant: merged (full model) or adapter (LoRA weights only)'
    RL.CheckpointFile:
      type: object
      description: A downloadable file within a checkpoint
      required:
        - filename
        - url
        - size
      properties:
        filename:
          type: string
          example: model-00001-of-00002.safetensors
          description: Name of the file
        url:
          type: string
          example: https://...
          description: Presigned URL for downloading the file
        size:
          oneOf:
            - type: string
            - type: integer
          example: '123456789'
          description: File size in bytes
    RL.CheckpointDownloadResponse:
      type: object
      description: Presigned download URLs for a checkpoint's files
      required:
        - data
      properties:
        data:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/RL.CheckpointFile'
          description: List of files with presigned download URLs
    ErrorResponse:
      type: object
      properties:
        code:
          type: integer
        message:
          type: string
    GPUClusterControlPlaneNode:
      type: object
      required:
        - node_id
        - node_name
        - status
        - host_name
        - num_cpu_cores
        - memory_gib
        - network
      properties:
        node_id:
          type: string
        node_name:
          type: string
        status:
          type: string
        host_name:
          type: string
        num_cpu_cores:
          type: integer
        memory_gib:
          type: number
        network:
          type: string
    GPUClusterCreateRequest:
      description: GPU Cluster create request
      required:
        - region
        - gpu_type
        - num_gpus
        - cluster_name
        - cuda_version
        - nvidia_driver_version
        - billing_type
      type: object
      properties:
        cluster_type:
          description: Type of cluster to create.
          type: string
          enum:
            - KUBERNETES
            - SLURM
        region:
          description: >-
            Region to create the GPU cluster in. Usable regions can be found
            from `client.clusters.list_regions()`
          type: string
        gpu_type:
          description: Type of GPU to use in the cluster
          type: string
          enum:
            - H100_SXM
            - H200_SXM
            - RTX_6000_PCI
            - L40_PCIE
            - B200_SXM
            - H100_SXM_INF
        num_gpus:
          description: >-
            Number of GPUs to allocate in the cluster. This must be multiple of
            8. For example, 8, 16 or 24
          type: integer
        cluster_name:
          description: Name of the GPU cluster.
          type: string
        duration_days:
          x-stainless-terraform-configurability: computed
          description: Duration in days to keep the cluster running.
          type: integer
        shared_volume:
          x-stainless-terraform-configurability: computed
          $ref: '#/components/schemas/GPUClustersSharedVolumeCreateRequest'
          description: >-
            Inline configuration to create a shared volume with the cluster
            creation.
        volume_id:
          description: ID of an existing volume to use with the cluster creation.
          type: string
        billing_type:
          description: >
            RESERVED billing types allow you to specify the duration of the
            cluster reservation via the duration_days field.

            ON_DEMAND billing types will give you ownership of the cluster until
            you delete it.

            SCHEDULED_CAPACITY billing types allow you to reserve capacity for a
            scheduled time window. You must specify the reservation_start_time
            and reservation_end_time with this request.
          x-stainless-terraform-configurability: computed
          type: string
          enum:
            - RESERVED
            - ON_DEMAND
            - SCHEDULED_CAPACITY
        gpu_node_failover_enabled:
          type: boolean
          default: false
          description: >-
            Whether automated GPU node failover should be enabled for this
            cluster. By default, it is disabled.
        auto_scaled:
          type: boolean
          default: false
          description: >-
            Whether GPU cluster should be auto-scaled based on the workload. By
            default, it is not auto-scaled.
        auto_scale_max_gpus:
          type: integer
          description: >-
            Maximum number of GPUs to which the cluster can be auto-scaled up.
            This field is required if auto_scaled is true.
        slurm_shm_size_gib:
          type: integer
          description: >-
            Shared memory size in GiB for Slurm cluster. This field is required
            if cluster_type is SLURM.
        capacity_pool_id:
          type: string
          description: >-
            ID of the capacity pool to use for the cluster. This field is
            optional and only applicable if the cluster is created from a
            capacity pool.
        reservation_start_time:
          type: string
          description: >-
            Reservation start time of the cluster. This field is required for
            SCHEDULED billing to specify the reservation start time for the
            cluster. If not provided, the cluster will be provisioned
            immediately.
          format: date-time
        reservation_end_time:
          type: string
          description: >-
            Reservation end time of the cluster. This field is required for
            SCHEDULED billing to specify the reservation end time for the
            cluster.
          format: date-time
        install_traefik:
          type: boolean
          default: false
          description: >-
            Whether to install Traefik ingress controller in the cluster. This
            field is only applicable for Kubernetes clusters and is false by
            default.
        cuda_version:
          type: string
          description: CUDA version for this cluster. For example, 12.5
        nvidia_driver_version:
          type: string
          description: >-
            Nvidia driver version for this cluster. For example, 550. Only some
            combination of cuda_version and nvidia_driver_version are supported.
        slurm_image:
          type: string
          description: Custom Slurm image for Slurm clusters.
    GPUClusterGPUWorkerNode:
      type: object
      required:
        - node_id
        - node_name
        - status
        - host_name
        - num_cpu_cores
        - num_gpus
        - memory_gib
        - networks
      properties:
        node_id:
          type: string
        node_name:
          type: string
        status:
          type: string
        host_name:
          type: string
        num_cpu_cores:
          type: integer
        num_gpus:
          type: integer
        memory_gib:
          type: number
        networks:
          type: array
          items:
            type: string
        instance_id:
          type: string
    GPUClusterInfo:
      type: object
      required:
        - cluster_id
        - cluster_type
        - region
        - gpu_type
        - cluster_name
        - cuda_version
        - nvidia_driver_version
        - volumes
        - status
        - control_plane_nodes
        - gpu_worker_nodes
        - kube_config
        - num_gpus
      properties:
        cluster_id:
          type: string
        cluster_type:
          description: Type of cluster.
          enum:
            - KUBERNETES
            - SLURM
        region:
          type: string
        gpu_type:
          enum:
            - H100_SXM
            - H200_SXM
            - RTX_6000_PCI
            - L40_PCIE
            - B200_SXM
            - H100_SXM_INF
        cluster_name:
          type: string
        duration_hours:
          type: integer
        volumes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterVolume'
        status:
          description: Current status of the GPU cluster.
          enum:
            - WaitingForControlPlaneNodes
            - WaitingForDataPlaneNodes
            - WaitingForSubnet
            - WaitingForSharedVolume
            - InstallingDrivers
            - RunningAcceptanceTests
            - Paused
            - OnDemandComputePaused
            - Ready
            - Degraded
            - Deleting
        control_plane_nodes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterControlPlaneNode'
        gpu_worker_nodes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterGPUWorkerNode'
        kube_config:
          type: string
        num_gpus:
          type: integer
        slurm_shm_size_gib:
          type: integer
        capacity_pool_id:
          type: string
        reservation_start_time:
          type: string
          format: date-time
        reservation_end_time:
          type: string
          format: date-time
        install_traefik:
          type: boolean
        cuda_version:
          type: string
        nvidia_driver_version:
          type: string
        created_at:
          type: string
          format: date-time
    GPUClusterUpdateRequest:
      type: object
      properties:
        cluster_type:
          description: Type of cluster to update.
          enum:
            - KUBERNETES
            - SLURM
        num_gpus:
          description: >-
            Number of GPUs to allocate in the cluster. This must be multiple of
            8. For example, 8, 16 or 24
          type: integer
        reservation_end_time:
          type: string
          description: >-
            Timestamp at which the cluster should be decommissioned. Only
            accepted for prepaid clusters.
          format: date-time
    GPUClusterVolume:
      type: object
      required:
        - volume_id
        - volume_name
        - size_tib
        - status
      properties:
        volume_id:
          type: string
        volume_name:
          type: string
        size_tib:
          type: integer
        status:
          type: string
    GPUClusters:
      type: object
      required:
        - clusters
      properties:
        clusters:
          type: array
          items:
            $ref: '#/components/schemas/GPUClusterInfo'
    InstanceTypesResponse:
      type: object
      properties:
        types:
          type: array
          items:
            enum:
              - H100_SXM
              - H200_SXM
              - RTX_6000_PCI
              - L40_PCIE
              - B200_SXM
              - H100_SXM_INF
          error:
            $ref: '#/components/schemas/ErrorResponse'
    RegionListResponse:
      type: object
      required:
        - regions
      properties:
        regions:
          type: array
          items:
            type: object
            required:
              - name
              - supported_instance_types
              - driver_versions
            properties:
              name:
                description: Identifiable name of the region.
                type: string
              driver_versions:
                description: >-
                  List of supported identifiable cuda/nvidia driver versions
                  pairs available in the region.
                type: array
                items:
                  $ref: '#/components/schemas/ClusterDriverVersionInfo'
              supported_instance_types:
                description: List of supported identifiable gpus available in the region.
                type: array
                items:
                  type: string
    ClusterDriverVersionInfo:
      type: object
      description: >-
        CUDA/NVIDIA driver versions pair available in the region to use in the
        create cluster request.
      required:
        - cuda_version
        - nvidia_driver_version
      properties:
        cuda_version:
          description: CUDA driver version.
          type: string
        nvidia_driver_version:
          description: NVIDIA driver version.
          type: string
    GPUClustersSharedVolumeCreateRequest:
      type: object
      required:
        - volume_name
        - size_tib
        - region
      properties:
        volume_name:
          description: Customizable name of the volume to create.
          type: string
        size_tib:
          description: Volume size in whole tebibytes (TiB).
          type: integer
        region:
          type: string
          description: >-
            Region name. Usable regions can be found from
            `client.clusters.list_regions()`
    GPUClustersSharedVolumeDeleteResponse:
      type: object
      required:
        - success
      properties:
        success:
          type: boolean
    GPUClustersSharedVolume:
      type: object
      required:
        - volume_id
        - volume_name
        - size_tib
        - status
      properties:
        volume_id:
          description: ID of the volume.
          type: string
        volume_name:
          description: Provided name of the volume.
          type: string
        size_tib:
          description: Size of the volume in whole tebibytes (TiB).
          type: integer
        status:
          description: Deployment status of the volume.
          type: string
          enum:
            - available
            - bound
            - provisioning
    GPUClustersSharedVolumeUpdateRequest:
      type: object
      properties:
        volume_id:
          description: ID of the volume to update.
          type: string
        size_tib:
          description: Size of the volume in whole tebibytes (TiB).
          type: integer
    GPUClustersSharedVolumes:
      type: object
      required:
        - volumes
      properties:
        volumes:
          type: array
          items:
            $ref: '#/components/schemas/GPUClustersSharedVolume'
    ListVoicesResponse:
      description: Response containing a list of models and their available voices.
      type: object
      required:
        - data
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/ModelVoices'
    ModelVoices:
      description: Represents a model with its available voices.
      type: object
      required:
        - model
        - voices
      properties:
        model:
          description: Model name.
          type: string
        voices:
          description: List of available voices for the model.
          type: array
          items:
            type: object
            required:
              - id
              - name
            properties:
              id:
                type: string
              name:
                description: Voice name to be used for audio inference.
                type: string
    ListAvailibilityZonesResponse:
      description: List of unique availability zones
      type: object
      required:
        - avzones
      properties:
        avzones:
          type: array
          items:
            type: string
    RerankRequest:
      type: object
      properties:
        model:
          type: string
          description: >
            The model to be used for the rerank request.<br> <br> [See all of
            Together AI's rerank
            models](https://docs.together.ai/docs/serverless-models#rerank-models)
          example: Salesforce/Llama-Rank-V1
          anyOf:
            - type: string
              enum:
                - Salesforce/Llama-Rank-v1
            - type: string
        query:
          type: string
          description: The search query to be used for ranking.
          example: What animals can I find near Peru?
        documents:
          description: List of documents, which can be either strings or objects.
          oneOf:
            - type: array
              items:
                type: object
                additionalProperties: true
            - type: array
              items:
                type: string
                example: >-
                  Our solar system orbits the Milky Way galaxy at about 515,000
                  mph
          example:
            - title: Llama
              text: >-
                The llama is a domesticated South American camelid, widely used
                as a meat and pack animal by Andean cultures since the
                pre-Columbian era.
            - title: Panda
              text: >-
                The giant panda (Ailuropoda melanoleuca), also known as the
                panda bear or simply panda, is a bear species endemic to China.
            - title: Guanaco
              text: >-
                The guanaco is a camelid native to South America, closely
                related to the llama. Guanacos are one of two wild South
                American camelids; the other species is the vicuña, which lives
                at higher elevations.
            - title: Wild Bactrian camel
              text: >-
                The wild Bactrian camel (Camelus ferus) is an endangered species
                of camel endemic to Northwest China and southwestern Mongolia.
        top_n:
          type: integer
          description: The number of top results to return.
          example: 2
        return_documents:
          type: boolean
          description: Whether to return supplied documents with the response.
          example: true
        rank_fields:
          type: array
          items:
            type: string
          description: >-
            List of keys in the JSON Object document to rank by. Defaults to use
            all supplied keys for ranking.
          example:
            - title
            - text
      required:
        - model
        - query
        - documents
      additionalProperties: false
    RerankResponse:
      type: object
      required:
        - object
        - model
        - results
      properties:
        object:
          description: The object type, which is always `rerank`.
          const: rerank
        id:
          type: string
          description: Request ID
          example: 9dfa1a09-5ebc-4a40-970f-586cb8f4ae47
        model:
          type: string
          description: The model to be used for the rerank request.
          example: salesforce/turboranker-0.8-3778-6328
        results:
          type: array
          items:
            type: object
            required:
              - index
              - relevance_score
              - document
            properties:
              index:
                type: integer
              relevance_score:
                type: number
              document:
                type: object
                properties:
                  text:
                    type: string
                    nullable: true
          example:
            - index: 0
              relevance_score: 0.29980177813003117
              document:
                text: >-
                  {"title":"Llama","text":"The llama is a domesticated South
                  American camelid, widely used as a meat and pack animal by
                  Andean cultures since the pre-Columbian era."}
            - index: 2
              relevance_score: 0.2752447527354349
              document:
                text: >-
                  {"title":"Guanaco","text":"The guanaco is a camelid native to
                  South America, closely related to the llama. Guanacos are one
                  of two wild South American camelids; the other species is the
                  vicuña, which lives at higher elevations."}
        usage:
          $ref: '#/components/schemas/UsageData'
          example:
            prompt_tokens: 1837
            completion_tokens: 0
            total_tokens: 1837
    ErrorData:
      type: object
      required:
        - error
      properties:
        error:
          type: object
          properties:
            message:
              type: string
              nullable: false
            type:
              type: string
              nullable: false
            param:
              type: string
              nullable: true
              default: null
            code:
              type: string
              nullable: true
              default: null
          required:
            - type
            - message
    FinishReason:
      type: string
      enum:
        - stop
        - eos
        - length
        - tool_calls
        - function_call
    TopLogprobs:
      description: Top log probabilities for the tokens.
      type: object
      additionalProperties:
        type: number
    LogprobsPart:
      type: object
      properties:
        token_ids:
          type: array
          items:
            type: number
          description: List of token IDs corresponding to the logprobs
        tokens:
          type: array
          items:
            type: string
          description: List of token strings
        token_logprobs:
          type: array
          items:
            type: number
          description: List of token log probabilities
        top_logprobs:
          $ref: '#/components/schemas/TopLogprobs'
    PromptPart:
      type: array
      items:
        type: object
        properties:
          text:
            type: string
            example: <s>[INST] What is the capital of France? [/INST]
          logprobs:
            $ref: '#/components/schemas/LogprobsPart'
    InferenceWarning:
      type: object
      required:
        - message
      properties:
        message:
          type: string
    UsageData:
      type: object
      properties:
        prompt_tokens:
          type: integer
        completion_tokens:
          type: integer
        total_tokens:
          type: integer
      required:
        - prompt_tokens
        - completion_tokens
        - total_tokens
      nullable: true
    CompletionChoicesData:
      type: array
      items:
        type: object
        properties:
          text:
            type: string
            example: >-
              The capital of France is Paris. It's located in the north-central
              part of the country and is one of the most populous and visited
              cities in the world, known for its iconic landmarks like the
              Eiffel Tower, Louvre Museum, Notre-Dame Cathedral, and more. Paris
              is also the capital of the Île-de-France region and is a major
              global center for art, fashion, gastronomy, and culture.
          seed:
            type: integer
          finish_reason:
            $ref: '#/components/schemas/FinishReason'
          logprobs:
            $ref: '#/components/schemas/LogprobsPart'
    CompletionRequest:
      type: object
      required:
        - model
        - prompt
      properties:
        prompt:
          type: string
          description: A string providing context for the model to complete.
          example: <s>[INST] What is the capital of France? [/INST]
        model:
          type: string
          description: >
            The name of the model to query.<br> <br> [See all of Together AI's
            chat
            models](https://docs.together.ai/docs/serverless-models#chat-models)
          example: mistralai/Mixtral-8x7B-Instruct-v0.1
          anyOf:
            - type: string
              enum:
                - meta-llama/Llama-2-70b-hf
                - mistralai/Mistral-7B-v0.1
                - mistralai/Mixtral-8x7B-v0.1
                - Meta-Llama/Llama-Guard-7b
            - type: string
        max_tokens:
          type: integer
          description: The maximum number of tokens to generate.
        stop:
          type: array
          description: >-
            A list of string sequences that will truncate (stop) inference text
            output. For example, "</s>" will stop generation as soon as the
            model generates the given token.
          items:
            type: string
        temperature:
          type: number
          description: >-
            A decimal number from 0-1 that determines the degree of randomness
            in the response. A temperature less than 1 favors more correctness
            and is appropriate for question answering or summarization. A value
            closer to 1 introduces more randomness in the output.
          format: float
        top_p:
          type: number
          description: >-
            A percentage (also called the nucleus parameter) that's used to
            dynamically adjust the number of choices for each predicted token
            based on the cumulative probabilities. It specifies a probability
            threshold below which all less likely tokens are filtered out. This
            technique helps maintain diversity and generate more fluent and
            natural-sounding text.
          format: float
        top_k:
          type: integer
          description: >-
            An integer that's used to limit the number of choices for the next
            predicted word or token. It specifies the maximum number of tokens
            to consider at each step, based on their probability of occurrence.
            This technique helps to speed up the generation process and can
            improve the quality of the generated text by focusing on the most
            likely options.
          format: int32
        repetition_penalty:
          type: number
          description: >-
            A number that controls the diversity of generated text by reducing
            the likelihood of repeated sequences. Higher values decrease
            repetition.
          format: float
        stream:
          type: boolean
          description: >-
            If true, stream tokens as Server-Sent Events as the model generates
            them instead of waiting for the full model response. The stream
            terminates with `data: [DONE]`. If false, return a single JSON
            object containing the results.
        logprobs:
          type: integer
          minimum: 0
          maximum: 20
          description: >-
            An integer between 0 and 20 of the top k tokens to return log
            probabilities for at each generation step, instead of just the
            sampled token. Log probabilities help assess model confidence in
            token predictions.
        echo:
          type: boolean
          description: >-
            If true, the response will contain the prompt. Can be used with
            `logprobs` to return prompt logprobs.
        'n':
          type: integer
          description: The number of completions to generate for each prompt.
          minimum: 1
          maximum: 128
        safety_model:
          type: string
          description: >-
            The name of the moderation model used to validate tokens. Choose
            from the available moderation models found
            [here](https://docs.together.ai/docs/inference-models#moderation-models).
          example: safety_model_name
          anyOf:
            - type: string
              enum:
                - Meta-Llama/Llama-Guard-7b
            - type: string
        min_p:
          type: number
          description: >-
            A number between 0 and 1 that can be used as an alternative to top-p
            and top-k.
          format: float
        presence_penalty:
          type: number
          description: >-
            A number between -2.0 and 2.0 where a positive value increases the
            likelihood of a model talking about new topics.
          format: float
        frequency_penalty:
          type: number
          description: >-
            A number between -2.0 and 2.0 where a positive value decreases the
            likelihood of repeating tokens that have already been mentioned.
          format: float
        logit_bias:
          type: object
          additionalProperties:
            type: number
            format: float
          description: >-
            Adjusts the likelihood of specific tokens appearing in the generated
            output.
          example:
            '105': 21.4
            '1024': -10.5
        seed:
          type: integer
          description: Seed value for reproducibility.
          example: 42
    CompletionResponse:
      type: object
      properties:
        id:
          type: string
        choices:
          $ref: '#/components/schemas/CompletionChoicesData'
        prompt:
          description: >-
            When `echo` is true, the prompt is included in the response.
            Additionally, when `logprobs` is also provided, log probability
            information is provided on the prompt.
          $ref: '#/components/schemas/PromptPart'
        usage:
          $ref: '#/components/schemas/UsageData'
        created:
          type: integer
        model:
          type: string
        object:
          description: The object type, which is always `text.completion`.
          const: text.completion
      required:
        - id
        - choices
        - usage
        - created
        - prompt
        - model
        - object
    CompletionStream:
      oneOf:
        - $ref: '#/components/schemas/CompletionEvent'
        - $ref: '#/components/schemas/StreamSentinel'
    CompletionEvent:
      type: object
      required:
        - data
      properties:
        data:
          $ref: '#/components/schemas/CompletionChunk'
    CompletionChunk:
      type: object
      required:
        - id
        - token
        - choices
        - usage
        - finish_reason
      properties:
        id:
          type: string
        token:
          $ref: '#/components/schemas/CompletionToken'
        created:
          type: integer
        object:
          description: The object type, which is always `completion.chunk`.
          const: completion.chunk
        choices:
          title: CompletionChoices
          type: array
          items:
            $ref: '#/components/schemas/CompletionChoice'
        usage:
          allOf:
            - $ref: '#/components/schemas/UsageData'
            - nullable: true
        seed:
          type: integer
        finish_reason:
          allOf:
            - $ref: '#/components/schemas/FinishReason'
            - nullable: true
    CompletionChoice:
      type: object
      required:
        - index
      properties:
        text:
          type: string
        index:
          type: integer
        delta:
          title: CompletionChoiceDelta
          type: object
          required:
            - role
          properties:
            token_id:
              type: integer
            role:
              type: string
              enum:
                - system
                - user
                - assistant
                - function
                - tool
            content:
              type: string
              nullable: true
            reasoning:
              type: string
              nullable: true
            tool_calls:
              type: array
              items:
                $ref: '#/components/schemas/ToolChoice'
            function_call:
              type: object
              deprecated: true
              nullable: true
              properties:
                arguments:
                  type: string
                name:
                  type: string
              required:
                - arguments
                - name
    CompletionToken:
      type: object
      required:
        - id
        - text
        - logprob
        - special
      properties:
        id:
          type: integer
        text:
          type: string
        logprob:
          type: number
        special:
          type: boolean
    ChatCompletionChoicesData:
      type: array
      items:
        type: object
        properties:
          text:
            type: string
          index:
            type: integer
          seed:
            type: integer
          finish_reason:
            $ref: '#/components/schemas/FinishReason'
          message:
            $ref: '#/components/schemas/ChatCompletionMessage'
          logprobs:
            allOf:
              - nullable: true
              - $ref: '#/components/schemas/LogprobsPart'
          top_logprobs:
            $ref: '#/components/schemas/TopLogprobs'
    ChatCompletionMessage:
      type: object
      required:
        - role
        - content
      properties:
        content:
          type: string
          nullable: true
        role:
          type: string
          enum:
            - assistant
        tool_calls:
          type: array
          items:
            $ref: '#/components/schemas/ToolChoice'
        function_call:
          type: object
          deprecated: true
          required:
            - arguments
            - name
          properties:
            arguments:
              type: string
            name:
              type: string
        reasoning:
          type: string
          nullable: true
    ChatCompletionTool:
      type: object
      required:
        - type
        - function
      properties:
        type:
          type: string
          enum:
            - function
        function:
          type: object
          required:
            - name
          properties:
            description:
              type: string
            name:
              type: string
            parameters:
              type: object
              additionalProperties: true
    ChatCompletionRequest:
      type: object
      required:
        - model
        - messages
      properties:
        messages:
          type: array
          description: A list of messages comprising the conversation so far.
          items:
            $ref: '#/components/schemas/ChatCompletionMessageParam'
        model:
          type: string
          description: >
            The name of the model to query.<br> <br> [See all of Together AI's
            chat
            models](https://docs.together.ai/docs/serverless-models#chat-models)
        max_tokens:
          type: integer
          description: The maximum number of tokens to generate.
        stop:
          type: array
          description: >-
            A list of string sequences that will truncate (stop) inference text
            output. For example, "</s>" will stop generation as soon as the
            model generates the given token.
          items:
            type: string
        temperature:
          type: number
          description: >-
            A decimal number from 0-1 that determines the degree of randomness
            in the response. A temperature less than 1 favors more correctness
            and is appropriate for question answering or summarization. A value
            closer to 1 introduces more randomness in the output.
          format: float
        top_p:
          type: number
          description: >-
            A percentage (also called the nucleus parameter) that's used to
            dynamically adjust the number of choices for each predicted token
            based on the cumulative probabilities. It specifies a probability
            threshold below which all less likely tokens are filtered out. This
            technique helps maintain diversity and generate more fluent and
            natural-sounding text.
          format: float
        top_k:
          type: integer
          description: >-
            An integer that's used to limit the number of choices for the next
            predicted word or token. It specifies the maximum number of tokens
            to consider at each step, based on their probability of occurrence.
            This technique helps to speed up the generation process and can
            improve the quality of the generated text by focusing on the most
            likely options.
          format: int32
        context_length_exceeded_behavior:
          type: string
          enum:
            - truncate
            - error
          default: error
          description: >-
            Defined the behavior of the API when max_tokens exceed the maximum
            context length of the model. When set to 'error', API will return
            400 with appropriate error message. When set to 'truncate', override
            the max_tokens with maximum context length of the model.
        repetition_penalty:
          type: number
          description: >-
            A number that controls the diversity of generated text by reducing
            the likelihood of repeated sequences. Higher values decrease
            repetition.
        stream:
          type: boolean
          description: >-
            If true, stream tokens as Server-Sent Events as the model generates
            them instead of waiting for the full model response. The stream
            terminates with `data: [DONE]`. If false, return a single JSON
            object containing the results.
        logprobs:
          type: integer
          minimum: 0
          maximum: 20
          description: >-
            An integer between 0 and 20 of the top k tokens to return log
            probabilities for at each generation step, instead of just the
            sampled token. Log probabilities help assess model confidence in
            token predictions.
        echo:
          type: boolean
          description: >-
            If true, the response will contain the prompt. Can be used with
            `logprobs` to return prompt logprobs.
        'n':
          type: integer
          description: The number of completions to generate for each prompt.
          minimum: 1
          maximum: 128
        min_p:
          type: number
          description: >-
            A number between 0 and 1 that can be used as an alternative to top_p
            and top-k.
          format: float
        presence_penalty:
          type: number
          description: >-
            A number between -2.0 and 2.0 where a positive value increases the
            likelihood of a model talking about new topics.
          format: float
        frequency_penalty:
          type: number
          description: >-
            A number between -2.0 and 2.0 where a positive value decreases the
            likelihood of repeating tokens that have already been mentioned.
          format: float
        logit_bias:
          type: object
          additionalProperties:
            type: number
            format: float
          description: >-
            Adjusts the likelihood of specific tokens appearing in the generated
            output.
          example:
            '105': 21.4
            '1024': -10.5
        seed:
          type: integer
          description: Seed value for reproducibility.
          example: 42
        function_call:
          oneOf:
            - type: string
              enum:
                - none
                - auto
            - type: object
              required:
                - name
              properties:
                name:
                  type: string
        response_format:
          description: >
            An object specifying the format that the model must output.


            Setting to `{ "type": "json_schema", "json_schema": {...} }` enables

            Structured Outputs which ensures the model will match your supplied
            JSON

            schema. Learn more in the [Structured Outputs

            guide](https://docs.together.ai/docs/json-mode).


            Setting to `{ "type": "json_object" }` enables the older JSON mode,
            which

            ensures the message the model generates is valid JSON. Using
            `json_schema`

            is preferred for models that support it.
          discriminator:
            propertyName: type
          anyOf:
            - $ref: '#/components/schemas/ResponseFormatText'
            - $ref: '#/components/schemas/ResponseFormatJsonSchema'
            - $ref: '#/components/schemas/ResponseFormatJsonObject'
        tools:
          type: array
          description: >-
            A list of tools the model may call. Currently, only functions are
            supported as a tool. Use this to provide a list of functions the
            model may generate JSON inputs for.
          items:
            $ref: '#/components/schemas/ToolsPart'
        tool_choice:
          description: >-
            Controls which (if any) function is called by the model. By default
            uses `auto`, which lets the model pick between generating a message
            or calling a function.
          oneOf:
            - type: string
              example: tool_name
            - $ref: '#/components/schemas/ToolChoice'
        compliance:
          const: hipaa
        chat_template_kwargs:
          description: Additional configuration to pass to model engine.
          type: object
          additional_properties: true
        safety_model:
          type: string
          description: >-
            The name of the moderation model used to validate tokens. Choose
            from the available moderation models found
            [here](https://docs.together.ai/docs/inference-models#moderation-models).
          example: safety_model_name
        reasoning_effort:
          type: string
          enum:
            - low
            - medium
            - high
          description: >-
            Controls the level of reasoning effort the model should apply when
            generating responses. Higher values may result in more thoughtful
            and detailed responses but may take longer to generate.
          example: medium
        reasoning:
          description: >-
            For models that support toggling reasoning functionality, this
            object can be used to control that functionality.
          type: object
          properties:
            enabled:
              type: boolean
    ResponseFormatText:
      type: object
      title: Text
      description: |
        Default response format. Used to generate text responses.
      properties:
        type:
          type: string
          description: The type of response format being defined. Always `text`.
          enum:
            - text
          x-stainless-const: true
      required:
        - type
    ResponseFormatJsonObject:
      type: object
      title: JSON object
      description: >
        JSON object response format. An older method of generating JSON
        responses.

        Using `json_schema` is recommended for models that support it. Note that
        the

        model will not generate JSON without a system or user message
        instructing it

        to do so.
      properties:
        type:
          type: string
          description: The type of response format being defined. Always `json_object`.
          enum:
            - json_object
          x-stainless-const: true
      required:
        - type
    ResponseFormatJsonSchema:
      type: object
      title: JSON schema
      description: >
        JSON Schema response format. Used to generate structured JSON responses.

        Learn more about [Structured
        Outputs](https://docs.together.ai/docs/json-mode).
      properties:
        type:
          type: string
          description: The type of response format being defined. Always `json_schema`.
          enum:
            - json_schema
          x-stainless-const: true
        json_schema:
          type: object
          title: JSON schema
          description: |
            Structured Outputs configuration options, including a JSON Schema.
          properties:
            description:
              type: string
              description: >
                A description of what the response format is for, used by the
                model to

                determine how to respond in the format.
            name:
              type: string
              description: >
                The name of the response format. Must be a-z, A-Z, 0-9, or
                contain

                underscores and dashes, with a maximum length of 64.
            schema:
              $ref: '#/components/schemas/ResponseFormatJsonSchemaSchema'
            strict:
              anyOf:
                - type: boolean
                  default: false
                  description: >
                    Whether to enable strict schema adherence when generating
                    the output.

                    If set to true, the model will always follow the exact
                    schema defined

                    in the `schema` field. Only a subset of JSON Schema is
                    supported when

                    `strict` is `true`. To learn more, read the [Structured
                    Outputs

                    guide](https://docs.together.ai/docs/json-mode).
                - type: 'null'
          required:
            - name
      required:
        - type
        - json_schema
    ResponseFormatJsonSchemaSchema:
      type: object
      title: JSON schema
      description: |
        The schema for the response format, described as a JSON Schema object.
        Learn how to build JSON schemas [here](https://json-schema.org/).
      additionalProperties: true
    ChatCompletionMessageParam:
      oneOf:
        - $ref: '#/components/schemas/ChatCompletionSystemMessageParam'
        - $ref: '#/components/schemas/ChatCompletionUserMessageParam'
        - $ref: '#/components/schemas/ChatCompletionAssistantMessageParam'
        - $ref: '#/components/schemas/ChatCompletionToolMessageParam'
        - $ref: '#/components/schemas/ChatCompletionFunctionMessageParam'
    ChatCompletionSystemMessageParam:
      type: object
      required:
        - content
        - role
      properties:
        content:
          type: string
        role:
          type: string
          enum:
            - system
        name:
          type: string
    ChatCompletionUserMessageParam:
      type: object
      required:
        - content
        - role
      properties:
        content:
          $ref: '#/components/schemas/ChatCompletionUserMessageContent'
        role:
          type: string
          enum:
            - user
        name:
          type: string
    ChatCompletionUserMessageContentString:
      type: string
      description: A plain text message.
    ChatCompletionUserMessageContentMultimodal:
      type: array
      description: A structured message with mixed content types.
      items:
        type: object
        oneOf:
          - type: object
            properties:
              type:
                type: string
                enum:
                  - text
              text:
                type: string
            required:
              - type
              - text
          - type: object
            properties:
              type:
                type: string
                enum:
                  - image_url
              image_url:
                type: object
                properties:
                  url:
                    type: string
                    description: The URL of the image
                required:
                  - url
          - type: object
            title: Video
            properties:
              type:
                type: string
                enum:
                  - video_url
              video_url:
                type: object
                properties:
                  url:
                    type: string
                    description: The URL of the video
                required:
                  - url
            required:
              - type
              - video_url
          - type: object
            title: Audio
            properties:
              type:
                type: string
                enum:
                  - audio_url
              audio_url:
                type: object
                properties:
                  url:
                    type: string
                    description: The URL of the audio
                required:
                  - url
            required:
              - type
              - audio_url
          - type: object
            title: Input Audio
            properties:
              type:
                type: string
                enum:
                  - input_audio
              input_audio:
                type: object
                properties:
                  data:
                    type: string
                    description: The base64 encoded audio data
                  format:
                    type: string
                    description: The format of the audio data
                    enum:
                      - wav
                required:
                  - data
                  - format
            required:
              - type
              - input_audio
    ChatCompletionUserMessageContent:
      description: >-
        The content of the message, which can either be a simple string or a
        structured format.
      oneOf:
        - $ref: '#/components/schemas/ChatCompletionUserMessageContentString'
        - $ref: '#/components/schemas/ChatCompletionUserMessageContentMultimodal'
    ChatCompletionAssistantMessageParam:
      type: object
      required:
        - role
      properties:
        content:
          type: string
          nullable: true
        role:
          type: string
          enum:
            - assistant
        name:
          type: string
        tool_calls:
          type: array
          items:
            $ref: '#/components/schemas/ToolChoice'
        function_call:
          type: object
          deprecated: true
          properties:
            arguments:
              type: string
            name:
              type: string
          required:
            - arguments
            - name
    ChatCompletionFunctionMessageParam:
      type: object
      deprecated: true
      required:
        - content
        - role
        - name
      properties:
        role:
          type: string
          enum:
            - function
        content:
          type: string
        name:
          type: string
    ChatCompletionToolMessageParam:
      type: object
      properties:
        name:
          type: string
        role:
          type: string
          enum:
            - tool
        content:
          type: string
        tool_call_id:
          type: string
      required:
        - role
        - content
        - tool_call_id
    ChatCompletionResponse:
      type: object
      properties:
        id:
          type: string
        choices:
          $ref: '#/components/schemas/ChatCompletionChoicesData'
        usage:
          $ref: '#/components/schemas/UsageData'
        created:
          type: integer
        model:
          type: string
        prompt:
          description: >-
            When `echo` is true, the prompt is included in the response.
            Additionally, when `logprobs` is also provided, log probability
            information is provided on the prompt.
          $ref: '#/components/schemas/PromptPart'
        object:
          description: The object type, which is always `chat.completion`.
          const: chat.completion
        warnings:
          type: array
          items:
            $ref: '#/components/schemas/InferenceWarning'
      required:
        - choices
        - id
        - created
        - model
        - object
        - prompt
    ChatCompletionStream:
      oneOf:
        - $ref: '#/components/schemas/ChatCompletionEvent'
        - $ref: '#/components/schemas/StreamSentinel'
    ChatCompletionEvent:
      type: object
      required:
        - data
      properties:
        data:
          $ref: '#/components/schemas/ChatCompletionChunk'
    ChatCompletionChunk:
      type: object
      required:
        - id
        - object
        - created
        - choices
        - model
      properties:
        id:
          type: string
        object:
          description: The object type, which is always `chat.completion.chunk`.
          const: chat.completion.chunk
        created:
          type: integer
        system_fingerprint:
          type: string
        model:
          type: string
          example: mistralai/Mixtral-8x7B-Instruct-v0.1
        choices:
          title: ChatCompletionChoices
          type: array
          items:
            type: object
            required:
              - index
              - delta
              - finish_reason
            properties:
              index:
                type: integer
              finish_reason:
                $ref: '#/components/schemas/FinishReason'
                nullable: true
              logprobs:
                type: number
                nullable: true
              top_logprobs:
                $ref: '#/components/schemas/TopLogprobs'
              seed:
                type: integer
                nullable: true
              delta:
                title: ChatCompletionChoiceDelta
                type: object
                required:
                  - role
                properties:
                  token_id:
                    type: integer
                  role:
                    type: string
                    enum:
                      - system
                      - user
                      - assistant
                      - function
                      - tool
                  content:
                    type: string
                    nullable: true
                  reasoning:
                    type: string
                    nullable: true
                  tool_calls:
                    type: array
                    items:
                      $ref: '#/components/schemas/ToolChoice'
                  function_call:
                    type: object
                    deprecated: true
                    nullable: true
                    properties:
                      arguments:
                        type: string
                      name:
                        type: string
                    required:
                      - arguments
                      - name
        usage:
          allOf:
            - $ref: '#/components/schemas/UsageData'
            - nullable: true
        warnings:
          type: array
          items:
            $ref: '#/components/schemas/InferenceWarning'
    AudioSpeechRequest:
      type: object
      required:
        - model
        - input
        - voice
      properties:
        model:
          description: >
            The name of the model to query.<br> <br> [See all of Together AI's
            chat
            models](https://docs.together.ai/docs/serverless-models#audio-models)
            The current supported tts models are: - cartesia/sonic -
            hexgrad/Kokoro-82M - canopylabs/orpheus-3b-0.1-ft
          example: canopylabs/orpheus-3b-0.1-ft
          anyOf:
            - type: string
              enum:
                - cartesia/sonic
                - hexgrad/Kokoro-82M
                - canopylabs/orpheus-3b-0.1-ft
            - type: string
        input:
          type: string
          description: Input text to generate the audio for
        voice:
          description: >
            The voice to use for generating the audio. The voices supported are
            different for each model. For eg - for canopylabs/orpheus-3b-0.1-ft,
            one of the voices supported is tara, for hexgrad/Kokoro-82M, one of
            the voices supported is af_alloy and for cartesia/sonic, one of the
            voices supported is "friendly sidekick". <br> <br> You can view the
            voices supported for each model using the /v1/voices endpoint
            sending the model name as the query parameter. [View all supported
            voices
            here](https://docs.together.ai/docs/text-to-speech#supported-voices).
            <br> <br> `hexgrad/Kokoro-82M` additionally supports voice mixing,
            where two or more voices are combined into a single blended voice by
            joining their names with `+` (e.g. `af_bella+af_heart`). Optional
            per-voice weights can be provided in parentheses (e.g.
            `af_bella(2)+af_heart(1)`). Other models require a single voice
            name.
          type: string
        response_format:
          type: string
          description: >-
            The format of audio output. Supported formats are mp3, wav, raw if
            streaming is false. If streaming is true, the only supported format
            is raw.
          default: wav
          enum:
            - mp3
            - wav
            - raw
        language:
          type: string
          description: >
            Language or locale of input text. Accepts ISO 639-1 language codes
            (e.g., `en`, `fr`, `es`, `zh`) as well as locale codes for
            region-specific variants. Locale codes must be lowercase (e.g.,
            `zh-hk` for Cantonese).
          default: en
          example: en
        response_encoding:
          type: string
          description: >-
            Audio encoding of response. Only applicable when response_format is
            raw or pcm. Cartesia models respect this parameter and support all
            values. Orpheus, Kokoro, and Minimax models always return pcm_s16le
            regardless of this setting.
          default: pcm_f32le
          enum:
            - pcm_f32le
            - pcm_s16le
            - pcm_mulaw
            - pcm_alaw
        sample_rate:
          type: integer
          default: 44100
          description: >-
            Sampling rate in Hz for the output audio. Cartesia and Minimax
            models respect this parameter. Orpheus and Kokoro models always
            output at 24000 Hz regardless of this setting.
        bit_rate:
          type: integer
          description: >-
            Bitrate of the MP3 audio output in bits per second. Only applicable
            when response_format is mp3. Higher values produce better audio
            quality at larger file sizes. Default is 128000. Currently supported
            on Cartesia models.
          default: 128000
          enum:
            - 32000
            - 64000
            - 96000
            - 128000
            - 192000
        stream:
          type: boolean
          default: false
          description: >-
            If true, output is streamed for several characters at a time instead
            of waiting for the full response. The stream terminates with `data:
            [DONE]`. If false, return the encoded audio as octet stream
        extra_params:
          type: object
          description: >-
            Additional model-specific parameters that fine-tune speech
            generation behavior.
          properties:
            pronunciation_dict:
              type: array
              items:
                type: string
              description: >-
                A list of pronunciation rules for specific characters or
                symbols. Each entry uses the format `"<source>/<replacement>"`
                (e.g., `["omg/oh my god"]`) to override how the model pronounces
                matching tokens.
              example:
                - omg/oh my god
    AudioTranscriptionRequest:
      type: object
      required:
        - file
      properties:
        file:
          oneOf:
            - $ref: '#/components/schemas/AudioFileBinary'
            - $ref: '#/components/schemas/AudioFileUrl'
          description: >-
            Audio file upload or public HTTP/HTTPS URL. Supported formats .wav,
            .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac.
        model:
          type: string
          description: Model to use for transcription
          default: openai/whisper-large-v3
          enum:
            - openai/whisper-large-v3
        language:
          type: string
          description: >-
            Optional ISO 639-1 language code. If `auto` is provided, language is
            auto-detected.
          default: en
          example: en
        prompt:
          type: string
          description: >-
            Optional text to bias decoding. Supported only on Whisper-family
            models (e.g. `openai/whisper-large-v3`). Other STT models (e.g.
            `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API
            compatibility but ignore it.
        response_format:
          type: string
          description: The format of the response
          default: json
          enum:
            - json
            - verbose_json
        temperature:
          type: number
          format: float
          description: Sampling temperature between 0.0 and 1.0
          default: 0
          minimum: 0
          maximum: 1
        timestamp_granularities:
          oneOf:
            - type: string
              enum:
                - segment
                - word
            - type: array
              items:
                type: string
                enum:
                  - segment
                  - word
              uniqueItems: true
              minItems: 1
              maxItems: 2
          description: >-
            Controls level of timestamp detail in verbose_json. Only used when
            response_format is verbose_json. Can be a single granularity or an
            array to get multiple levels.
          default: segment
          example:
            - word
            - segment
        diarize:
          type: boolean
          description: >
            Whether to enable speaker diarization. When enabled, you will get
            the speaker id for each word in the transcription. In the response,
            in the words array, you will get the speaker id for each word. In
            addition, we also return the speaker_segments array which contains
            the speaker id for each speaker segment along with the start and end
            time of the segment along with all the words in the segment. <br>
            <br> For eg - ... "speaker_segments": [
              "speaker_id": "SPEAKER_00",
              "start": 0,
              "end": 30.02,
              "words": [
                {
                  "id": 0,
                  "word": "Tijana",
                  "start": 0,
                  "end": 11.475,
                  "speaker_id": "SPEAKER_00"
                },
                ...
          default: false
        min_speakers:
          type: integer
          description: >-
            Minimum number of speakers expected in the audio. Used to improve
            diarization accuracy when the approximate number of speakers is
            known.
        max_speakers:
          type: integer
          description: >-
            Maximum number of speakers expected in the audio. Used to improve
            diarization accuracy when the approximate number of speakers is
            known.
    AudioTranscriptionResponse:
      oneOf:
        - $ref: '#/components/schemas/AudioTranscriptionJsonResponse'
        - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse'
    AudioTranscriptionJsonResponse:
      type: object
      required:
        - text
      properties:
        text:
          type: string
          description: The transcribed text
          example: Hello, world!
    AudioTranscriptionVerboseJsonResponse:
      type: object
      required:
        - language
        - duration
        - text
        - segments
      properties:
        language:
          type: string
          description: The language of the audio
          example: english
        duration:
          type: number
          format: float
          description: The duration of the audio in seconds
          example: 3.5
        text:
          type: string
          description: The transcribed text
          example: Hello, world!
        segments:
          type: array
          items:
            $ref: '#/components/schemas/AudioTranscriptionSegment'
          description: Array of transcription segments
        words:
          type: array
          items:
            $ref: '#/components/schemas/AudioTranscriptionWord'
          description: >-
            Array of transcription words (only when timestamp_granularities
            includes 'word')
        speaker_segments:
          type: array
          items:
            $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment'
          description: >-
            Array of transcription speaker segments (only when diarize is
            enabled)
    AudioTranscriptionSegment:
      type: object
      required:
        - id
        - start
        - end
        - text
      properties:
        id:
          type: integer
          description: Unique identifier for the segment
          example: 0
        start:
          type: number
          format: float
          description: Start time of the segment in seconds
          example: 0
        end:
          type: number
          format: float
          description: End time of the segment in seconds
          example: 3.5
        text:
          type: string
          description: The text content of the segment
          example: Hello, world!
    AudioTranscriptionWord:
      type: object
      required:
        - word
        - start
        - end
      properties:
        word:
          type: string
          description: The word
          example: Hello
        start:
          type: number
          format: float
          description: Start time of the word in seconds
          example: 0
        end:
          type: number
          format: float
          description: End time of the word in seconds
          example: 0.5
        speaker_id:
          type: string
          description: The speaker id for the word (only when diarize is enabled)
          example: SPEAKER_00
    AudioTranscriptionSpeakerSegment:
      type: object
      required:
        - speaker_id
        - start
        - end
        - words
        - text
        - id
      properties:
        speaker_id:
          type: string
          description: The speaker identifier
          example: SPEAKER_00
        start:
          type: number
          format: float
          description: Start time of the speaker segment in seconds
          example: 0
        end:
          type: number
          format: float
          description: End time of the speaker segment in seconds
          example: 30.02
        words:
          type: array
          items:
            $ref: '#/components/schemas/AudioTranscriptionWord'
          description: Array of words spoken by this speaker in this segment
        text:
          type: string
          description: The full text spoken by this speaker in this segment
          example: Hello, how are you doing today?
        id:
          type: integer
          description: Unique identifier for the speaker segment
          example: 1
    AudioTranslationRequest:
      type: object
      required:
        - file
      properties:
        file:
          oneOf:
            - type: string
              format: binary
              description: Audio file to translate
            - type: string
              format: uri
              description: Public HTTP/HTTPS URL to audio file
          description: >-
            Audio file upload or public HTTP/HTTPS URL. Supported formats .wav,
            .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac.
        model:
          type: string
          description: Model to use for translation
          default: openai/whisper-large-v3
          enum:
            - openai/whisper-large-v3
        language:
          type: string
          description: >-
            Target output language. Optional ISO 639-1 language code. If
            omitted, language is set to English.
          default: en
          example: en
        prompt:
          type: string
          description: >-
            Optional text to bias decoding. Supported only on Whisper-family
            models (e.g. `openai/whisper-large-v3`). Other STT models (e.g.
            `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API
            compatibility but ignore it.
        response_format:
          type: string
          description: The format of the response
          default: json
          enum:
            - json
            - verbose_json
        temperature:
          type: number
          format: float
          description: Sampling temperature between 0.0 and 1.0
          default: 0
          minimum: 0
          maximum: 1
        timestamp_granularities:
          oneOf:
            - type: string
              enum:
                - segment
                - word
            - type: array
              items:
                type: string
                enum:
                  - segment
                  - word
              uniqueItems: true
              minItems: 1
              maxItems: 2
          description: >-
            Controls level of timestamp detail in verbose_json. Only used when
            response_format is verbose_json. Can be a single granularity or an
            array to get multiple levels.
          default: segment
          example:
            - word
            - segment
    AudioTranslationResponse:
      oneOf:
        - $ref: '#/components/schemas/AudioTranslationJsonResponse'
        - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse'
    AudioTranslationJsonResponse:
      type: object
      required:
        - text
      properties:
        text:
          type: string
          description: The translated text
          example: Hello, world!
    AudioTranslationVerboseJsonResponse:
      type: object
      required:
        - language
        - duration
        - text
        - segments
      properties:
        language:
          type: string
          description: The target language of the translation
          example: english
        duration:
          type: number
          format: float
          description: The duration of the audio in seconds
          example: 3.5
        text:
          type: string
          description: The translated text
          example: Hello, world!
        segments:
          type: array
          items:
            $ref: '#/components/schemas/AudioTranscriptionSegment'
          description: Array of translation segments
        words:
          type: array
          items:
            $ref: '#/components/schemas/AudioTranscriptionWord'
          description: >-
            Array of translation words (only when timestamp_granularities
            includes 'word')
    AudioSpeechStreamResponse:
      oneOf:
        - $ref: '#/components/schemas/AudioSpeechStreamEvent'
        - $ref: '#/components/schemas/StreamSentinel'
    AudioSpeechStreamEvent:
      type: object
      required:
        - data
      properties:
        data:
          $ref: '#/components/schemas/AudioSpeechStreamChunk'
    AudioSpeechStreamChunk:
      type: object
      required:
        - object
        - model
        - b64
      properties:
        object:
          description: The object type, which is always `audio.tts.chunk`.
          const: audio.tts.chunk
        model:
          type: string
          example: cartesia/sonic
        b64:
          type: string
          description: base64 encoded audio stream
    StreamSentinel:
      type: object
      required:
        - data
      properties:
        data:
          title: stream_signal
          type: string
          enum:
            - '[DONE]'
    ChatCompletionToken:
      type: object
      required:
        - id
        - text
        - logprob
        - special
      properties:
        id:
          type: integer
        text:
          type: string
        logprob:
          type: number
        special:
          type: boolean
    ChatCompletionChoice:
      type: object
      required:
        - index
        - delta
        - finish_reason
      properties:
        index:
          type: integer
        finish_reason:
          $ref: '#/components/schemas/FinishReason'
        logprobs:
          $ref: '#/components/schemas/LogprobsPart'
        delta:
          title: ChatCompletionChoiceDelta
          type: object
          required:
            - role
          properties:
            token_id:
              type: integer
            role:
              type: string
              enum:
                - system
                - user
                - assistant
                - function
                - tool
            content:
              type: string
              nullable: true
            tool_calls:
              type: array
              items:
                $ref: '#/components/schemas/ToolChoice'
            function_call:
              type: object
              deprecated: true
              nullable: true
              properties:
                arguments:
                  type: string
                name:
                  type: string
              required:
                - arguments
                - name
            reasoning:
              type: string
              nullable: true
    EmbeddingsRequest:
      type: object
      required:
        - model
        - input
      properties:
        model:
          type: string
          description: >
            The name of the embedding model to use.<br> <br> [See all of
            Together AI's embedding
            models](https://docs.together.ai/docs/serverless-models#embedding-models)
          example: togethercomputer/m2-bert-80M-8k-retrieval
          anyOf:
            - type: string
              enum:
                - WhereIsAI/UAE-Large-V1
                - BAAI/bge-large-en-v1.5
                - BAAI/bge-base-en-v1.5
                - togethercomputer/m2-bert-80M-8k-retrieval
            - type: string
        input:
          oneOf:
            - type: string
              description: A string providing the text for the model to embed.
              example: >-
                Our solar system orbits the Milky Way galaxy at about 515,000
                mph
            - type: array
              items:
                type: string
                description: A string providing the text for the model to embed.
                example: >-
                  Our solar system orbits the Milky Way galaxy at about 515,000
                  mph
          example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
    EmbeddingsResponse:
      type: object
      required:
        - object
        - model
        - data
      properties:
        object:
          description: The object type, which is always `list`.
          const: list
        model:
          type: string
        data:
          type: array
          items:
            type: object
            required:
              - index
              - object
              - embedding
            properties:
              object:
                description: The object type, which is always `embedding`.
                const: embedding
              embedding:
                type: array
                items:
                  type: number
              index:
                type: integer
    ModelInfoList:
      type: array
      items:
        $ref: '#/components/schemas/ModelInfo'
    ModelInfo:
      type: object
      required:
        - id
        - object
        - created
        - type
      properties:
        id:
          type: string
          example: Austism/chronos-hermes-13b
        object:
          description: The object type, which is always `model`.
          const: model
        created:
          type: integer
          example: 1692896905
        type:
          enum:
            - chat
            - language
            - code
            - image
            - embedding
            - moderation
            - rerank
          example: chat
        display_name:
          type: string
          example: Chronos Hermes (13B)
        organization:
          type: string
          example: Austism
        link:
          type: string
        license:
          type: string
          example: other
        context_length:
          type: integer
          example: 2048
        pricing:
          $ref: '#/components/schemas/Pricing'
    ModelUploadRequest:
      type: object
      required:
        - model_name
        - model_source
      properties:
        model_name:
          type: string
          description: The name to give to your uploaded model
          example: Qwen2.5-72B-Instruct
        model_source:
          type: string
          description: The source location of the model (Hugging Face repo or S3 path)
          example: unsloth/Qwen2.5-72B-Instruct
        model_type:
          type: string
          description: Whether the model is a full model or an adapter
          default: model
          enum:
            - model
            - adapter
          example: model
        hf_token:
          type: string
          description: Hugging Face token (if uploading from Hugging Face)
          example: hf_examplehuggingfacetoken
        description:
          type: string
          description: A description of your model
          example: Finetuned Qwen2.5-72B-Instruct by Unsloth
        base_model:
          type: string
          description: >-
            The base model to use for an adapter if setting it to run against a
            serverless pool.  Only used for model_type `adapter`.
          example: Qwen/Qwen2.5-72B-Instruct
        lora_model:
          type: string
          description: >-
            The lora pool to use for an adapter if setting it to run against,
            say, a dedicated pool.  Only used for model_type `adapter`.
          example: my_username/Qwen2.5-72B-Instruct-lora
    ModelUploadSuccessResponse:
      type: object
      required:
        - data
        - message
      properties:
        data:
          type: object
          required:
            - job_id
            - model_name
            - model_id
            - model_source
          properties:
            job_id:
              type: string
              example: job-a15dad11-8d8e-4007-97c5-a211304de284
            model_name:
              type: string
              example: necolinehubner/Qwen2.5-72B-Instruct
            model_id:
              type: string
              example: model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7
            model_source:
              type: string
              example: huggingface
        message:
          type: string
          example: Processing model weights. Job created.
    ImageResponse:
      type: object
      properties:
        id:
          type: string
        model:
          type: string
        object:
          description: The object type, which is always `list`.
          const: list
        data:
          type: array
          items:
            oneOf:
              - $ref: '#/components/schemas/ImageResponseDataB64'
              - $ref: '#/components/schemas/ImageResponseDataUrl'
            discriminator:
              propertyName: type
      required:
        - id
        - model
        - object
        - data
    ImageResponseDataB64:
      type: object
      required:
        - index
        - b64_json
        - type
      properties:
        index:
          type: integer
        b64_json:
          type: string
        type:
          type: string
          enum:
            - b64_json
    ImageResponseDataUrl:
      type: object
      required:
        - index
        - url
        - type
      properties:
        index:
          type: integer
        url:
          type: string
        type:
          type: string
          enum:
            - url
    JobInfoSuccessResponse:
      type: object
      required:
        - type
        - job_id
        - status
        - status_updates
        - args
        - created_at
        - updated_at
      properties:
        type:
          type: string
          example: model_upload
        job_id:
          type: string
          example: job-a15dad11-8d8e-4007-97c5-a211304de284
        status:
          type: string
          enum:
            - Queued
            - Running
            - Complete
            - Failed
          example: Complete
        status_updates:
          type: array
          items:
            type: object
            required:
              - status
              - message
              - timestamp
            properties:
              status:
                type: string
                example: Complete
              message:
                type: string
                example: Job is Complete
              timestamp:
                type: string
                format: date-time
                example: '2025-03-11T22:36:12Z'
        args:
          type: object
          properties:
            description:
              type: string
              example: Finetuned Qwen2.5-72B-Instruct by Unsloth
            modelName:
              type: string
              example: necolinehubner/Qwen2.5-72B-Instruct
            modelSource:
              type: string
              example: unsloth/Qwen2.5-72B-Instruct
        created_at:
          type: string
          format: date-time
          example: '2025-03-11T22:05:43Z'
        updated_at:
          type: string
          format: date-time
          example: '2025-03-11T22:36:12Z'
    JobsInfoSuccessResponse:
      type: object
      required:
        - data
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/JobInfoSuccessResponse'
    Pricing:
      type: object
      required:
        - hourly
        - input
        - output
        - base
        - finetune
      properties:
        base:
          type: number
          example: 0
        finetune:
          type: number
          example: 0
        hourly:
          type: number
          example: 0
        input:
          type: number
          example: 0.3
        output:
          type: number
          example: 0.3
        cached_input:
          type: number
          example: 0.2
    ToolsPart:
      type: object
      properties:
        type:
          type: string
          example: tool_type
        function:
          type: object
          properties:
            description:
              type: string
              example: A description of the function.
            name:
              type: string
              example: function_name
            parameters:
              type: object
              additionalProperties: true
              description: A map of parameter names to their values.
    ToolChoice:
      type: object
      required:
        - id
        - type
        - function
        - index
      properties:
        index:
          type: number
        id:
          type: string
        type:
          type: string
          enum:
            - function
        function:
          type: object
          required:
            - name
            - arguments
          properties:
            name:
              type: string
              example: function_name
            arguments:
              type: string
    FileResponse:
      description: Structured information describing a file uploaded to Together.
      type: object
      required:
        - id
        - object
        - created_at
        - filename
        - bytes
        - purpose
        - FileType
        - Processed
      properties:
        id:
          description: ID of the file.
          type: string
        object:
          description: The object type, which is always `file`.
          const: file
        created_at:
          description: The timestamp when the file was created.
          type: integer
        filename:
          description: The name of the file as it was uploaded.
          type: string
          example: my_file.jsonl
        bytes:
          description: The number of bytes in the file.
          type: integer
        purpose:
          description: The purpose of the file as it was uploaded.
          $ref: '#/components/schemas/FilePurpose'
        Processed:
          description: >-
            Whether the file has been parsed and analyzed for correctness for
            fine-tuning.
          type: boolean
        FileType:
          description: The type of the file such as `jsonl`, `csv`, or `parquet`.
          $ref: '#/components/schemas/FileType'
    FileList:
      required:
        - data
      type: object
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/FileResponse'
    FilePurpose:
      type: string
      description: The purpose of the file
      example: fine-tune
      enum:
        - fine-tune
        - eval
        - batch-api
    FileType:
      type: string
      description: The type of the file
      default: jsonl
      example: jsonl
      enum:
        - csv
        - jsonl
        - parquet
    FileDeleteResponse:
      type: object
      properties:
        id:
          type: string
        deleted:
          type: boolean
    FinetuneResponse:
      type: object
      required:
        - id
        - status
      properties:
        id:
          type: string
          format: uuid
        training_file:
          type: string
        validation_file:
          type: string
        model:
          type: string
        model_output_name:
          type: string
        model_output_path:
          type: string
        trainingfile_numlines:
          type: integer
        trainingfile_size:
          type: integer
        created_at:
          type: string
          format: date-time
        updated_at:
          type: string
          format: date-time
        started_at:
          type: string
          format: date-time
        n_epochs:
          type: integer
        n_checkpoints:
          type: integer
        n_evals:
          type: integer
        batch_size:
          oneOf:
            - type: integer
            - type: string
              enum:
                - max
          default: max
        learning_rate:
          type: number
        lr_scheduler:
          type: object
          $ref: '#/components/schemas/LRScheduler'
        warmup_ratio:
          type: number
        max_grad_norm:
          type: number
          format: float
        weight_decay:
          type: number
          format: float
        eval_steps:
          type: integer
        train_on_inputs:
          oneOf:
            - type: boolean
            - type: string
              enum:
                - auto
          default: auto
        training_method:
          type: object
          oneOf:
            - $ref: '#/components/schemas/TrainingMethodSFT'
            - $ref: '#/components/schemas/TrainingMethodDPO'
        training_type:
          type: object
          oneOf:
            - $ref: '#/components/schemas/FullTrainingType'
            - $ref: '#/components/schemas/LoRATrainingType'
        multimodal_params:
          $ref: '#/components/schemas/MultimodalParams'
        status:
          $ref: '#/components/schemas/FinetuneJobStatus'
        job_id:
          type: string
        events:
          type: array
          items:
            $ref: '#/components/schemas/FineTuneEvent'
        token_count:
          type: integer
        param_count:
          type: integer
        total_price:
          type: integer
        epochs_completed:
          type: integer
        queue_depth:
          type: integer
        wandb_project_name:
          type: string
        wandb_url:
          type: string
        from_checkpoint:
          type: string
        from_hf_model:
          type: string
        hf_model_revision:
          type: string
        progress:
          $ref: '#/components/schemas/FineTuneProgress'
    FinetuneResponseTruncated:
      type: object
      description: >-
        A truncated version of the fine-tune response, used for POST
        /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints
      required:
        - id
        - status
        - created_at
        - updated_at
      example:
        id: ft-01234567890123456789
        status: completed
        created_at: '2023-05-17T17:35:45.123Z'
        updated_at: '2023-05-17T18:46:23.456Z'
        user_id: user_01234567890123456789
        owner_address: user@example.com
        total_price: 1500
        token_count: 850000
        events: []
        model: meta-llama/Llama-2-7b-hf
        model_output_name: mynamespace/meta-llama/Llama-2-7b-hf-32162631
        n_epochs: 3
        training_file: file-01234567890123456789
        wandb_project_name: my-finetune-project
      properties:
        id:
          type: string
          description: Unique identifier for the fine-tune job
        status:
          $ref: '#/components/schemas/FinetuneJobStatus'
        created_at:
          type: string
          format: date-time
          description: Creation timestamp of the fine-tune job
        updated_at:
          type: string
          format: date-time
          description: Last update timestamp of the fine-tune job
        started_at:
          type: string
          format: date-time
          description: Start timestamp of the current stage of the fine-tune job
        user_id:
          type: string
          description: Identifier for the user who created the job
        owner_address:
          type: string
          description: Owner address information
        total_price:
          type: integer
          description: Total price for the fine-tuning job
        token_count:
          type: integer
          description: Count of tokens processed
        events:
          type: array
          items:
            $ref: '#/components/schemas/FineTuneEvent'
          description: Events related to this fine-tune job
        training_file:
          type: string
          description: File-ID of the training file
        validation_file:
          type: string
          description: File-ID of the validation file
        packing:
          type: boolean
          description: Whether sequence packing is being used for training.
        max_seq_length:
          type: integer
          description: >-
            Maximum sequence length to use for training. If not specified, the
            maximum allowed for the model and training method will be used.
        model:
          type: string
          description: Base model used for fine-tuning
        model_output_name:
          type: string
        suffix:
          type: string
          description: Suffix added to the fine-tuned model name
        n_epochs:
          type: integer
          description: Number of training epochs
        n_evals:
          type: integer
          description: Number of evaluations during training
        n_checkpoints:
          type: integer
          description: Number of checkpoints saved during training
        batch_size:
          type: integer
          description: Batch size used for training
        training_type:
          oneOf:
            - $ref: '#/components/schemas/FullTrainingType'
            - $ref: '#/components/schemas/LoRATrainingType'
          description: Type of training used (full or LoRA)
        training_method:
          oneOf:
            - $ref: '#/components/schemas/TrainingMethodSFT'
            - $ref: '#/components/schemas/TrainingMethodDPO'
          description: Method of training used
        learning_rate:
          type: number
          format: float
          description: Learning rate used for training
        lr_scheduler:
          $ref: '#/components/schemas/LRScheduler'
          description: Learning rate scheduler configuration
        warmup_ratio:
          type: number
          format: float
          description: Ratio of warmup steps
        max_grad_norm:
          type: number
          format: float
          description: Maximum gradient norm for clipping
        weight_decay:
          type: number
          format: float
          description: Weight decay value used
        random_seed:
          type: integer
          nullable: true
          description: >
            Random seed used for training. Integer when set; null if not stored
            (e.g. legacy jobs) or no explicit seed was recorded.
        wandb_project_name:
          type: string
          description: Weights & Biases project name
        wandb_name:
          type: string
          description: Weights & Biases run name
        from_checkpoint:
          type: string
          description: Checkpoint used to continue training
        from_hf_model:
          type: string
          description: Hugging Face Hub repo to start training from
        hf_model_revision:
          type: string
          description: The revision of the Hugging Face Hub model to continue training from
        progress:
          $ref: '#/components/schemas/FineTuneProgress'
          description: Progress information for the fine-tuning job
    FinetuneDeleteResponse:
      type: object
      properties:
        message:
          type: string
          description: Message indicating the result of the deletion
    FineTuneModelLimits:
      type: object
      description: Model limits for fine-tuning.
      required:
        - model_name
        - max_num_epochs
        - max_num_evals
        - max_learning_rate
        - min_learning_rate
        - supports_vision
        - supports_tools
        - supports_reasoning
        - merge_output_lora
      properties:
        model_name:
          type: string
          description: The name of the model.
        full_training:
          type: object
          description: Limits for full training.
          required:
            - max_batch_size
            - max_batch_size_dpo
            - min_batch_size
          properties:
            max_batch_size:
              type: integer
              description: Maximum batch size for SFT full training.
            max_batch_size_dpo:
              type: integer
              description: Maximum batch size for DPO full training.
            min_batch_size:
              type: integer
              description: Minimum batch size for full training.
        lora_training:
          type: object
          description: Limits for LoRA training.
          required:
            - max_batch_size
            - max_batch_size_dpo
            - min_batch_size
            - max_rank
            - target_modules
          properties:
            max_batch_size:
              type: integer
              description: Maximum batch size for SFT LoRA training.
            max_batch_size_dpo:
              type: integer
              description: Maximum batch size for DPO LoRA training.
            min_batch_size:
              type: integer
              description: Minimum batch size for LoRA training.
            max_rank:
              type: integer
              description: Maximum LoRA rank.
            target_modules:
              type: array
              items:
                type: string
              description: Available target modules for LoRA.
        max_num_epochs:
          type: integer
          description: Maximum number of training epochs.
        max_num_evals:
          type: integer
          description: Maximum number of evaluations.
        max_learning_rate:
          type: number
          description: Maximum learning rate.
        min_learning_rate:
          type: number
          description: Minimum learning rate.
        supports_vision:
          type: boolean
          description: Whether the model supports vision/multimodal inputs.
        supports_tools:
          type: boolean
          description: Whether the model supports tool/function calling.
        supports_reasoning:
          type: boolean
          description: Whether the model supports reasoning.
        merge_output_lora:
          type: boolean
          description: Whether to merge the output LoRA.
    FinetuneJobStatus:
      type: string
      enum:
        - pending
        - queued
        - running
        - compressing
        - uploading
        - cancel_requested
        - cancelled
        - error
        - completed
    FinetuneEventLevels:
      type: string
      enum:
        - null
        - info
        - warning
        - error
        - legacy_info
        - legacy_iwarning
        - legacy_ierror
    FinetuneEventType:
      type: string
      enum:
        - job_pending
        - job_start
        - job_stopped
        - model_downloading
        - model_download_complete
        - training_data_downloading
        - training_data_download_complete
        - validation_data_downloading
        - validation_data_download_complete
        - wandb_init
        - training_start
        - checkpoint_save
        - billing_limit
        - epoch_complete
        - training_complete
        - model_compressing
        - model_compression_complete
        - model_uploading
        - model_upload_complete
        - job_complete
        - job_error
        - cancel_requested
        - job_restarted
        - refund
        - warning
    FinetuneTruncatedList:
      type: object
      required:
        - data
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/FinetuneResponseTruncated'
    FinetuneListEvents:
      type: object
      required:
        - data
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/FineTuneEvent'
    FineTuneEvent:
      type: object
      required:
        - object
        - created_at
        - message
        - type
        - param_count
        - token_count
        - total_steps
        - wandb_url
        - step
        - checkpoint_path
        - model_path
        - training_offset
        - hash
      properties:
        object:
          description: The object type, which is always `fine-tune-event`.
          const: fine-tune-event
        created_at:
          type: string
        level:
          anyOf:
            - $ref: '#/components/schemas/FinetuneEventLevels'
        message:
          type: string
        type:
          $ref: '#/components/schemas/FinetuneEventType'
        param_count:
          type: integer
        token_count:
          type: integer
        total_steps:
          type: integer
        wandb_url:
          type: string
        step:
          type: integer
        checkpoint_path:
          type: string
        model_path:
          type: string
        training_offset:
          type: integer
        hash:
          type: string
    FineTuneProgress:
      type: object
      description: Progress information for a fine-tuning job
      required:
        - estimate_available
        - seconds_remaining
      properties:
        estimate_available:
          type: boolean
          description: Whether time estimate is available
        seconds_remaining:
          type: integer
          description: >-
            Estimated time remaining in seconds for the fine-tuning job to next
            state
    FinetuneListCheckpoints:
      type: object
      required:
        - data
      properties:
        data:
          type: array
          items:
            $ref: '#/components/schemas/FineTuneCheckpoint'
    FineTuneCheckpoint:
      type: object
      required:
        - step
        - path
        - created_at
        - checkpoint_type
      properties:
        step:
          type: integer
        created_at:
          type: string
        path:
          type: string
        checkpoint_type:
          type: string
    FullTrainingType:
      type: object
      properties:
        type:
          type: string
          enum:
            - Full
      required:
        - type
    LoRATrainingType:
      type: object
      properties:
        type:
          type: string
          enum:
            - Lora
        lora_r:
          type: integer
        lora_alpha:
          type: integer
        lora_dropout:
          type: number
          format: float
          default: 0
        lora_trainable_modules:
          type: string
          default: all-linear
      required:
        - type
        - lora_r
        - lora_alpha
    TrainingMethodSFT:
      type: object
      properties:
        method:
          type: string
          enum:
            - sft
        train_on_inputs:
          oneOf:
            - type: boolean
            - type: string
              enum:
                - auto
          type: boolean
          default: auto
          description: >-
            Whether to mask the user messages in conversational data or prompts
            in instruction data.
      required:
        - method
        - train_on_inputs
    TrainingMethodDPO:
      type: object
      properties:
        method:
          type: string
          enum:
            - dpo
        dpo_beta:
          type: number
          format: float
          default: 0.1
        rpo_alpha:
          type: number
          format: float
          default: 0
        dpo_normalize_logratios_by_length:
          type: boolean
          default: false
        dpo_reference_free:
          type: boolean
          default: false
        simpo_gamma:
          type: number
          format: float
          default: 0
      required:
        - method
    MultimodalParams:
      type: object
      properties:
        train_vision:
          type: boolean
          description: >-
            Whether to train the vision encoder of the model. Only available for
            multimodal models.
    LRScheduler:
      type: object
      properties:
        lr_scheduler_type:
          type: string
          enum:
            - linear
            - cosine
        lr_scheduler_args:
          oneOf:
            - $ref: '#/components/schemas/LinearLRSchedulerArgs'
            - $ref: '#/components/schemas/CosineLRSchedulerArgs'
      required:
        - lr_scheduler_type
    CosineLRSchedulerArgs:
      type: object
      properties:
        min_lr_ratio:
          type: number
          format: float
          default: 0
          description: The ratio of the final learning rate to the peak learning rate
        num_cycles:
          type: number
          format: float
          default: 0.5
          description: Number or fraction of cycles for the cosine learning rate scheduler
      required:
        - min_lr_ratio
        - num_cycles
    LinearLRSchedulerArgs:
      type: object
      properties:
        min_lr_ratio:
          type: number
          format: float
          default: 0
          description: The ratio of the final learning rate to the peak learning rate
    Autoscaling:
      type: object
      description: Configuration for automatic scaling of replicas based on demand.
      required:
        - min_replicas
        - max_replicas
      properties:
        min_replicas:
          type: integer
          format: int32
          description: >-
            The minimum number of replicas to maintain, even when there is no
            load
          examples:
            - 2
        max_replicas:
          type: integer
          format: int32
          description: The maximum number of replicas to scale up to under load
          examples:
            - 5
    HardwareSpec:
      type: object
      description: Detailed specifications of a hardware configuration
      required:
        - gpu_type
        - gpu_link
        - gpu_memory
        - gpu_count
      properties:
        gpu_type:
          type: string
          description: The type/model of GPU
          examples:
            - a100-80gb
        gpu_link:
          type: string
          description: The GPU interconnect technology
          examples:
            - sxm
        gpu_memory:
          type: number
          format: float
          description: Amount of GPU memory in GB
          examples:
            - 80
        gpu_count:
          type: integer
          format: int32
          description: Number of GPUs in this configuration
          examples:
            - 2
    EndpointPricing:
      type: object
      description: Pricing details for using an endpoint
      required:
        - cents_per_minute
      properties:
        cents_per_minute:
          type: number
          format: float
          description: Cost per minute of endpoint uptime in cents
          examples:
            - 5.42
    HardwareAvailability:
      type: object
      description: Indicates the current availability status of a hardware configuration
      required:
        - status
      properties:
        status:
          type: string
          description: The availability status of the hardware configuration
          enum:
            - available
            - unavailable
            - insufficient
    HardwareWithStatus:
      type: object
      description: Hardware configuration details with optional availability status
      required:
        - object
        - id
        - pricing
        - specs
        - updated_at
      properties:
        object:
          description: The object type, which is always `hardware`.
          const: hardware
        id:
          type: string
          description: Unique identifier for the hardware configuration
          examples:
            - 2x_nvidia_a100_80gb_sxm
        pricing:
          $ref: '#/components/schemas/EndpointPricing'
        specs:
          $ref: '#/components/schemas/HardwareSpec'
        availability:
          $ref: '#/components/schemas/HardwareAvailability'
        updated_at:
          type: string
          format: date-time
          description: Timestamp of when the hardware status was last updated
    CreateEndpointRequest:
      type: object
      required:
        - model
        - hardware
        - autoscaling
      properties:
        display_name:
          type: string
          description: A human-readable name for the endpoint
          example: My Llama3 70b endpoint
        model:
          type: string
          description: The model to deploy on this endpoint
          example: deepseek-ai/DeepSeek-R1
        hardware:
          type: string
          description: The hardware configuration to use for this endpoint
          example: 1x_nvidia_a100_80gb_sxm
        autoscaling:
          $ref: '#/components/schemas/Autoscaling'
          description: Configuration for automatic scaling of the endpoint
        disable_prompt_cache:
          deprecated: true
          type: boolean
          description: This parameter is deprecated and no longer has any effect.
          default: false
        disable_speculative_decoding:
          type: boolean
          description: Whether to disable speculative decoding for this endpoint
          default: false
        state:
          type: string
          description: The desired state of the endpoint
          enum:
            - STARTED
            - STOPPED
          default: STARTED
          example: STARTED
        inactive_timeout:
          type: integer
          description: >-
            The number of minutes of inactivity after which the endpoint will be
            automatically stopped. Set to null, omit or set to 0 to disable
            automatic timeout.
          nullable: true
          example: 60
        availability_zone:
          type: string
          description: >-
            Create the endpoint in a specified availability zone (e.g.,
            us-central-4b)
    DedicatedEndpoint:
      type: object
      description: Details about a dedicated endpoint deployment
      required:
        - object
        - id
        - name
        - display_name
        - model
        - hardware
        - type
        - owner
        - state
        - autoscaling
        - created_at
      properties:
        object:
          description: The object type, which is always `endpoint`.
          const: endpoint
        id:
          type: string
          description: Unique identifier for the endpoint
          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
        name:
          type: string
          description: System name for the endpoint
          example: devuser/deepseek-ai/DeepSeek-R1-a32b82a1
        display_name:
          type: string
          description: Human-readable name for the endpoint
          example: My DeepSeek R1 endpoint
        model:
          type: string
          description: The model deployed on this endpoint
          example: deepseek-ai/DeepSeek-R1
        hardware:
          type: string
          description: The hardware configuration used for this endpoint
          example: 8x_nvidia_h200_140gb_sxm
        type:
          type: string
          enum:
            - dedicated
          description: The type of endpoint
          example: dedicated
        owner:
          type: string
          description: The owner of this endpoint
          example: devuser
        state:
          type: string
          enum:
            - PENDING
            - STARTING
            - STARTED
            - STOPPING
            - STOPPED
            - ERROR
          description: Current state of the endpoint
          example: STARTED
        autoscaling:
          $ref: '#/components/schemas/Autoscaling'
          description: Configuration for automatic scaling of the endpoint
        created_at:
          type: string
          format: date-time
          description: Timestamp when the endpoint was created
          example: 2025-02-04T10:43:55.405Z
    ListEndpoint:
      type: object
      description: Details about an endpoint when listed via the list endpoint
      required:
        - id
        - object
        - name
        - model
        - type
        - owner
        - state
        - created_at
      properties:
        object:
          description: The object type, which is always `endpoint`.
          const: endpoint
        id:
          type: string
          description: Unique identifier for the endpoint
          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
        name:
          type: string
          description: System name for the endpoint
          example: allenai/OLMo-7B
        model:
          type: string
          description: The model deployed on this endpoint
          example: allenai/OLMo-7B
        type:
          type: string
          enum:
            - serverless
            - dedicated
          description: The type of endpoint
          example: serverless
        owner:
          type: string
          description: The owner of this endpoint
          example: together
        state:
          type: string
          enum:
            - PENDING
            - STARTING
            - STARTED
            - STOPPING
            - STOPPED
            - ERROR
          description: Current state of the endpoint
          example: STARTED
        created_at:
          type: string
          format: date-time
          description: Timestamp when the endpoint was created
          example: 2024-02-28T21:34:35.444Z
    DisplayorExecuteOutput:
      properties:
        data:
          properties:
            application/geo+json:
              type: object
            application/javascript:
              type: string
            application/json:
              type: object
            application/pdf:
              format: byte
              type: string
            application/vnd.vega.v5+json:
              type: object
            application/vnd.vegalite.v4+json:
              type: object
            image/gif:
              format: byte
              type: string
            image/jpeg:
              format: byte
              type: string
            image/png:
              format: byte
              type: string
            image/svg+xml:
              type: string
            text/html:
              type: string
            text/latex:
              type: string
            text/markdown:
              type: string
            text/plain:
              type: string
          type: object
        type:
          enum:
            - display_data
            - execute_result
          type: string
      required:
        - type
        - data
      title: DisplayorExecuteOutput
    Error:
      oneOf:
        - type: string
        - additionalProperties: true
          type: object
      title: Error
    ErrorOutput:
      title: ErrorOutput
      description: >-
        Errors and exceptions that occurred. If this output type is present,
        your code did not execute successfully.
      properties:
        data:
          type: string
        type:
          enum:
            - error
          type: string
      required:
        - type
        - data
    ExecuteRequest:
      title: ExecuteRequest
      required:
        - language
        - code
      properties:
        code:
          description: Code snippet to execute.
          example: print('Hello, world!')
          type: string
        files:
          description: >-
            Files to upload to the session. If present, files will be uploaded
            before executing the given code.
          items:
            properties:
              content:
                type: string
              encoding:
                description: >-
                  Encoding of the file content. Use `string` for text files such
                  as code, and `base64` for binary files, such as images.
                enum:
                  - string
                  - base64
                type: string
              name:
                type: string
            required:
              - name
              - encoding
              - content
            type: object
          type: array
        language:
          default: python
          description: >-
            Programming language for the code to execute. Currently only
            supports Python, but more will be added.
          enum:
            - python
        session_id:
          description: >-
            Identifier of the current session. Used to make follow-up calls.
            Requests will return an error if the session does not belong to the
            caller or has expired.
          example: ses_abcDEF123
          nullable: false
          type: string
    ExecuteResponse:
      title: ExecuteResponse
      type: object
      description: >-
        The result of the execution. If successful, `data` contains the result
        and `errors` will be null. If unsuccessful, `data` will be null and
        `errors` will contain the errors.
      oneOf:
        - title: SuccessfulExecution
          type: object
          required:
            - data
            - errors
          properties:
            errors:
              type: 'null'
            data:
              type: object
              nullable: false
              required:
                - session_id
                - outputs
              properties:
                outputs:
                  type: array
                  items:
                    discriminator:
                      propertyName: type
                    oneOf:
                      - title: StreamOutput
                        description: Outputs that were printed to stdout or stderr
                        type: object
                        required:
                          - type
                          - data
                        properties:
                          type:
                            enum:
                              - stdout
                              - stderr
                            type: string
                          data:
                            type: string
                      - description: >-
                          Errors and exceptions that occurred. If this output
                          type is present, your code did not execute
                          successfully.
                        properties:
                          data:
                            type: string
                          type:
                            enum:
                              - error
                            type: string
                        required:
                          - type
                          - data
                        title: ErrorOutput
                      - properties:
                          data:
                            properties:
                              application/geo+json:
                                type: object
                                additionalProperties: true
                              application/javascript:
                                type: string
                              application/json:
                                type: object
                                additionalProperties: true
                              application/pdf:
                                format: byte
                                type: string
                              application/vnd.vega.v5+json:
                                type: object
                                additionalProperties: true
                              application/vnd.vegalite.v4+json:
                                type: object
                                additionalProperties: true
                              image/gif:
                                format: byte
                                type: string
                              image/jpeg:
                                format: byte
                                type: string
                              image/png:
                                format: byte
                                type: string
                              image/svg+xml:
                                type: string
                              text/html:
                                type: string
                              text/latex:
                                type: string
                              text/markdown:
                                type: string
                              text/plain:
                                type: string
                            type: object
                          type:
                            enum:
                              - display_data
                              - execute_result
                            type: string
                        required:
                          - type
                          - data
                        title: DisplayorExecuteOutput
                    title: InterpreterOutput
                session_id:
                  type: string
                  description: >-
                    Identifier of the current session. Used to make follow-up
                    calls.
                  example: ses_abcDEF123
                  nullable: false
                status:
                  type: string
                  enum:
                    - success
                  description: Status of the execution. Currently only supports success.
        - title: FailedExecution
          type: object
          required:
            - data
            - errors
          properties:
            data:
              type: 'null'
            errors:
              type: array
              items:
                title: Error
                oneOf:
                  - type: string
                  - type: object
                    additionalProperties: true
    InterpreterOutput:
      discriminator:
        propertyName: type
      oneOf:
        - description: Outputs that were printed to stdout or stderr
          properties:
            data:
              type: string
            type:
              enum:
                - stdout
                - stderr
              type: string
          required:
            - type
            - data
          title: StreamOutput
        - description: >-
            Errors and exceptions that occurred. If this output type is present,
            your code did not execute successfully.
          properties:
            data:
              type: string
            type:
              enum:
                - error
              type: string
          required:
            - type
            - data
          title: ErrorOutput
        - properties:
            data:
              properties:
                application/geo+json:
                  type: object
                application/javascript:
                  type: string
                application/json:
                  type: object
                application/pdf:
                  format: byte
                  type: string
                application/vnd.vega.v5+json:
                  type: object
                application/vnd.vegalite.v4+json:
                  type: object
                image/gif:
                  format: byte
                  type: string
                image/jpeg:
                  format: byte
                  type: string
                image/png:
                  format: byte
                  type: string
                image/svg+xml:
                  type: string
                text/html:
                  type: string
                text/latex:
                  type: string
                text/markdown:
                  type: string
                text/plain:
                  type: string
              type: object
            type:
              enum:
                - display_data
                - execute_result
              type: string
          required:
            - type
            - data
          title: DisplayorExecuteOutput
      title: InterpreterOutput
    Response:
      properties:
        errors:
          items:
            oneOf:
              - type: string
              - additionalProperties: true
                type: object
            title: Error
          type: array
      title: Response
      type: object
    SessionListResponse:
      allOf:
        - properties:
            errors:
              items:
                oneOf:
                  - type: string
                  - additionalProperties: true
                    type: object
                title: Error
              type: array
          title: Response
          type: object
        - properties:
            data:
              properties:
                sessions:
                  items:
                    properties:
                      execute_count:
                        type: integer
                      expires_at:
                        format: date-time
                        type: string
                      id:
                        description: Session Identifier. Used to make follow-up calls.
                        example: ses_abcDEF123
                        type: string
                      last_execute_at:
                        format: date-time
                        type: string
                      started_at:
                        format: date-time
                        type: string
                    required:
                      - execute_count
                      - expires_at
                      - id
                      - last_execute_at
                      - started_at
                    type: object
                  type: array
              required:
                - sessions
          type: object
      title: SessionListResponse
      type: object
    StreamOutput:
      description: Outputs that were printed to stdout or stderr
      properties:
        data:
          type: string
        type:
          enum:
            - stdout
            - stderr
          type: string
      required:
        - type
        - data
      title: StreamOutput
    CreateBatchRequest:
      type: object
      required:
        - endpoint
        - input_file_id
      properties:
        endpoint:
          type: string
          description: The endpoint to use for batch processing
          example: /v1/chat/completions
        input_file_id:
          type: string
          description: ID of the uploaded input file containing batch requests
          example: file-abc123def456ghi789
        completion_window:
          type: string
          description: Time window for batch completion (optional)
          example: 24h
        priority:
          type: integer
          description: Priority for batch processing (optional)
          example: 1
        model_id:
          type: string
          description: Model to use for processing batch requests
          example: Qwen/Qwen3.5-9B
    BatchErrorResponse:
      type: object
      properties:
        error:
          type: string
    BatchJobWithWarning:
      type: object
      properties:
        job:
          $ref: '#/components/schemas/BatchJob'
        warning:
          type: string
    BatchJob:
      type: object
      properties:
        id:
          type: string
          format: uuid
          example: 01234567-8901-2345-6789-012345678901
        user_id:
          type: string
          example: user_789xyz012
        input_file_id:
          type: string
          example: file-input123abc456def
        file_size_bytes:
          type: integer
          format: int64
          example: 1048576
          description: Size of input file in bytes
        status:
          $ref: '#/components/schemas/BatchJobStatus'
        job_deadline:
          type: string
          format: date-time
          example: '2024-01-15T15:30:00Z'
        created_at:
          type: string
          format: date-time
          example: '2024-01-15T14:30:00Z'
        endpoint:
          type: string
          example: /v1/chat/completions
        progress:
          type: number
          format: float64
          example: 75
          description: Completion progress (0.0 to 100)
        model_id:
          type: string
          example: Qwen/Qwen3.5-9B
          description: Model used for processing requests
        output_file_id:
          type: string
          example: file-output789xyz012ghi
        error_file_id:
          type: string
          example: file-errors456def789jkl
        error:
          type: string
        completed_at:
          type: string
          format: date-time
          example: '2024-01-15T15:45:30Z'
    BatchJobStatus:
      type: string
      enum:
        - VALIDATING
        - IN_PROGRESS
        - COMPLETED
        - FAILED
        - EXPIRED
        - CANCELLED
      example: IN_PROGRESS
      description: Current status of the batch job
    EvaluationTypedRequest:
      type: object
      required:
        - type
        - parameters
      properties:
        type:
          type: string
          enum:
            - classify
            - score
            - compare
          description: The type of evaluation to perform
          example: classify
        parameters:
          oneOf:
            - $ref: '#/components/schemas/EvaluationClassifyParameters'
            - $ref: '#/components/schemas/EvaluationScoreParameters'
            - $ref: '#/components/schemas/EvaluationCompareParameters'
          description: Type-specific parameters for the evaluation
    EvaluationClassifyParameters:
      type: object
      required:
        - judge
        - labels
        - pass_labels
        - input_data_file_path
      properties:
        judge:
          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
        labels:
          type: array
          items:
            type: string
          minItems: 2
          description: List of possible classification labels
          example:
            - 'yes'
            - 'no'
        pass_labels:
          type: array
          items:
            type: string
          minItems: 1
          description: List of labels that are considered passing
          example:
            - 'yes'
        model_to_evaluate:
          $ref: '#/components/schemas/EvaluationModelOrString'
        input_data_file_path:
          type: string
          description: Data file ID
          example: file-1234-aefd
    EvaluationScoreParameters:
      type: object
      required:
        - judge
        - min_score
        - max_score
        - pass_threshold
        - input_data_file_path
      properties:
        judge:
          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
        min_score:
          type: number
          format: float
          example: 0
          description: Minimum possible score
        max_score:
          type: number
          format: float
          example: 10
          description: Maximum possible score
        pass_threshold:
          type: number
          format: float
          example: 7
          description: Score threshold for passing
        model_to_evaluate:
          $ref: '#/components/schemas/EvaluationModelOrString'
        input_data_file_path:
          type: string
          example: file-01234567890123456789
          description: Data file ID
    EvaluationCompareParameters:
      type: object
      required:
        - judge
        - input_data_file_path
      properties:
        judge:
          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
        model_a:
          $ref: '#/components/schemas/EvaluationModelOrString'
        model_b:
          $ref: '#/components/schemas/EvaluationModelOrString'
        input_data_file_path:
          type: string
          description: Data file name
    EvaluationJudgeModelConfig:
      type: object
      required:
        - model
        - system_template
        - model_source
      properties:
        model:
          type: string
          description: Name of the judge model
          example: Qwen/Qwen3.5-9B
        system_template:
          type: string
          description: System prompt template for the judge
          example: Imagine you are a helpful assistant
        model_source:
          type: string
          description: Source of the judge model.
          enum:
            - serverless
            - dedicated
            - external
        external_api_token:
          type: string
          description: Bearer/API token for external judge models.
        external_base_url:
          type: string
          description: >-
            Base URL for external judge models. Must be OpenAI-compatible base
            URL.
        num_workers:
          type: integer
          minimum: 1
          description: >-
            Number of concurrent workers for inference requests. Overrides the
            default concurrency for this model. Useful for tuning throughput
            when using proxy endpoints (e.g. OpenRouter) or rate-limited
            external APIs.
          example: 5
        max_tokens:
          type: integer
          minimum: 1
          description: >-
            Maximum number of tokens the judge model can generate. Defaults to
            32768. Increase for reasoning models (e.g. Gemini, o-series) that
            consume output token budget for chain-of-thought.
          example: 8192
        temperature:
          type: number
          format: float
          minimum: 0
          maximum: 2
          description: Sampling temperature for the judge model. Defaults to 0.05.
          example: 0
    EvaluationModelOrString:
      oneOf:
        - type: string
          description: Field name in the input data
        - $ref: '#/components/schemas/EvaluationModelRequest'
    EvaluationModelRequest:
      type: object
      required:
        - model
        - max_tokens
        - temperature
        - system_template
        - input_template
        - model_source
      properties:
        model:
          type: string
          description: Name of the model to evaluate
          example: Qwen/Qwen3.5-9B
        max_tokens:
          type: integer
          minimum: 1
          description: Maximum number of tokens to generate
          example: 512
        temperature:
          type: number
          format: float
          minimum: 0
          maximum: 2
          description: Sampling temperature
          example: 0.7
        system_template:
          type: string
          description: System prompt template
          example: Imagine you are helpful assistant
        input_template:
          type: string
          description: Input prompt template
          example: Please classify {{prompt}} based on the labels below
        model_source:
          type: string
          description: Source of the model.
          enum:
            - serverless
            - dedicated
            - external
        external_api_token:
          type: string
          description: Bearer/API token for external models.
        external_base_url:
          type: string
          description: Base URL for external models. Must be OpenAI-compatible base URL
        num_workers:
          type: integer
          minimum: 1
          description: >-
            Number of concurrent workers for inference requests. Overrides the
            default concurrency for this model. Useful for tuning throughput
            when using proxy endpoints (e.g. OpenRouter) or rate-limited
            external APIs.
          example: 5
    EvaluationResponse:
      type: object
      properties:
        workflow_id:
          type: string
          description: The ID of the created evaluation job
          example: eval-1234-1244513
        status:
          type: string
          enum:
            - pending
          description: Initial status of the job
    EvaluationJob:
      type: object
      properties:
        workflow_id:
          type: string
          description: The evaluation job ID
          example: eval-1234aedf
        type:
          type: string
          enum:
            - classify
            - score
            - compare
          description: The type of evaluation
          example: classify
        owner_id:
          type: string
          description: ID of the job owner (admin only)
        status:
          type: string
          enum:
            - pending
            - queued
            - running
            - completed
            - error
            - user_error
          description: Current status of the job
          example: completed
        status_updates:
          type: array
          items:
            $ref: '#/components/schemas/EvaluationJobStatusUpdate'
          description: History of status updates (admin only)
        parameters:
          type: object
          description: The parameters used for this evaluation
          additionalProperties: true
        created_at:
          type: string
          format: date-time
          description: When the job was created
          example: '2025-07-23T17:10:04.837888Z'
        updated_at:
          type: string
          format: date-time
          description: When the job was last updated
          example: '2025-07-23T17:10:04.837888Z'
        results:
          oneOf:
            - $ref: '#/components/schemas/EvaluationClassifyResults'
            - $ref: '#/components/schemas/EvaluationScoreResults'
            - $ref: '#/components/schemas/EvaluationCompareResults'
            - type: object
              properties:
                error:
                  type: string
          nullable: true
          description: Results of the evaluation (when completed)
    EvaluationJobStatusUpdate:
      type: object
      properties:
        status:
          type: string
          description: The status at this update
          example: pending
        message:
          type: string
          description: Additional message for this update
          example: Job is pending evaluation
        timestamp:
          type: string
          format: date-time
          description: When this update occurred
          example: '2025-07-23T17:10:04.837888Z'
    EvaluationClassifyResults:
      type: object
      properties:
        generation_fail_count:
          type: number
          format: integer
          nullable: true
          description: Number of failed generations.
          example: 0
        judge_fail_count:
          type: number
          format: integer
          nullable: true
          description: Number of failed judge generations
          example: 0
        invalid_label_count:
          type: number
          format: float
          nullable: true
          description: Number of invalid labels
          example: 0
        result_file_id:
          type: string
          description: Data File ID
          example: file-1234-aefd
        pass_percentage:
          type: number
          format: integer
          nullable: true
          description: Pecentage of pass labels.
          example: 10
        label_counts:
          type: string
          description: JSON string representing label counts
          example: '{"yes": 10, "no": 0}'
    EvaluationScoreResults:
      type: object
      properties:
        aggregated_scores:
          type: object
          properties:
            mean_score:
              type: number
              format: float
            std_score:
              type: number
              format: float
            pass_percentage:
              type: number
              format: float
        generation_fail_count:
          type: number
          format: integer
          nullable: true
          description: Number of failed generations.
          example: 0
        judge_fail_count:
          type: number
          format: integer
          nullable: true
          description: Number of failed judge generations
          example: 0
        invalid_score_count:
          type: number
          format: integer
          description: number of invalid scores generated from model
        failed_samples:
          type: number
          format: integer
          description: number of failed samples generated from model
        result_file_id:
          type: string
          description: Data File ID
          example: file-1234-aefd
    EvaluationCompareResults:
      type: object
      properties:
        num_samples:
          type: integer
          description: Total number of samples compared
        A_wins:
          type: integer
          description: Number of times model A won
        B_wins:
          type: integer
          description: Number of times model B won
        Ties:
          type: integer
          description: Number of ties
        generation_fail_count:
          type: number
          format: integer
          nullable: true
          description: Number of failed generations.
          example: 0
        judge_fail_count:
          type: number
          format: integer
          nullable: true
          description: Number of failed judge generations
          example: 0
        result_file_id:
          type: string
          description: Data File ID
    AudioFileBinary:
      type: string
      format: binary
      description: Audio file to transcribe
    AudioFileUrl:
      type: string
      format: uri
      description: Public HTTPS URL to audio file
    CreateVideoBody:
      title: Create video request
      description: Parameters for creating a new video generation job.
      type: object
      required:
        - model
      properties:
        model:
          type: string
          description: The model to be used for the video creation request.
        prompt:
          type: string
          maxLength: 32000
          minLength: 1
          description: Text prompt that describes the video to generate.
        height:
          type: integer
        width:
          type: integer
        resolution:
          type: string
          description: Video resolution.
        ratio:
          type: string
          description: Aspect ratio of the video.
        seconds:
          type: string
          description: Clip duration in seconds.
        fps:
          type: integer
          description: Frames per second. Defaults to 24.
        steps:
          type: integer
          minimum: 10
          maximum: 50
          description: >-
            The number of denoising steps the model performs during video
            generation. More steps typically result in higher quality output but
            require longer processing time.
        seed:
          type: integer
          description: >-
            Seed to use in initializing the video generation.  Using the same
            seed allows deterministic video generation.  If not provided a
            random seed is generated for each request.
        guidance_scale:
          type: integer
          description: "Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom.\_guidence_scale\_affects both visual content and temporal consistency.Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns."
        output_format:
          $ref: '#/components/schemas/VideoOutputFormat'
          description: Specifies the format of the output video. Defaults to MP4.
        output_quality:
          type: integer
          description: Compression quality. Defaults to 20.
        negative_prompt:
          type: string
          description: >-
            Similar to prompt, but specifies what to avoid instead of what to
            include
        generate_audio:
          type: boolean
          description: Whether to generate audio for the video.
        media:
          $ref: '#/components/schemas/VideoMedia'
          description: >-
            Media inputs for video generation. The accepted fields depend on the
            model type (e.g. i2v, r2v, t2v, videoedit).
        frame_images:
          deprecated: true
          description: >-
            Deprecated: use media.frame_images instead. Array of images to guide
            video generation, similar to keyframes.
          example:
            - - input_image: aac49721-1964-481a-ae78-8a4e29b91402
                frame: 0
              - input_image: c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7
                frame: 48
              - input_image: 3ad204c3-a9de-4963-8a1a-c3911e3afafe
                frame: last
          type: array
          items:
            $ref: '#/components/schemas/VideoFrameImageInput'
        reference_images:
          deprecated: true
          description: >-
            Deprecated: use media.reference_images instead. Unlike frame_images
            which constrain specific timeline positions, reference images guide
            the general appearance that should appear consistently across the
            video.
          type: array
          items:
            type: string
    VideoStatus:
      description: Current lifecycle status of the video job.
      type: string
      enum:
        - in_progress
        - completed
        - failed
    VideoFrameImageInput:
      type: object
      required:
        - input_image
      properties:
        input_image:
          type: string
          description: URL path to hosted image that is used for a frame
        frame:
          description: >
            Optional param to specify where to insert the frame. If this is
            omitted, the following heuristics are applied:

            - frame_images size is one, frame is first.

            - If size is two, frames are first and last.

            - If size is larger, frames are first, last and evenly spaced
            between.
          anyOf:
            - type: number
            - type: string
              enum:
                - first
                - last
    VideoMedia:
      type: object
      description: >-
        Contains all media inputs for video generation. Accepted fields depend
        on the model type.
      properties:
        frame_images:
          description: >-
            Array of images to guide video generation at specific timeline
            positions.
          type: array
          items:
            $ref: '#/components/schemas/VideoFrameImageInput'
        frame_videos:
          description: Array of video clips to use as starting clips.
          type: array
          items:
            $ref: '#/components/schemas/VideoRef'
        reference_images:
          description: >-
            Array of image URLs that guide the general appearance across the
            video.
          type: array
          items:
            type: string
        reference_videos:
          description: Array of reference videos.
          type: array
          items:
            $ref: '#/components/schemas/VideoRef'
        source_video:
          description: >-
            Source video to edit. Accepts a URL string or an object with a
            "video" key.
          oneOf:
            - type: string
            - $ref: '#/components/schemas/VideoRef'
        audio_inputs:
          description: >-
            Array of audio inputs. Each element accepts a URL string or an
            object with an "audio" key.
          type: array
          items:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/AudioRef'
    VideoRef:
      type: object
      required:
        - video
      properties:
        video:
          type: string
          description: URL of the video.
    AudioRef:
      type: object
      required:
        - audio
      properties:
        audio:
          type: string
          description: URL of the audio.
    VideoOutputFormat:
      type: string
      enum:
        - MP4
        - WEBM
    VideoJob:
      properties:
        id:
          type: string
          description: Unique identifier for the video job.
        object:
          description: The object type, which is always video.
          const: video
        model:
          type: string
          description: The video generation model that produced the job.
        status:
          $ref: '#/components/schemas/VideoStatus'
          description: Current lifecycle status of the video job.
        created_at:
          type: number
          description: Unix timestamp (seconds) for when the job was created.
        completed_at:
          type: number
          description: Unix timestamp (seconds) for when the job completed, if finished.
        size:
          type: string
          description: The resolution of the generated video.
        seconds:
          type: string
          description: Duration of the generated clip in seconds.
        error:
          description: Error payload that explains why generation failed, if applicable.
          type: object
          properties:
            code:
              type: string
            message:
              type: string
          required:
            - message
        outputs:
          description: >-
            Available upon completion, the outputs provides the cost charged and
            the hosted url to access the video
          type: object
          properties:
            cost:
              type: integer
              description: The cost of generated video charged to the owners account.
            video_url:
              type: string
              description: URL hosting the generated video
          required:
            - cost
            - video_url
      type: object
      required:
        - id
        - model
        - status
        - size
        - seconds
        - created_at
      title: Video job
      description: Structured information describing a generated video job.
    ContainerStatus:
      properties:
        finishedAt:
          description: >-
            FinishedAt is the timestamp when the container finished execution
            (if terminated)
          type: string
        message:
          description: >-
            Message provides a human-readable message with details about the
            container's status
          type: string
        name:
          description: Name is the name of the container
          type: string
        reason:
          description: >-
            Reason provides a brief machine-readable reason for the container's
            current status
          type: string
        startedAt:
          description: StartedAt is the timestamp when the container started execution
          type: string
        status:
          description: >-
            Status is the current state of the container (e.g., "Running",
            "Terminated", "Waiting")
          type: string
      type: object
    CreateDeploymentRequest:
      properties:
        args:
          description: >-
            Args overrides the container's CMD. Provide as an array of arguments
            (e.g., ["python", "app.py"])
          items:
            type: string
          type: array
        autoscaling:
          description: >-
            Autoscaling configuration. Example: {"metric":
            "QueueBacklogPerWorker", "target": 1.01} to scale based on queue
            backlog. Omit or set to null to disable autoscaling
          oneOf:
            - $ref: '#/components/schemas/HTTPAutoscalingConfig'
            - $ref: '#/components/schemas/QueueAutoscalingConfig'
            - $ref: '#/components/schemas/CustomMetricAutoscalingConfig'
        command:
          description: >-
            Command overrides the container's ENTRYPOINT. Provide as an array
            (e.g., ["/bin/sh", "-c"])
          items:
            type: string
          type: array
        cpu:
          description: >-
            CPU is the number of CPU cores to allocate per container instance
            (e.g., 0.1 = 100 milli cores)
          minimum: 0.1
          type: number
        description:
          description: >-
            Description is an optional human-readable description of your
            deployment
          type: string
        environment_variables:
          description: >-
            EnvironmentVariables is a list of environment variables to set in
            the container. Each must have a name and either a value or
            value_from_secret
          items:
            $ref: '#/components/schemas/EnvironmentVariable'
          type: array
        gpu_count:
          description: >-
            GPUCount is the number of GPUs to allocate per container instance.
            Defaults to 0 if not specified
          type: integer
        gpu_type:
          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
          enum:
            - h100-80gb
            - h100-40gb-mig
            - b200-192gb
          type: string
        health_check_path:
          description: >-
            HealthCheckPath is the HTTP path for health checks (e.g.,
            "/health"). If set, the platform will check this endpoint to
            determine container health
          type: string
        image:
          description: Image is the container image to deploy from registry.together.ai.
          type: string
        max_replicas:
          description: >-
            MaxReplicas is the maximum number of container instances that can be
            scaled up to. If not set, will be set to MinReplicas
          type: integer
        memory:
          description: >-
            Memory is the amount of RAM to allocate per container instance in
            GiB (e.g., 0.5 = 512MiB)
          maximum: 1000
          type: number
        min_replicas:
          description: >-
            MinReplicas is the minimum number of container instances to run.
            Defaults to 1 if not specified
          type: integer
        name:
          description: >-
            Name is the unique identifier for your deployment. Must contain only
            alphanumeric characters, underscores, or hyphens (1-100 characters)
          maxLength: 100
          minLength: 1
          type: string
        port:
          description: >-
            Port is the container port your application listens on (e.g., 8080
            for web servers). Required if your application serves traffic
          maximum: 65535
          minimum: 1
          type: integer
        storage:
          description: >-
            Storage is the amount of ephemeral disk storage to allocate per
            container instance (e.g., 10 = 10GiB)
          maximum: 400
          type: integer
        termination_grace_period_seconds:
          description: >-
            TerminationGracePeriodSeconds is the time in seconds to wait for
            graceful shutdown before forcefully terminating the replica
          type: integer
        volumes:
          description: >-
            Volumes is a list of volume mounts to attach to the container. Each
            mount must reference an existing volume by name
          items:
            $ref: '#/components/schemas/VolumeMount'
          type: array
      required:
        - gpu_type
        - image
        - name
      type: object
    CreateSecretRequest:
      properties:
        description:
          description: >-
            Description is an optional human-readable description of the
            secret's purpose (max 500 characters)
          maxLength: 500
          type: string
        name:
          description: >-
            Name is the unique identifier for the secret. Can contain
            alphanumeric characters, underscores, hyphens, forward slashes, and
            periods (1-100 characters)
          maxLength: 100
          minLength: 1
          type: string
        project_id:
          description: >-
            ProjectID is ignored - the project is automatically determined from
            your authentication
          type: string
        value:
          description: >-
            Value is the sensitive data to store securely (e.g., API keys,
            passwords, tokens). This value will be encrypted at rest
          minLength: 1
          type: string
      required:
        - name
        - value
      type: object
    CreateVolumeRequest:
      properties:
        content:
          allOf:
            - $ref: '#/components/schemas/VolumeContentRequest'
          description: Content specifies the content configuration for this volume
        name:
          description: Name is the unique identifier for the volume within the project
          type: string
        type:
          allOf:
            - $ref: '#/components/schemas/VolumeType'
          description: Type is the volume type (currently only "readOnly" is supported)
      required:
        - content
        - name
        - type
      type: object
    DeploymentListResponse:
      properties:
        data:
          description: Data is the array of deployment items
          items:
            $ref: '#/components/schemas/DeploymentResponseItem'
          type: array
        object:
          description: The object type, which is always `list`.
          const: list
      type: object
    DeploymentLogs:
      properties:
        lines:
          items:
            type: string
          type: array
      type: object
    DeploymentResponseItem:
      properties:
        args:
          description: Args are the arguments passed to the container's command
          items:
            type: string
          type: array
        autoscaling:
          description: >-
            Autoscaling contains autoscaling configuration parameters for this
            deployment. Omitted when autoscaling is disabled (nil)
          oneOf:
            - $ref: '#/components/schemas/HTTPAutoscalingConfig'
            - $ref: '#/components/schemas/QueueAutoscalingConfig'
            - $ref: '#/components/schemas/CustomMetricAutoscalingConfig'
        command:
          description: Command is the entrypoint command run in the container
          items:
            type: string
          type: array
        cpu:
          description: >-
            CPU is the amount of CPU resource allocated to each replica in cores
            (fractional value is allowed)
          type: number
        created_at:
          description: CreatedAt is the ISO8601 timestamp when this deployment was created
          type: string
          format: date-time
        description:
          description: >-
            Description provides a human-readable explanation of the
            deployment's purpose or content
          type: string
        desired_replicas:
          description: >-
            DesiredReplicas is the number of replicas that the orchestrator is
            targeting
          type: integer
        environment_variables:
          description: >-
            EnvironmentVariables is a list of environment variables set in the
            container
          items:
            $ref: '#/components/schemas/EnvironmentVariable'
          type: array
        gpu_count:
          description: >-
            GPUCount is the number of GPUs allocated to each replica in this
            deployment
          type: integer
        gpu_type:
          description: >-
            GPUType specifies the type of GPU requested (if any) for this
            deployment
          enum:
            - h100-80gb
            - h100-40gb-mig
            - b200-192gb
          type: string
        health_check_path:
          description: >-
            HealthCheckPath is the HTTP path used for health checks of the
            application
          type: string
        id:
          description: ID is the unique identifier of the deployment
          type: string
        image:
          description: Image specifies the container image used for this deployment
          type: string
        max_replicas:
          description: >-
            MaxReplicas is the maximum number of replicas to run for this
            deployment
          type: integer
        memory:
          description: >-
            Memory is the amount of memory allocated to each replica in GiB
            (fractional value is allowed)
          type: number
        min_replicas:
          description: >-
            MinReplicas is the minimum number of replicas to run for this
            deployment
          type: integer
        name:
          description: Name is the name of the deployment
          type: string
        object:
          description: The object type, which is always `deployment`.
          const: deployment
        port:
          description: Port is the container port that the deployment exposes
          type: integer
        ready_replicas:
          description: >-
            ReadyReplicas is the current number of replicas that are in the
            Ready state
          type: integer
        replica_events:
          additionalProperties:
            $ref: '#/components/schemas/ReplicaEvent'
          description: >-
            ReplicaEvents is a mapping of replica names or IDs to their status
            events
          type: object
        status:
          allOf:
            - $ref: '#/components/schemas/DeploymentStatus'
          description: >-
            Status represents the overall status of the deployment (e.g.,
            Updating, Scaling, Ready, Failed)
          enum:
            - Updating
            - Scaling
            - Ready
            - Failed
        storage:
          description: >-
            Storage is the amount of storage (in MB or units as defined by the
            platform) allocated to each replica
          type: integer
        updated_at:
          description: >-
            UpdatedAt is the ISO8601 timestamp when this deployment was last
            updated
          type: string
          format: date-time
        volumes:
          description: Volumes is a list of volume mounts for this deployment
          items:
            $ref: '#/components/schemas/VolumeMount'
          type: array
      type: object
    DeploymentStatus:
      enum:
        - Updating
        - Scaling
        - Ready
        - Failed
      type: string
      x-enum-varnames:
        - DeploymentStatusUpdating
        - DeploymentStatusScaling
        - DeploymentStatusReady
        - DeploymentStatusFailed
    EnvironmentVariable:
      properties:
        name:
          description: >-
            Name is the environment variable name (e.g., "DATABASE_URL"). Must
            start with a letter or underscore, followed by letters, numbers, or
            underscores
          type: string
        value:
          description: >-
            Value is the plain text value for the environment variable. Use this
            for non-sensitive values. Either Value or ValueFromSecret must be
            set, but not both
          type: string
        value_from_secret:
          description: >-
            ValueFromSecret references a secret by name or ID to use as the
            value. Use this for sensitive values like API keys or passwords.
            Either Value or ValueFromSecret must be set, but not both
          type: string
      required:
        - name
      type: object
    HTTPAutoscalingConfig:
      description: >-
        Autoscaling config for HTTPTotalRequests and HTTPAvgRequestDuration
        metrics
      properties:
        metric:
          description: Metric must be HTTPTotalRequests or HTTPAvgRequestDuration
          enum:
            - HTTPTotalRequests
            - HTTPAvgRequestDuration
          example: HTTPTotalRequests
          type: string
        target:
          description: >-
            Target is the threshold value. Default: 100 for HTTPTotalRequests,
            500 (ms) for HTTPAvgRequestDuration
          example: 100
          type: number
        time_interval_minutes:
          description: 'TimeIntervalMinutes is the rate window in minutes. Default: 10'
          example: 10
          type: integer
      type: object
    QueueAutoscalingConfig:
      description: Autoscaling config for QueueBacklogPerWorker metric
      properties:
        metric:
          description: Metric must be QueueBacklogPerWorker
          enum:
            - QueueBacklogPerWorker
          example: QueueBacklogPerWorker
          type: string
        model:
          description: >-
            Model overrides the model name for queue status lookup. Defaults to
            the deployment app name
          type: string
        target:
          description: 'Target is the threshold value. Default: 1.01'
          example: 1.01
          type: number
      type: object
    CustomMetricAutoscalingConfig:
      description: Autoscaling config for CustomMetric metric
      properties:
        custom_metric_name:
          description: >-
            CustomMetricName is the Prometheus metric name. Required. Must match
            [a-zA-Z_:][a-zA-Z0-9_:]*
          example: my_custom_metric
          type: string
        metric:
          description: Metric must be CustomMetric
          enum:
            - CustomMetric
          example: CustomMetric
          type: string
        target:
          description: 'Target is the threshold value. Default: 500'
          example: 500
          type: number
      type: object
    KubernetesEvent:
      properties:
        action:
          description: Action is the action taken or reported by this event
          type: string
        count:
          description: Count is the number of times this event has occurred
          type: integer
        first_seen:
          description: FirstSeen is the timestamp when this event was first observed
          type: string
        last_seen:
          description: LastSeen is the timestamp when this event was last observed
          type: string
        message:
          description: Message is a human-readable description of the event
          type: string
        reason:
          description: >-
            Reason is a brief machine-readable reason for this event (e.g.,
            "Pulling", "Started", "Failed")
          type: string
      type: object
    ListSecretsResponse:
      properties:
        data:
          description: Data is the array of secret items
          items:
            $ref: '#/components/schemas/SecretResponseItem'
          type: array
        object:
          description: The object type, which is always `list`.
          const: list
      type: object
    ListVolumesResponse:
      properties:
        data:
          description: Data is the array of volume items
          items:
            $ref: '#/components/schemas/VolumeResponseItem'
          type: array
        object:
          description: The object type, which is always `list`.
          const: list
      type: object
    ReplicaEvent:
      properties:
        image:
          description: Image is the container image used for this replica
          type: string
        replica_ready_since:
          description: >-
            ReplicaReadySince is the timestamp when the replica became ready to
            serve traffic
          type: string
        replica_status:
          description: >-
            ReplicaStatus is the current status of the replica (e.g., "Running",
            "Waiting", "Terminated")
          type: string
        replica_status_message:
          description: >-
            ReplicaStatusMessage provides a human-readable message explaining
            the replica's status
          type: string
        replica_status_reason:
          description: >-
            ReplicaStatusReason provides a brief machine-readable reason for the
            replica's status
          type: string
        revision_id:
          description: >-
            RevisionID is the deployment revision ID associated with this
            replica
          type: string
        volume_preload_completed_at:
          description: >-
            VolumePreloadCompletedAt is the timestamp when the volume preload
            completed
          type: string
        volume_preload_started_at:
          description: >-
            VolumePreloadStartedAt is the timestamp when the volume preload
            started
          type: string
        volume_preload_status:
          description: >-
            VolumePreloadStatus is the status of the volume preload (e.g.,
            "InProgress", "Completed", "Failed")
          type: string
      type: object
    SecretResponseItem:
      properties:
        created_at:
          description: CreatedAt is the ISO8601 timestamp when this secret was created
          type: string
        created_by:
          description: CreatedBy is the identifier of the user who created this secret
          type: string
        description:
          description: Description is a human-readable description of the secret's purpose
          type: string
        id:
          description: ID is the unique identifier for this secret
          type: string
        last_updated_by:
          description: >-
            LastUpdatedBy is the identifier of the user who last updated this
            secret
          type: string
        name:
          description: Name is the name/key of the secret
          type: string
        object:
          description: The object type, which is always `secret`.
          const: secret
        updated_at:
          description: UpdatedAt is the ISO8601 timestamp when this secret was last updated
          type: string
      type: object
    UpdateDeploymentRequest:
      properties:
        args:
          description: >-
            Args overrides the container's CMD. Provide as an array of arguments
            (e.g., ["python", "app.py"])
          items:
            type: string
          type: array
        autoscaling:
          description: >-
            Autoscaling configuration for the deployment. Set to {} to disable
            autoscaling
          oneOf:
            - $ref: '#/components/schemas/HTTPAutoscalingConfig'
            - $ref: '#/components/schemas/QueueAutoscalingConfig'
            - $ref: '#/components/schemas/CustomMetricAutoscalingConfig'
        command:
          description: >-
            Command overrides the container's ENTRYPOINT. Provide as an array
            (e.g., ["/bin/sh", "-c"])
          items:
            type: string
          type: array
        cpu:
          description: >-
            CPU is the number of CPU cores to allocate per container instance
            (e.g., 0.1 = 100 milli cores)
          minimum: 0.1
          type: number
        description:
          description: >-
            Description is an optional human-readable description of your
            deployment
          type: string
        environment_variables:
          description: >-
            EnvironmentVariables is a list of environment variables to set in
            the container. This will replace all existing environment variables
          items:
            $ref: '#/components/schemas/EnvironmentVariable'
          type: array
        gpu_count:
          description: GPUCount is the number of GPUs to allocate per container instance
          type: integer
        gpu_type:
          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb")
          enum:
            - h100-80gb
            - h100-40gb-mig
            - b200-192gb
          type: string
        health_check_path:
          description: >-
            HealthCheckPath is the HTTP path for health checks (e.g.,
            "/health"). Set to empty string to disable health checks
          type: string
        image:
          description: Image is the container image to deploy from registry.together.ai.
          type: string
        max_replicas:
          description: >-
            MaxReplicas is the maximum number of replicas that can be scaled up
            to.
          type: integer
        memory:
          description: >-
            Memory is the amount of RAM to allocate per container instance in
            GiB (e.g., 0.5 = 512MiB)
          maximum: 1000
          type: number
        min_replicas:
          description: MinReplicas is the minimum number of replicas to run
          type: integer
        name:
          description: >-
            Name is the new unique identifier for your deployment. Must contain
            only alphanumeric characters, underscores, or hyphens (1-100
            characters)
          maxLength: 100
          minLength: 1
          type: string
        port:
          description: >-
            Port is the container port your application listens on (e.g., 8080
            for web servers)
          maximum: 65535
          minimum: 1
          type: integer
        storage:
          description: >-
            Storage is the amount of ephemeral disk storage to allocate per
            container instance (e.g., 10 = 10GiB)
          maximum: 400
          type: integer
        termination_grace_period_seconds:
          description: >-
            TerminationGracePeriodSeconds is the time in seconds to wait for
            graceful shutdown before forcefully terminating the replica
          type: integer
        volumes:
          description: >-
            Volumes is a list of volume mounts to attach to the container. This
            will replace all existing volumes
          items:
            $ref: '#/components/schemas/VolumeMount'
          type: array
      type: object
    UpdateSecretRequest:
      properties:
        description:
          description: >-
            Description is an optional human-readable description of the
            secret's purpose (max 500 characters)
          maxLength: 500
          type: string
        name:
          description: >-
            Name is the new unique identifier for the secret. Can contain
            alphanumeric characters, underscores, hyphens, forward slashes, and
            periods (1-100 characters)
          maxLength: 100
          minLength: 1
          type: string
        project_id:
          description: >-
            ProjectID is ignored - the project is automatically determined from
            your authentication
          type: string
        value:
          description: >-
            Value is the new sensitive data to store securely. Updating this
            will replace the existing secret value
          minLength: 1
          type: string
      type: object
    UpdateVolumeRequest:
      properties:
        content:
          allOf:
            - $ref: '#/components/schemas/VolumeContentRequest'
          description: >-
            Content specifies the new content that will be preloaded to this
            volume
        name:
          description: Name is the new unique identifier for the volume within the project
          type: string
        type:
          allOf:
            - $ref: '#/components/schemas/VolumeType'
          description: Type is the new volume type (currently only "readOnly" is supported)
      type: object
    VolumeMount:
      properties:
        mount_path:
          description: >-
            MountPath is the path in the container where the volume will be
            mounted (e.g., "/data")
          type: string
        name:
          description: >-
            Name is the name of the volume to mount. Must reference an existing
            volume by name or ID
          type: string
        version:
          description: >-
            Version is the volume version to mount. On create, defaults to the
            latest version. On update, defaults to the currently mounted
            version.
          type: integer
      required:
        - mount_path
        - name
      type: object
    VolumeResponseItem:
      properties:
        content:
          $ref: '#/components/schemas/VolumeContent'
        created_at:
          description: CreatedAt is the ISO8601 timestamp when this volume was created
          type: string
        current_version:
          description: CurrentVersion is the current version number of this volume
          type: integer
        id:
          description: ID is the unique identifier for this volume
          type: string
        mounted_by:
          description: >-
            MountedBy is the list of deployment IDs currently mounting current
            volume version
          items:
            type: string
          type: array
          uniqueItems: false
        name:
          description: Name is the name of the volume
          type: string
        object:
          description: Object is the type identifier for this response (always "volume")
          type: string
        type:
          $ref: '#/components/schemas/VolumeType'
        updated_at:
          description: UpdatedAt is the ISO8601 timestamp when this volume was last updated
          type: string
        version_history:
          additionalProperties:
            $ref: '#/components/schemas/VersionHistoryItem'
          description: >-
            VersionHistory contains previous versions of this volume, keyed by
            version number
          type: object
      type: object
    VersionHistoryItem:
      properties:
        content:
          $ref: '#/components/schemas/VolumeContentRequest'
        mounted_by:
          items:
            type: string
          type: array
          uniqueItems: false
        version:
          type: integer
      type: object
    VolumeContentRequest:
      description: Content specifies the new content that will be preloaded to this volume
      properties:
        source_prefix:
          description: >-
            SourcePrefix is the file path prefix for the content to be preloaded
            into the volume
          example: models/
          type: string
        type:
          description: >-
            Type is the content type (currently only "files" is supported which
            allows preloading files uploaded via Files API into the volume)
          enum:
            - files
          example: files
          type: string
      type: object
    VolumeContent:
      properties:
        files:
          description: >-
            Files is the list of files that will be preloaded into the volume,
            if the volume content type is "files"
          items:
            $ref: '#/components/schemas/FileInfo'
          type: array
          uniqueItems: false
        source_prefix:
          description: >-
            SourcePrefix is the file path prefix for the content to be preloaded
            into the volume
          example: models/
          type: string
        type:
          description: >-
            Type is the content type (currently only "files" is supported which
            allows preloading files uploaded via Files API into the volume)
          enum:
            - files
          example: files
          type: string
      type: object
    FileInfo:
      properties:
        last_modified:
          description: LastModified is the timestamp when the file was last modified
          type: string
        name:
          description: Name is the filename including extension (e.g., "model_weights.bin")
          type: string
        size:
          description: Size is the file size in bytes
          type: integer
      type: object
    VolumeType:
      enum:
        - readOnly
      type: string
      x-enum-varnames:
        - VolumeTypeReadOnly
    QueueError:
      properties:
        code:
          description: Machine-readable error code
          type: string
        message:
          description: Human-readable error message
          type: string
        param:
          description: The parameter that caused the error, if applicable
          type: string
        type:
          description: Error category (e.g. "invalid_request_error", "not_found_error")
          type: string
      type: object
    QueueMetricsResponse:
      type: object
      required:
        - messages_running
        - messages_waiting
        - total_jobs
      properties:
        messages_running:
          description: Number of jobs currently being processed
          type: integer
        messages_waiting:
          description: Number of jobs waiting to be claimed by a worker
          type: integer
        total_jobs:
          description: Total number of active jobs (waiting + running)
          type: integer
    QueueCancelRequest:
      properties:
        model:
          description: Model identifier the job was submitted to
          type: string
        request_id:
          description: The request ID returned from the submit endpoint
          type: string
      required:
        - model
        - request_id
      type: object
    QueueCancelResponse:
      type: object
      required:
        - status
      properties:
        status:
          description: |
            Job status after the cancel attempt. Only pending jobs can be
            canceled. If the job is already running, done, or failed, the status
            is returned unchanged.
          type: string
          enum:
            - canceled
            - running
            - done
            - failed
    QueueJobRequest:
      properties:
        info:
          description: |
            Arbitrary JSON metadata stored with the job and returned in status
            responses. The model and system may add or update keys during
            processing.
          additionalProperties: true
          type: object
        model:
          description: Required model identifier
          type: string
        payload:
          description: >-
            Freeform model input. Passed unchanged to the model. Contents are
            model-specific.
          additionalProperties: true
          type: object
        priority:
          default: 0
          description: |
            Job priority. Higher values are processed first (strict priority
            ordering). Jobs with equal priority are processed in submission
            order (FIFO).
          type: integer
      required:
        - model
        - payload
      type: object
    QueueJobResponse:
      properties:
        error:
          $ref: '#/components/schemas/QueueError'
        requestId:
          description: >-
            Unique identifier for the submitted job. Use this to poll status or
            cancel.
          type: string
      type: object
    QueueJobStatusResponse:
      required:
        - request_id
        - model
        - status
      properties:
        claimed_at:
          description: Timestamp when a worker claimed the job
          format: date-time
          type: string
        created_at:
          description: Timestamp when the job was created
          format: date-time
          type: string
        done_at:
          description: Timestamp when the job completed (done or failed)
          format: date-time
          type: string
        info:
          description: |
            Job metadata. Contains keys from the submit request, plus any
            modifications from the model or system (e.g. progress, retry
            history).
          additionalProperties: true
          type: object
        inputs:
          description: Freeform model input, as submitted
          additionalProperties: true
          type: object
        model:
          description: Model identifier the job was submitted to
          type: string
        outputs:
          description: >-
            Freeform model output, populated when the job reaches done status.
            Contents are model-specific.
          additionalProperties: true
          type: object
        priority:
          description: Job priority. Higher values are processed first.
          type: integer
        request_id:
          description: The request ID that was returned from the submit endpoint
          type: string
        retries:
          description: |
            Number of times this job has been retried. Workers set a claim
            timeout and must send periodic status updates to keep the job alive.
            If no update is received within the timeout, the job is returned to
            the queue and retried. After 3 retries the job is permanently
            failed. Jobs explicitly failed by the model are not retried.
          type: integer
        status:
          description: >
            Current job status. Transitions: pending → running → done/failed. A
            pending job may also be canceled.
          enum:
            - pending
            - running
            - done
            - failed
            - canceled
          type: string
        warnings:
          description: Non-fatal messages about the request (e.g. deprecation notices)
          items:
            type: string
          type: array
      type: object