# OpenAPI description of the Gateway REST API.
# Version-like scalars are quoted so they are always read as strings.
openapi: '3.1.0'
info:
  title: Gateway REST API
  version: '4.259.0'
# Tag definitions. `x-displayName` is an extension field holding a label for
# the tag; note that several distinct tag names share the same label
# ("Gateway" twice, "other" three times).
tags:
  - name: gateway.openapi_Gateway
    x-displayName: Gateway
  - name: gateway-extra.openapi_Gateway
    x-displayName: Gateway
  - name: responses.openapi_other
    x-displayName: other
  - name: text-completion.openapi_other
    x-displayName: other
  - name: anthropic-messages.openapi_other
    x-displayName: other
paths:
  # Account collection. Exposes one GET operation that lists accounts; paging,
  # filtering, ordering and field selection are optional query parameters.
  /v1/accounts:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List Accounts
      operationId: Gateway_ListAccounts
      security:
        - BearerAuth: []
      parameters:
        # Paging controls.
        - name: pageSize
          in: query
          required: false
          description: |-
            The maximum number of accounts to return. The maximum page_size is 200,
            values above 200 will be coerced to 200.
            If unspecified, the default is 50.
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: |-
            A page token, received from a previous ListAccounts call. Provide this
            to retrieve the subsequent page. When paginating, all other parameters
            provided to ListAccounts must match the call that provided the page
            token.
          schema:
            type: string
        # Result shaping.
        - name: filter
          in: query
          required: false
          description: |-
            Only accounts satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          description: |-
            Not supported.
            Accounts will be returned ordered by `name`.
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListAccountsResponse'
  # Session-token refresh. POST with a mandatory JSON body; the 200 response
  # carries a gatewayRefreshSessionTokenResponse.
  /v1/auth/refresh:
    servers:
      - url: https://api.fireworks.ai
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Refresh a session JWT before expiry with a fresh TTL.
      operationId: Gateway_RefreshSessionToken
      security:
        - BearerAuth: []
      requestBody:
        description: Request to refresh an existing session JWT before it expires.
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayRefreshSessionTokenRequest'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRefreshSessionTokenResponse'
  # Credit-code redemption. POST with a mandatory JSON body
  # (gatewayRedeemCreditCodeRequest); the 200 response carries a
  # gatewayRedeemCreditCodeResponse.
  # The path key is quoted because it contains a colon.
  '/v1/creditCodes:redeem':
    servers:
      - url: https://api.fireworks.ai
    post:
      tags:
        - gateway.openapi_Gateway
      # Short, single-line label for the operation. The section heading
      # "APIs for credit codes." is deliberately not part of the summary.
      summary: Redeem Credit Code
      operationId: Gateway_RedeemCreditCode
      security:
        - BearerAuth: []
      requestBody:
        description: >-
          RedeemCreditCodeRequest is the request to redeem a credit code for an
          account.
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayRedeemCreditCodeRequest'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRedeemCreditCodeResponse'
  # Model-config validation. POST with a mandatory JSON body; a successful
  # response is an object with no declared properties.
  /v1/validateModelConfig:
    servers:
      - url: https://api.fireworks.ai
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Validate Model Config
      operationId: Gateway_ValidateModelConfig
      security:
        - BearerAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayValidateModelConfigRequest'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Single account resource. GET returns a gatewayAccount; the account is
  # selected by the `account_id` path parameter.
  /v1/accounts/{account_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Account
      operationId: Gateway_GetAccount
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayAccount'
  # Filter-option lookup for account usage. GET requires a time range
  # (`startTime`, `endTime`) and optionally narrows to one usage stream.
  /v1/accounts/{account_id}/accountUsageFilterOptions:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      # The summary is kept to a short label; the explanatory sentence lives
      # in `description`.
      summary: Get Account Usage Filter Options
      description: >-
        Returns the distinct filter values for an account over a time range,
        covering both serverless and dedicated usage. Intended for frontend
        (FE) use; a separate operation that nevertheless mirrors
        GetAccountUsage.
      operationId: Gateway_GetAccountUsageFilterOptions
      security:
        - BearerAuth: []
      parameters:
        - name: startTime
          in: query
          required: true
          schema:
            type: string
            format: date-time
        - name: endTime
          in: query
          required: true
          schema:
            type: string
            format: date-time
        - name: usageType
          in: query
          required: false
          description: |-
            If not specified, loads filter options for both usage streams.

             - SERVERLESS: Serverless filter dimensions only (model_name, api_key_id, annotations.*).
             - DEDICATED_DEPLOYMENT: Dedicated deployment filter dimensions (deployment_name, annotations.team, .project, .environment).
          schema:
            type: string
            enum:
              - USAGE_TYPE_UNSPECIFIED
              - SERVERLESS
              - DEDICATED_DEPLOYMENT
            default: USAGE_TYPE_UNSPECIFIED
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetAccountUsageFilterOptionsResponse'
  # Audit-log listing for an account. All query parameters are optional; the
  # account is selected by the `account_id` path parameter.
  /v1/accounts/{account_id}/auditLogs:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List User Audit Logs
      operationId: Gateway_ListAuditLogs
      security:
        - BearerAuth: []
      parameters:
        # Time window and user filter.
        - name: startTime
          in: query
          required: false
          description: |-
            Start time of the audit logs to retrieve.
            If unspecified, the default is 30 days before now.
          schema:
            type: string
            format: date-time
        - name: endTime
          in: query
          required: false
          description: |-
            End time of the audit logs to retrieve.
            If unspecified, the default is the current time.
          schema:
            type: string
            format: date-time
        - name: email
          in: query
          required: false
          description: |-
            Optional.
            Filter audit logs for user email associated with the account.
          schema:
            type: string
        # Paging controls.
        - name: pageSize
          in: query
          required: false
          description: |-
            The maximum number of audit logs to return. The maximum page_size is 200,
            values above 200 will be coerced to 200.
            If unspecified, the default is 10.
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: |-
            A page token, received from a previous ListAuditLogs call. Provide this
            to retrieve the subsequent page. When paginating, all other parameters
            provided to ListAuditLogs must match the call that provided the page
            token.
          schema:
            type: string
        # Declared but documented as unused by this operation.
        - name: filter
          in: query
          required: false
          description: Unused but required to use existing ListRequest functionality.
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          description: Unused but required to use existing ListRequest functionality.
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListAuditLogsResponse'
  # Batch inference job collection for an account: GET lists jobs, POST
  # creates one from a gatewayBatchInferenceJob body.
  /v1/accounts/{account_id}/batchInferenceJobs:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List Batch Inference Jobs
      operationId: Gateway_ListBatchInferenceJobs
      security:
        - BearerAuth: []
      parameters:
        # Paging controls.
        - name: pageSize
          in: query
          required: false
          description: |-
            The maximum number of batch inference jobs to return. The maximum page_size is 200,
            values above 200 will be coerced to 200.
            If unspecified, the default is 50.
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: |-
            A page token, received from a previous ListBatchInferenceJobs call. Provide this
            to retrieve the subsequent page. When paginating, all other parameters
            provided to ListBatchInferenceJobs must match the call that provided the page
            token.
          schema:
            type: string
        # Result shaping.
        - name: filter
          in: query
          required: false
          description: |-
            Only jobs satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          description: |-
            A comma-separated list of fields to order by. e.g. "foo,bar"
            The default sort order is ascending. To specify a descending order for a
            field, append a " desc" suffix. e.g. "foo desc,bar"
            Subfields are specified with a "." character. e.g. "foo.bar"
            If not specified, the default order is by "created_time".
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListBatchInferenceJobsResponse'
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Create Batch Inference Job
      operationId: Gateway_CreateBatchInferenceJob
      security:
        - BearerAuth: []
      parameters:
        # Optional job ID supplied as a query parameter, alongside the body.
        - name: batchInferenceJobId
          in: query
          required: false
          description: ID of the batch inference job.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      # Request and response share the gatewayBatchInferenceJob schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayBatchInferenceJob'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayBatchInferenceJob'
  # Single batch inference job, addressed by account and job ID path
  # parameters: GET returns the job, DELETE removes it.
  /v1/accounts/{account_id}/batchInferenceJobs/{batch_inference_job_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Batch Inference Job
      operationId: Gateway_GetBatchInferenceJob
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: batch_inference_job_id
          in: path
          required: true
          description: The Batch Inference Job Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayBatchInferenceJob'
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: Delete Batch Inference Job
      operationId: Gateway_DeleteBatchInferenceJob
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: batch_inference_job_id
          in: path
          required: true
          description: The Batch Inference Job Id
          schema:
            type: string
      # A successful delete returns an object with no declared properties.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Billing summary for an account. Both ends of the billing period are
  # required query parameters.
  /v1/accounts/{account_id}/billing/summary:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get billing summary information for an account
      operationId: Gateway_GetBillingSummary
      security:
        - BearerAuth: []
      parameters:
        - name: startTime
          in: query
          required: true
          description: |-
            Start time for the billing period.
            Note: Costs are aggregated daily. Only the date portion (YYYY-MM-DD) is used;
            the time portion is ignored. For example, 2025-10-05T07:18:29Z and
            2025-10-05T23:59:59Z are treated the same as 2025-10-05T00:00:00Z.
          schema:
            type: string
            format: date-time
        - name: endTime
          in: query
          required: true
          description: |-
            End time for the billing period (exclusive).
            Note: Costs are aggregated daily. Only the date portion (YYYY-MM-DD) is used;
            the time portion is ignored. Costs for the end date are NOT included.
            For example, to get costs for Oct 5 and Oct 6, use:
              start_time: 2025-10-05T00:00:00Z
              end_time: 2025-10-07T00:00:00Z (Oct 7 is excluded)
          schema:
            type: string
            format: date-time
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetBillingSummaryResponse'
  # Account usage query (serverless and dedicated deployments). A time range
  # is required; usage type, timezone, grouping and filter are optional.
  # TODO: rename this to /accountUsage
  /v1/accounts/{account_id}/billingUsage:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      # Summary is a short label; the details live in `description`. The
      # rename TODO is kept as a comment above the path key so it does not
      # appear in published documentation.
      summary: Get account usage (serverless and dedicated deployments).
      description: >-
        Optionally filter by usage type via the `usageType` query parameter.
        If not specified, returns all usage types.
      operationId: Gateway_GetAccountUsage
      security:
        - BearerAuth: []
      parameters:
        # Required time range.
        - name: startTime
          in: query
          required: true
          description: |-
            Costs returned are inclusive of `start_time`.
            start_time must be before end_time.
          schema:
            type: string
            format: date-time
        - name: endTime
          in: query
          required: true
          description: |-
            Costs returned are exclusive of `end_time`.
            end_time must not be more than 31 days after start_time.
          schema:
            type: string
            format: date-time
        # Optional narrowing and aggregation controls.
        - name: usageType
          in: query
          required: false
          description: |-
            Usage type to query usage for
            If not specified, returns all usage types (both serverless and dedicated deployments).

             - USAGE_TYPE_UNSPECIFIED: Default value. When specified (or when usage_type field is not set),
            returns usage data for all deployment types: both serverless requests and dedicated deployments.
             - SERVERLESS: Returns only serverless usage data.
            Filters the response to include only usage from serverless API requests.
             - DEDICATED_DEPLOYMENT: Returns only dedicated deployment usage data.
            Filters the response to include only usage from dedicated deployments.
          schema:
            type: string
            enum:
              - USAGE_TYPE_UNSPECIFIED
              - SERVERLESS
              - DEDICATED_DEPLOYMENT
            default: USAGE_TYPE_UNSPECIFIED
        - name: timezone
          in: query
          required: false
          description: |-
            IANA timezone identifier for daily aggregation (e.g., "America/Los_Angeles", "Europe/London").
            When specified, the returned data will be aggregated into daily buckets based on this timezone.
            If not specified or empty, defaults to "UTC".
            See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
          schema:
            type: string
        - name: groupBy
          in: query
          required: false
          description: |-
            Dimension to group usage by (at most one value; repeated for wire compatibility).
            Serverless: "model_name", "api_key_id", "annotations.team", "annotations.project", "annotations.environment".
            Dedicated: "deployment_name", "accelerator_type", and the same annotation keys.
            When usage_type is unspecified, dimensions that apply only to the other stream are ignored there
            (e.g. "deployment_name" and "accelerator_type" are ignored for serverless; "model_name" and "api_key_id" for dedicated).
            Example: ["annotations.team"] or ["model_name"].
            If empty: serverless aggregates by model name; dedicated defaults to deployment and accelerator type.
          explode: true
          schema:
            type: array
            items:
              type: string
        # NOTE(review): the description says this is a map-typed variable
        # addressed as map_name[key]=value, yet the schema declares a plain
        # string. Confirm whether it should be modelled as an object
        # parameter (e.g. `style: deepObject`); the schema is left unchanged
        # here so generated clients are not affected.
        - name: filter
          in: query
          required: false
          description: >-
            This is a request variable of the map type. The query format is
            "map_name[key]=value", e.g. If the map name is Age, the key type is
            string, and the value type is integer, the query parameter is
            expressed as Age["bob"]=18
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayAccountUsage'
  # Checkpoint promotion. POST with a mandatory JSON body; the checkpoint is
  # addressed by the `account_id` and `checkpoint_id` path parameters.
  # The path key is quoted because it contains a colon.
  '/v1/accounts/{account_id}/checkpoints/{checkpoint_id}:promote':
    servers:
      - url: https://api.fireworks.ai
    post:
      tags:
        - gateway.openapi_Gateway
      # Summary is the one-line label; the explanatory text is carried by
      # `description`, as on the getConnectionInfo operation.
      summary: Promote a checkpoint to a model.
      description: >-
        The checkpoint is identified by account + snapshot ID; the trainer job
        ID is passed in the request body to resolve the GCS bucket.
      operationId: Gateway_PromoteCheckpoint
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: checkpoint_id
          in: path
          required: true
          description: The Checkpoint Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayPromoteCheckpointBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayPromoteCheckpointResponse'
  # Cluster collection for an account: GET lists clusters, POST creates one
  # from a GatewayCreateClusterBody.
  /v1/accounts/{account_id}/clusters:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List Clusters
      operationId: Gateway_ListClusters
      security:
        - BearerAuth: []
      parameters:
        # Paging controls.
        - name: pageSize
          in: query
          required: false
          description: |-
            The maximum number of clusters to return. The maximum page_size is 200,
            values above 200 will be coerced to 200.
            If unspecified, the default is 50.
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: |-
            A page token, received from a previous ListClusters call. Provide this
            to retrieve the subsequent page. When paginating, all other parameters
            provided to ListClusters must match the call that provided the page
            token.
          schema:
            type: string
        # Result shaping.
        - name: filter
          in: query
          required: false
          description: |-
            Only clusters satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          description: |-
            A comma-separated list of fields to order by. e.g. "foo,bar"
            The default sort order is ascending. To specify a descending order for a
            field, append a " desc" suffix. e.g. "foo desc,bar"
            Subfields are specified with a "." character. e.g. "foo.bar"
            If not specified, the default order is by "name".
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListClustersResponse'
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Create Cluster
      operationId: Gateway_CreateCluster
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateClusterBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayCluster'
  # Single cluster resource, addressed by account and cluster ID path
  # parameters: GET reads it, PATCH updates it, DELETE removes it.
  /v1/accounts/{account_id}/clusters/{cluster_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Cluster
      operationId: Gateway_GetCluster
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: cluster_id
          in: path
          required: true
          description: The Cluster Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayCluster'
    patch:
      tags:
        - gateway.openapi_Gateway
      summary: Update Cluster
      operationId: Gateway_UpdateCluster
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: cluster_id
          in: path
          required: true
          description: The Cluster Id
          schema:
            type: string
      requestBody:
        description: |-
          The properties of the cluster being updated. `cluster.name` must
          be populated with the updated resource's name.
        required: true
        content:
          application/json:
            # Inline schema for the update body. Fields flagged `readOnly`
            # are createTime, state, status and updateTime.
            schema:
              type: object
              title: |-
                The properties of the cluster being updated. `cluster.name` must
                be populated with the updated resource's name.
              properties:
                displayName:
                  type: string
                  description: |-
                    Human-readable display name of the cluster. e.g. "My Cluster"
                    Must be fewer than 64 characters long.
                createTime:
                  type: string
                  format: date-time
                  description: The creation time of the cluster.
                  readOnly: true
                eksCluster:
                  $ref: '#/components/schemas/gatewayEksCluster'
                fakeCluster:
                  $ref: '#/components/schemas/gatewayFakeCluster'
                state:
                  $ref: '#/components/schemas/gatewayClusterState'
                  description: The current state of the cluster.
                  readOnly: true
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  description: >-
                    Detailed information about the current status of the
                    cluster.
                  readOnly: true
                updateTime:
                  type: string
                  format: date-time
                  description: The update time for the cluster.
                  readOnly: true
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayCluster'
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: Delete Cluster
      operationId: Gateway_DeleteCluster
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: cluster_id
          in: path
          required: true
          description: The Cluster Id
          schema:
            type: string
      # A successful delete returns an object with no declared properties.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Connection-info lookup for one cluster; the 200 response carries a
  # gatewayClusterConnectionInfo.
  # The path key is quoted because it contains a colon.
  '/v1/accounts/{account_id}/clusters/{cluster_id}:getConnectionInfo':
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Cluster Connection Info
      description: Retrieve connection settings for the cluster to be put in kubeconfig
      operationId: Gateway_GetClusterConnectionInfo
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: cluster_id
          in: path
          required: true
          description: The Cluster Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayClusterConnectionInfo'
  # Credit redemption listing for an account. All query parameters are
  # optional; the account is selected by the `account_id` path parameter.
  /v1/accounts/{account_id}/creditRedemptions:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List Credit Redemptions
      operationId: Gateway_ListCreditRedemptions
      security:
        - BearerAuth: []
      parameters:
        # Paging controls.
        - name: pageSize
          in: query
          required: false
          description: The maximum number of redemptions to return
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: A page token, received from a previous ListCreditRedemptions call
          schema:
            type: string
        # Result shaping.
        - name: filter
          in: query
          required: false
          description: Filter string to filter redemptions
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          description: |-
            A comma-separated list of fields to order by. e.g. "foo,bar"
            The default sort order is ascending. To specify a descending order for a
            field, append a " desc" suffix. e.g. "foo desc,bar"
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListCreditRedemptionsResponse'
  # Dataset collection of an account:
  #   GET  - paginated listing (gatewayListDatasetsResponse)
  #   POST - create a dataset from a GatewayCreateDatasetBody payload
  /v1/accounts/{account_id}/datasets:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Datasets
      operationId: Gateway_ListDatasets
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListDatasetsResponse'
      parameters:
        # Pagination controls.
        - name: pageSize
          description: >-
            The maximum number of datasets to return. The maximum page_size is
            200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListDatasets call. Provide
            this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListDatasets must match the call that provided the page

            token.
          in: query
          required: false
          schema:
            type: string
        # Filtering, ordering and field selection.
        # The filter text previously said "model"; this endpoint lists datasets.
        - name: filter
          description: |-
            Only datasets satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        # Path parameter identifying the account.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      summary: Create Dataset
      operationId: Gateway_CreateDataset
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDataset'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      # The request body is mandatory; its shape is defined by the referenced
      # component schema.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateDatasetBody'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single dataset resource:
  #   GET    - fetch the dataset (optionally restricted by readMask)
  #   PATCH  - update the dataset; the body is an inline dataset object
  #   DELETE - remove the dataset; responds with an empty object
  /v1/accounts/{account_id}/datasets/{dataset_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: Get Dataset
      operationId: Gateway_GetDataset
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDataset'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: dataset_id
          in: path
          required: true
          description: The Dataset Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    patch:
      summary: Update Dataset
      operationId: Gateway_UpdateDataset
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDataset'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: dataset_id
          in: path
          required: true
          description: The Dataset Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              # Properties marked readOnly are server-populated fields.
              properties:
                displayName:
                  type: string
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                state:
                  $ref: '#/components/schemas/gatewayDatasetState'
                  readOnly: true
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  readOnly: true
                # 64-bit integer carried as a string (type string, format int64).
                exampleCount:
                  type: string
                  format: int64
                userUploaded:
                  $ref: '#/components/schemas/gatewayUserUploaded'
                evaluationResult:
                  $ref: '#/components/schemas/gatewayEvaluationResult'
                transformed:
                  $ref: '#/components/schemas/gatewayTransformed'
                splitted:
                  $ref: '#/components/schemas/gatewaySplitted'
                evalProtocol:
                  $ref: '#/components/schemas/gatewayEvalProtocol'
                externalUrl:
                  type: string
                  title: The external URI of the dataset. e.g. gs://foo/bar/baz.jsonl
                format:
                  $ref: '#/components/schemas/DatasetFormat'
                # Description previously referred to a fine-tuning job; this
                # field belongs to a dataset.
                createdBy:
                  type: string
                  description: The email address of the user who created this dataset.
                  readOnly: true
                updateTime:
                  type: string
                  format: date-time
                  description: The update time for the dataset.
                  readOnly: true
                sourceJobName:
                  type: string
                  description: >-
                    The resource name of the job that created this dataset
                    (e.g., batch inference job).

                    Used for lineage tracking to understand dataset provenance.
                estimatedTokenCount:
                  type: string
                  format: int64
                  description: The estimated number of tokens in the dataset.
                  readOnly: true
                averageTurnCount:
                  type: number
                  format: float
                  description: >-
                    An estimate of the average number of turns per sample in the
                    dataset.
                  readOnly: true
              title: |-
                The properties of the Dataset being updated. `dataset.name` must
                be populated with the updated resource's name.
        description: |-
          The properties of the Dataset being updated. `dataset.name` must
          be populated with the updated resource's name.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    delete:
      summary: Delete Dataset
      operationId: Gateway_DeleteDataset
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              # Empty object: no properties are declared for the response.
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: dataset_id
          in: path
          required: true
          description: The Dataset Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Dataset download endpoint: a GET returning
  # gatewayGetDatasetDownloadEndpointResponse, optionally covering the whole
  # lineage chain of the dataset.
  /v1/accounts/{account_id}/datasets/{dataset_id}:getDownloadEndpoint:
    servers:
      - url: "https://api.fireworks.ai"
    get:
      operationId: Gateway_GetDatasetDownloadEndpoint
      summary: Get Dataset Download Endpoint
      tags: [gateway.openapi_Gateway]
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameters.
        - name: readMask
          in: query
          required: false
          schema: {type: string}
          description: >-
            The fields to be returned in the response.
            If empty or "*", all fields will be returned.
        - name: downloadLineage
          in: query
          required: false
          schema: {type: boolean}
          description: >-
            If true, downloads entire lineage chain (all related datasets).

            Filenames will be prefixed with dataset IDs to avoid collisions.
        # Path parameters identifying the account and the dataset.
        - name: account_id
          in: path
          required: true
          schema: {type: string}
          description: The Account Id
        - name: dataset_id
          in: path
          required: true
          schema: {type: string}
          description: The Dataset Id
      responses:
        "200":
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/gatewayGetDatasetDownloadEndpointResponse"
          description: A successful response.
  # Dataset upload endpoint: a POST taking a mandatory
  # GatewayGetDatasetUploadEndpointBody and returning
  # gatewayGetDatasetUploadEndpointResponse.
  /v1/accounts/{account_id}/datasets/{dataset_id}:getUploadEndpoint:
    servers:
      - url: "https://api.fireworks.ai"
    post:
      operationId: Gateway_GetDatasetUploadEndpoint
      summary: Get Dataset Upload Endpoint
      tags: [gateway.openapi_Gateway]
      security:
        - BearerAuth: []
      parameters:
        # Path parameters identifying the account and the dataset.
        - name: account_id
          in: path
          required: true
          schema: {type: string}
          description: The Account Id
        - name: dataset_id
          in: path
          required: true
          schema: {type: string}
          description: The Dataset Id
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/GatewayGetDatasetUploadEndpointBody"
      responses:
        "200":
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/gatewayGetDatasetUploadEndpointResponse"
          description: A successful response.
  # Split dataset: a POST taking a mandatory GatewaySplitDatasetBody and
  # returning gatewaySplitDatasetResponse.
  /v1/accounts/{account_id}/datasets/{dataset_id}:splitDataset:
    servers:
      - url: https://api.fireworks.ai
    post:
      # Summary added for consistency with the other operations in this file,
      # all of which carry one.
      summary: Split Dataset
      operationId: Gateway_SplitDataset
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewaySplitDatasetResponse'
      parameters:
        # Path parameters identifying the account and the dataset.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: dataset_id
          in: path
          required: true
          description: The Dataset Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewaySplitDatasetBody'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Validate dataset upload: a POST taking a mandatory
  # GatewayValidateDatasetUploadBody; the success response is an empty object.
  /v1/accounts/{account_id}/datasets/{dataset_id}:validateUpload:
    servers:
      - url: "https://api.fireworks.ai"
    post:
      operationId: Gateway_ValidateDatasetUpload
      summary: Validate Dataset Upload
      tags: [gateway.openapi_Gateway]
      security:
        - BearerAuth: []
      parameters:
        # Path parameters identifying the account and the dataset.
        - name: account_id
          in: path
          required: true
          schema: {type: string}
          description: The Account Id
        - name: dataset_id
          in: path
          required: true
          schema: {type: string}
          description: The Dataset Id
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/GatewayValidateDatasetUploadBody"
      responses:
        "200":
          content:
            application/json:
              # No properties are declared for the response object.
              schema:
                properties: {}
                type: object
          description: A successful response.
  # Deployed-model (LoRA) collection of an account:
  #   GET  - paginated listing (gatewayListDeployedModelsResponse)
  #   POST - create a deployed model; body and response are gatewayDeployedModel
  /v1/accounts/{account_id}/deployedModels:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List LoRAs
      operationId: Gateway_ListDeployedModels
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListDeployedModelsResponse'
      parameters:
        # Pagination controls.
        - name: pageSize
          description: >-
            The maximum number of deployed models to return. The maximum
            page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListDeployedModels call.
            Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListDeployedModels must match the call that provided the
            page

            token.
          in: query
          required: false
          schema:
            type: string
        # Filtering, ordering and field selection.
        - name: filter
          description: >-
            Only deployed models satisfying the provided filter (if specified)
            will be

            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        # Path parameter identifying the account.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      summary: Load LoRA
      operationId: Gateway_CreateDeployedModel
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeployedModel'
      parameters:
        - name: replaceMergedAddon
          description: >-
            Merges new addon to the base model, while unmerging/deleting any
            existing addon in the deployment. Must be specified for hot reload
            deployments
          in: query
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayDeployedModel'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single deployed-model (LoRA) resource:
  #   GET    - fetch the deployed model (optionally restricted by readMask)
  #   PATCH  - update it; the body is an inline deployed-model object
  #   DELETE - unload it; responds with an empty object
  /v1/accounts/{account_id}/deployedModels/{deployed_model_id}:
    servers:
      - url: "https://api.fireworks.ai"
    get:
      operationId: Gateway_GetDeployedModel
      summary: Get LoRA
      tags: [gateway.openapi_Gateway]
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          schema: {type: string}
          description: >-
            The fields to be returned in the response.
            If empty or "*", all fields will be returned.
        - name: account_id
          in: path
          required: true
          schema: {type: string}
          description: The Account Id
        - name: deployed_model_id
          in: path
          required: true
          schema: {type: string}
          description: The Deployed Model Id
      responses:
        "200":
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/gatewayDeployedModel"
          description: A successful response.
    patch:
      operationId: Gateway_UpdateDeployedModel
      summary: Update LoRA
      tags: [gateway.openapi_Gateway]
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema: {type: string}
          description: The Account Id
        - name: deployed_model_id
          in: path
          required: true
          schema: {type: string}
          description: The Deployed Model Id
      requestBody:
        required: true
        description: |-
          The properties of the deployed model being updated. `deployed_model.name` must
          be populated with the updated resource's name.
        content:
          application/json:
            schema:
              title: |-
                The properties of the deployed model being updated. `deployed_model.name` must
                be populated with the updated resource's name.
              type: object
              # Property order is kept as in the original definition.
              properties:
                displayName:
                  type: string
                description:
                  description: Description of the resource.
                  type: string
                createTime:
                  description: The creation time of the resource.
                  type: string
                  format: date-time
                  readOnly: true
                model:
                  title: >-
                    The resource name of the model to be deployed.

                    e.g. accounts/my-account/models/my-model
                  type: string
                deployment:
                  description: >-
                    The resource name of the base deployment
                    the model is deployed to.
                  type: string
                default:
                  description: |-
                    If true, this is the default target when querying this model without
                    the `#<deployment>` suffix.
                    The first deployment a model is deployed to will have this field set to true.
                  type: boolean
                state:
                  description: The state of the deployed model.
                  readOnly: true
                  $ref: "#/components/schemas/gatewayDeployedModelState"
                serverless:
                  title: True if the underlying deployment is managed by Fireworks
                  type: boolean
                status:
                  description: Contains model deploy/undeploy details.
                  readOnly: true
                  $ref: "#/components/schemas/gatewayStatus"
                public:
                  description: If true, the deployed model will be publicly reachable.
                  type: boolean
                updateTime:
                  description: The update time for the deployed model.
                  type: string
                  format: date-time
                  readOnly: true
      responses:
        "200":
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/gatewayDeployedModel"
          description: A successful response.
    delete:
      operationId: Gateway_DeleteDeployedModel
      summary: Unload LoRA
      tags: [gateway.openapi_Gateway]
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema: {type: string}
          description: The Account Id
        - name: deployed_model_id
          in: path
          required: true
          schema: {type: string}
          description: The Deployed Model Id
      responses:
        "200":
          content:
            application/json:
              # No properties are declared for the response object.
              schema:
                properties: {}
                type: object
          description: A successful response.
  # Deployment-shape collection of an account:
  #   GET  - paginated listing (gatewayListDeploymentShapesResponse),
  #          optionally narrowed by targetModel
  #   POST - create a deployment shape; body and response are
  #          gatewayDeploymentShape
  /v1/accounts/{account_id}/deploymentShapes:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Deployment Shapes
      operationId: Gateway_ListDeploymentShapes
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListDeploymentShapesResponse'
      parameters:
        # Pagination controls.
        - name: pageSize
          description: >-
            The maximum number of deployment shapes to return. The maximum
            page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListDeploymentShapes call.
            Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListDeploymentShapes must match the call that provided
            the page

            token.
          in: query
          required: false
          schema:
            type: string
        # Filtering, ordering and field selection.
        - name: filter
          description: >-
            Only deployment shapes satisfying the provided filter (if
            specified) will be

            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "create_time".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: targetModel
          description: >-
            Target model that the returned deployment shapes should be
            compatible with.
          in: query
          required: false
          schema:
            type: string
        # Path parameter identifying the account.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      # The summary is a short operation title; the leaked section heading
      # "CRUD APIs for deployment shape." was removed from it.
      summary: Create Deployment Shape
      operationId: Gateway_CreateDeploymentShape
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeploymentShape'
      parameters:
        - name: deploymentShapeId
          description: >-
            The ID of the deployment shape. If not specified, a random ID will
            be generated.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayDeploymentShape'
        description: The properties of the deployment shape being created.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  /v1/accounts/{account_id}/deploymentShapes/{deployment_shape_id}:
    servers:
      - url: https://api.fireworks.ai
    # Fetch one deployment shape (response schema gatewayDeploymentShape).
    get:
      operationId: Gateway_GetDeploymentShape
      summary: Get Deployment Shape
      tags: [gateway.openapi_Gateway]
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameters.
        - name: readMask
          in: query
          required: false
          schema: {type: string}
          description: >-
            The fields to be returned in the response.
            If empty or "*", all fields will be returned.
        - name: skipShapeValidation
          in: query
          required: false
          schema: {type: boolean}
          description: >-
            If true, returns the latest version regardless of validation status.

            By default, returns the latest validated version.
        # Path parameters identifying the account and the deployment shape.
        - name: account_id
          in: path
          required: true
          schema: {type: string}
          description: The Account Id
        - name: deployment_shape_id
          in: path
          required: true
          schema: {type: string}
          description: The Deployment Shape Id
      responses:
        "200":
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/gatewayDeploymentShape"
          description: A successful response.
    patch:
      summary: Update Deployment Shape
      operationId: Gateway_UpdateDeploymentShape
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeploymentShape'
      parameters:
        - name: fromLatestValidated
          description: >-
            When true, the update will use the latest validated version snapshot
            as the base

            for fields not present in the update mask; otherwise, the current
            shape is used.
          in: query
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_shape_id
          in: path
          required: true
          description: The Deployment Shape Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                displayName:
                  type: string
                  description: >-
                    Human-readable display name of the deployment shape. e.g.
                    "My Deployment Shape"

                    Must be fewer than 64 characters long.
                description:
                  type: string
                  description: >-
                    The description of the deployment shape. Must be fewer than
                    1000 characters long.
                createTime:
                  type: string
                  format: date-time
                  description: The creation time of the deployment shape.
                  readOnly: true
                updateTime:
                  type: string
                  format: date-time
                  description: The update time for the deployment shape.
                  readOnly: true
                baseModel:
                  type: string
                  title: >-
                    The base model name. e.g.
                    accounts/fireworks/models/falcon-7b
                modelType:
                  type: string
                  description: The model type of the base model.
                  readOnly: true
                parameterCount:
                  type: string
                  format: int64
                  description: The parameter count of the base model .
                  readOnly: true
                acceleratorCount:
                  type: integer
                  format: int32
                  description: >-
                    The number of accelerators used per replica.

                    If not specified, the default is the estimated minimum
                    required by the base model.
                acceleratorType:
                  $ref: '#/components/schemas/gatewayAcceleratorType'
                  description: |-
                    The type of accelerator to use.
                    If not specified, the default is NVIDIA_A100_80GB.
                precision:
                  $ref: '#/components/schemas/DeploymentPrecision'
                  description: The precision with which the model should be served.
                disableDeploymentSizeValidation:
                  type: boolean
                  description: If true, the deployment size validation is disabled.
                enableAddons:
                  type: boolean
                  description: >-
                    If true, LORA addons are enabled for deployments created
                    from this shape.
                draftTokenCount:
                  type: integer
                  format: int32
                  description: >-
                    The number of candidate tokens to generate per step for
                    speculative

                    decoding.

                    Default is the base model's draft_token_count.
                draftModel:
                  type: string
                  description: >-
                    The draft model name for speculative decoding. e.g.
                    accounts/fireworks/models/my-draft-model

                    If empty, speculative decoding using a draft model is
                    disabled.

                    Default is the base model's default_draft_model.

                    Deprecated: set default_draft_model on the base model
                    instead.
                ngramSpeculationLength:
                  type: integer
                  format: int32
                  description: >-
                    The length of previous input sequence to be considered for
                    N-gram speculation.
                disableSpeculativeDecoding:
                  type: boolean
                  description: >-
                    If true, speculative decoding is disabled for deployments
                    created from this shape,

                    even if the base model has default draft model settings.
                enableSessionAffinity:
                  type: boolean
                  description: Whether to apply sticky routing based on `user` field.
                numLoraDeviceCached:
                  type: integer
                  format: int32
                  title: How many LORA adapters to keep on GPU side for caching
                maxContextLength:
                  type: integer
                  format: int32
                  description: >-
                    The maximum context length supported by the model (context
                    window).

                    If set to 0 or not specified, the model's default maximum
                    context length will be used.
                presetType:
                  $ref: '#/components/schemas/DeploymentShapePresetType'
                  description: >-
                    Type of deployment shape for different deployment
                    configurations.
              title: >-
                The properties of the deployment shape being updated.
                `deployment_shape.name` must

                be populated with the updated resource's name.
              required:
                - baseModel
        description: >-
          The properties of the deployment shape being updated.
          `deployment_shape.name` must

          be populated with the updated resource's name.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # Deletes a deployment shape. Both identifiers come from the path; the 200
    # response body is an object with no declared properties.
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: Delete Deployment Shape
      operationId: Gateway_DeleteDeploymentShape
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_shape_id
          in: path
          required: true
          description: The Deployment Shape Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Lists the versions of a single deployment shape. Paging (pageSize,
  # pageToken), filter, orderBy and readMask are optional query parameters;
  # account_id and deployment_shape_id come from the path.
  /v1/accounts/{account_id}/deploymentShapes/{deployment_shape_id}/versions:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Deployment Shape Versions
      operationId: Gateway_ListDeploymentShapeVersions
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayListDeploymentShapeVersionsResponse
      parameters:
        - name: pageSize
          description: >-
            The maximum number of deployment shape versions to return. The
            maximum page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListDeploymentShapeVersions
            call. Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListDeploymentShapeVersions must match the call that
            provided the page

            token.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: >-
            Only deployment shape versions satisfying the provided filter (if
            specified) will be

            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "create_time".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_shape_id
          in: path
          required: true
          description: The Deployment Shape Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  /v1/accounts/{account_id}/deploymentShapes/{deployment_shape_id}/versions/{version_id}:
    servers:
      - url: https://api.fireworks.ai
    # Fetches one version of a deployment shape, addressed by the three path
    # parameters; readMask optionally narrows the returned fields.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Deployment Shape Version
      operationId: Gateway_GetDeploymentShapeVersion
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          description: |-
            The fields to be returned in the response. If empty or "*", all fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_shape_id
          in: path
          required: true
          description: The Deployment Shape Id
          schema:
            type: string
        - name: version_id
          in: path
          required: true
          description: The Version Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeploymentShapeVersion'
    # Updates a deployment shape version. The request body is an inline object
    # schema; createTime, snapshot and latestValidated are marked readOnly.
    patch:
      tags:
        - gateway.openapi_Gateway
      summary: Update Deployment Shape Version
      operationId: Gateway_UpdateDeploymentShapeVersion
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_shape_id
          in: path
          required: true
          description: The Deployment Shape Id
          schema:
            type: string
        - name: version_id
          in: path
          required: true
          description: The Version Id
          schema:
            type: string
      requestBody:
        required: true
        description: |-
          The properties of the deployment shape version being updated. `deployment_shape_version.name` must
          be populated with the updated resource's name.
        content:
          application/json:
            schema:
              type: object
              title: |-
                The properties of the deployment shape version being updated. `deployment_shape_version.name` must
                be populated with the updated resource's name.
              properties:
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: |-
                    The creation time of the deployment shape version. Lists will be ordered by this field.
                snapshot:
                  $ref: '#/components/schemas/gatewayDeploymentShape'
                  readOnly: true
                  description: Full snapshot of the Deployment Shape at this version.
                validated:
                  type: boolean
                  description: If true, this version has been validated.
                public:
                  type: boolean
                  description: If true, this version will be publicly readable.
                latestValidated:
                  type: boolean
                  readOnly: true
                  description: |-
                    If true, this version is the latest validated version.
                    Only one version of the shape can be the latest validated version.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeploymentShapeVersion'
  /v1/accounts/{account_id}/deployments:
    servers:
      - url: https://api.fireworks.ai
    # Lists the deployments of an account. Paging (pageSize, pageToken), filter,
    # orderBy, showDeleted and readMask are optional query parameters.
    get:
      summary: List Deployments
      operationId: Gateway_ListDeployments
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListDeploymentsResponse'
      parameters:
        - name: pageSize
          description: >-
            The maximum number of deployments to return. The maximum page_size
            is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListDeployments call. Provide
            this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListDeployments must match the call that provided the
            page

            token.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: >-
            Only deployments satisfying the provided filter (if specified) will
            be

            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "create_time".
          in: query
          required: false
          schema:
            type: string
        - name: showDeleted
          description: If set, DELETED deployments will be included.
          in: query
          required: false
          schema:
            type: boolean
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # Creates a deployment from a gatewayDeployment request body. Every query
    # parameter is optional; only account_id (path) and the body are required.
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Create Deployment
      operationId: Gateway_CreateDeployment
      security:
        - BearerAuth: []
      parameters:
        - name: disableAutoDeploy
          in: query
          required: false
          description: |-
            By default, a deployment created with a currently undeployed base model
            will be deployed to this deployment. If true, this auto-deploy function
            is disabled.
          schema:
            type: boolean
        - name: disableSpeculativeDecoding
          in: query
          required: false
          description: |-
            By default, a deployment will use the speculative decoding settings from
            the base model. If true, this will disable speculative decoding.
          schema:
            type: boolean
        - name: deploymentId
          in: query
          required: false
          description: |-
            The ID of the deployment. If not specified, a random ID will be generated.
          schema:
            type: string
        - name: validateOnly
          in: query
          required: false
          description: |-
            If true, this will not create the deployment, but will return the deployment
            that would be created.
          schema:
            type: boolean
        - name: skipShapeValidation
          in: query
          required: false
          description: |-
            By default, a deployment will ensure the deployment shape provided is validated.
            If true, we will not require the deployment shape to be validated.
          schema:
            type: boolean
        - name: skipImageTagValidation
          in: query
          required: false
          description: |-
            If true, skip the image tag policy validation that blocks certain image tags.
            This allows creating deployments with image tags that would otherwise be blocked.
          schema:
            type: boolean
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        required: true
        description: The properties of the deployment being created.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayDeployment'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeployment'
  /v1/accounts/{account_id}/deployments/{deployment_id}:
    servers:
      - url: https://api.fireworks.ai
    # Reads one deployment, addressed by account_id and deployment_id in the
    # path; readMask optionally limits which fields are returned.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Deployment
      operationId: Gateway_GetDeployment
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          description: |-
            The fields to be returned in the response. If empty or "*", all fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_id
          in: path
          required: true
          description: The Deployment Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeployment'
    patch:
      summary: Update Deployment
      operationId: Gateway_UpdateDeployment
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeployment'
      parameters:
        - name: skipShapeValidation
          description: >-
            By default, updating a deployment shape will ensure the new
            deployment shape provided is validated.

            If true, we will not require the deployment shape to be validated.
          in: query
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_id
          in: path
          required: true
          description: The Deployment Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                displayName:
                  type: string
                  description: >-
                    Human-readable display name of the deployment. e.g. "My
                    Deployment"

                    Must be fewer than 64 characters long.
                description:
                  type: string
                  description: Description of the deployment.
                createTime:
                  type: string
                  format: date-time
                  description: The creation time of the deployment.
                  readOnly: true
                expireTime:
                  type: string
                  format: date-time
                  description: >-
                    Deprecated: This field is deprecated and no longer causes
                    auto-deletion.

                    The time at which this deployment will automatically be
                    deleted.
                purgeTime:
                  type: string
                  format: date-time
                  description: The time at which the resource will be hard deleted.
                  readOnly: true
                deleteTime:
                  type: string
                  format: date-time
                  description: The time at which the resource will be soft deleted.
                  readOnly: true
                state:
                  $ref: '#/components/schemas/gatewayDeploymentState'
                  description: The state of the deployment.
                  readOnly: true
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  description: >-
                    Detailed status information regarding the most recent
                    operation.
                  readOnly: true
                annotations:
                  type: object
                  additionalProperties:
                    type: string
                  description: >-
                    Annotations to identify deployment properties.

                    Key/value pairs may be used by external tools or other
                    services.

                    The "image-tag-reason" key is redacted from API responses
                    for non-superuser principals.
                minReplicaCount:
                  type: integer
                  format: int32
                  description: |-
                    The minimum number of replicas.
                    If not specified, the default is 0.
                maxReplicaCount:
                  type: integer
                  format: int32
                  description: |-
                    The maximum number of replicas.
                    If not specified, the default is max(min_replica_count, 1).
                    May be set to 0 to downscale the deployment to 0.
                maxWithRevocableReplicaCount:
                  type: integer
                  format: int32
                  description: >-
                    max_with_revocable_replica_count is max replica count
                    including revocable capacity.

                    The max revocable capacity will be
                    max_with_revocable_replica_count - max_replica_count.
                desiredReplicaCount:
                  type: integer
                  format: int32
                  description: >-
                    The desired number of replicas for this deployment. This
                    represents the target

                    replica count that the system is trying to achieve.
                  readOnly: true
                replicaCount:
                  type: integer
                  format: int32
                  readOnly: true
                autoscalingPolicy:
                  $ref: '#/components/schemas/gatewayAutoscalingPolicy'
                baseModel:
                  type: string
                  title: >-
                    The base model name. e.g.
                    accounts/fireworks/models/falcon-7b
                acceleratorCount:
                  type: integer
                  format: int32
                  description: >-
                    The number of accelerators used per replica.

                    If not specified, the default is the estimated minimum
                    required by the

                    base model.
                acceleratorType:
                  $ref: '#/components/schemas/gatewayAcceleratorType'
                  description: The type of accelerator to use.
                precision:
                  $ref: '#/components/schemas/DeploymentPrecision'
                  description: The precision with which the model should be served.
                cluster:
                  type: string
                  description: >-
                    If set, this deployment is deployed to a cloud-premise
                    cluster.
                  readOnly: true
                enableAddons:
                  type: boolean
                  description: If true, PEFT addons are enabled for this deployment.
                # The opt-out referenced below is CreateDeployment's
                # disableSpeculativeDecoding flag, which disables speculative
                # decoding when set to true.
                draftTokenCount:
                  type: integer
                  format: int32
                  description: >-
                    The number of candidate tokens to generate per step for
                    speculative

                    decoding.

                    Default is the base model's draft_token_count. Set

                    CreateDeploymentRequest.disable_speculative_decoding to
                    true to disable

                    this behavior.
                # The opt-out referenced below is CreateDeployment's
                # disableSpeculativeDecoding flag, which disables speculative
                # decoding when set to true.
                draftModel:
                  type: string
                  description: >-
                    The draft model name for speculative decoding. e.g.
                    accounts/fireworks/models/my-draft-model

                    If empty, speculative decoding using a draft model is
                    disabled.

                    Default is the base model's default_draft_model. Set

                    CreateDeploymentRequest.disable_speculative_decoding to
                    true to disable

                    this behavior.
                ngramSpeculationLength:
                  type: integer
                  format: int32
                  description: >-
                    The length of previous input sequence to be considered for
                    N-gram speculation.
                enableSessionAffinity:
                  type: boolean
                  description: |-
                    Whether to apply sticky routing based on `user` field.
                    Serverless will be set to true when creating deployment.
                directRouteApiKeys:
                  type: array
                  items:
                    type: string
                  description: >-
                    The set of API keys used to access the direct route
                    deployment. If direct routing is not enabled, this field is
                    unused.
                numPeftDeviceCached:
                  type: integer
                  format: int32
                  title: How many peft adapters to keep on gpu side for caching
                directRouteType:
                  $ref: '#/components/schemas/gatewayDirectRouteType'
                  description: >-
                    If set, this deployment will expose an endpoint that
                    bypasses the Fireworks API gateway.
                directRouteHandle:
                  type: string
                  description: >-
                    The handle for calling a direct route. The meaning of the
                    handle depends on the

                    direct route type of the deployment:
                       INTERNET                    -> The host name for accessing the deployment
                       GCP_PRIVATE_SERVICE_CONNECT -> The service attachment name used to create the PSC endpoint.
                       AWS_PRIVATELINK             -> The service name used to create the VPC endpoint.
                  readOnly: true
                deploymentTemplate:
                  type: string
                  description: >-
                    The name of the deployment template to use for this
                    deployment. Only

                    available to enterprise accounts.
                autoTune:
                  $ref: '#/components/schemas/gatewayAutoTune'
                  description: The performance profile to use for this deployment.
                placement:
                  $ref: '#/components/schemas/gatewayPlacement'
                  description: >-
                    The desired geographic region where the deployment must be
                    placed.

                    If unspecified, the default is the GLOBAL multi-region.
                region:
                  $ref: '#/components/schemas/gatewayRegion'
                  description: >-
                    The geographic region where the deployment is presently
                    located. This region may change

                    over time, but within the `placement` constraint.
                  readOnly: true
                maxContextLength:
                  type: integer
                  format: int32
                  description: >-
                    The maximum context length supported by the model (context
                    window).

                    If set to 0 or not specified, the model's default maximum
                    context length will be used.
                updateTime:
                  type: string
                  format: date-time
                  description: The update time for the deployment.
                  readOnly: true
                disableDeploymentSizeValidation:
                  type: boolean
                  description: Whether the deployment size validation is disabled.
                enableHotLoad:
                  type: boolean
                  description: Whether to use hot load for this deployment.
                hotLoadBucketType:
                  $ref: '#/components/schemas/DeploymentHotLoadBucketType'
                  title: >-
                    hot load bucket name, indicate what type of storage to use
                    for hot load
                enableHotReloadLatestAddon:
                  type: boolean
                  description: >-
                    Allows up to 1 addon at a time to be loaded, and will merge
                    it into the base model.
                deploymentShape:
                  type: string
                  description: >-
                    The name of the deployment shape that this deployment is
                    using.

                    On the server side, this will be replaced with the
                    deployment shape version name.
                activeModelVersion:
                  type: string
                  description: >-
                    The model version that is currently active and applied to
                    running replicas of a deployment.
                targetModelVersion:
                  type: string
                  description: >-
                    The target model version that is being rolled out to the
                    deployment.

                    In a ready steady state, the target model version is the
                    same as the active model version.
                replicaStats:
                  $ref: '#/components/schemas/gatewayReplicaStats'
                  description: >-
                    Per-replica deployment status counters. Provides visibility
                    into the deployment process

                    by tracking replicas in different stages of the deployment
                    lifecycle.
                  readOnly: true
                hotLoadBucketUrl:
                  type: string
                  title: >-
                    For hot load bucket location

                    e.g for s3: s3://mybucket/<object_path>; for GCS:
                    gs://mybucket/<object_path>, no trailing slash
                pricingPlanId:
                  type: string
                  description: >-
                    Optional pricing plan ID for custom billing configuration.

                    If set, this deployment will use the pricing plan's billing
                    rules

                    instead of default billing behavior.
                # The multi-paragraph explanation is carried in `description`;
                # `title` is intended for a short label only.
                hotLoadTrainerJob:
                  type: string
                  description: >-
                    Trainer job whose hot_load_bucket_url this deployment should
                    use.

                    At creation, the trainer's bucket URL is copied into this
                    deployment's

                    hot_load_bucket_url. The deployment continues working after
                    the trainer

                    is deleted (snapshot-at-creation semantics).

                    Format: accounts/{account}/rlorTrainerJobs/{job}
              title: >-
                The properties of the deployment being updated.
                `deployment.name` must

                be populated with the updated resource's name.
              required:
                - baseModel
        description: |-
          The properties of the deployment being updated. `deployment.name` must
          be populated with the updated resource's name.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # Deletes a deployment. `hard` requests a hard deletion and `ignoreChecks`
    # forces deletion of a deployment that is deployed and in use. The 200
    # response body is an object with no declared properties.
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: Delete Deployment
      operationId: Gateway_DeleteDeployment
      security:
        - BearerAuth: []
      parameters:
        - name: hard
          in: query
          required: false
          description: If true, this will perform a hard deletion.
          schema:
            type: boolean
        - name: ignoreChecks
          in: query
          required: false
          description: |-
            If true, this will ignore checks and force the deletion of a deployment that is currently
            deployed and is in use.
          schema:
            type: boolean
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_id
          in: path
          required: true
          description: The Deployment Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Ledger sub-resource of a deployment: GET returns it, DELETE resets it
  # ("Reset ledger for hot load").
  /v1/accounts/{account_id}/deployments/{deployment_id}/ledger:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get ledger
      operationId: Gateway_GetLedger
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: deployment_id
          in: path
          description: The Deployment Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetLedgerResponse'
      security:
        - BearerAuth: []
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: Reset ledger for hot load
      operationId: Gateway_ResetLedger
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: deployment_id
          in: path
          description: The Deployment Id
          required: true
          schema:
            type: string
      responses:
        # Success returns an empty JSON object.
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      security:
        - BearerAuth: []
  # Paginated listing of the shards that belong to one deployment.
  /v1/accounts/{account_id}/deployments/{deployment_id}/shards:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Deployment Shards
      operationId: Gateway_ListDeploymentShards
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListDeploymentShardsResponse'
      parameters:
        # Pagination controls.
        - name: pageSize
          description: >-
            The maximum number of deployment shards to return. The maximum
            page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListDeploymentShards call.
            Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListDeploymentShards must match the call that provided
            the page

            token.
          in: query
          required: false
          schema:
            type: string
        # Filtering, ordering and field selection.
        - name: filter
          description: >-
            Only deployment shards satisfying the provided filter (if specified)
            will be

            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "create_time".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        # Path parameters identifying the parent deployment.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_id
          in: path
          required: true
          description: The Deployment Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Read-only endpoint returning the termination message of a deployment.
  /v1/accounts/{account_id}/deployments/{deployment_id}/terminationMessage:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get termination message from deployment
      operationId: Gateway_GetTerminationMessage
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: deployment_id
          in: path
          description: The Deployment Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetTerminationMessageResponse'
      security:
        - BearerAuth: []
  # Custom `:metrics` action on a deployment. The summary marks it deprecated.
  /v1/accounts/{account_id}/deployments/{deployment_id}:metrics:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: Get Deployment Metrics (Deprecated)
      operationId: Gateway_GetDeploymentMetrics
      # Machine-readable counterpart of the "(Deprecated)" note in the summary,
      # so documentation renderers and client generators can flag the operation.
      deprecated: true
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetDeploymentMetricsResponse'
      parameters:
        - name: timeRange
          description: >-
            The time range to fetch metrics for (e.g. "1m", "10m", "2h").
            Defaults to 10m.
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        # Path parameters identifying the deployment.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: deployment_id
          in: path
          required: true
          description: The Deployment Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Custom `:scale` action on a deployment. The required PATCH body is a
  # GatewayScaleDeploymentBody; success returns an empty JSON object.
  /v1/accounts/{account_id}/deployments/{deployment_id}:scale:
    servers:
      - url: https://api.fireworks.ai
    patch:
      tags:
        - gateway.openapi_Gateway
      summary: Scale Deployment to a specific number of replicas or to zero
      operationId: Gateway_ScaleDeployment
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: deployment_id
          in: path
          description: The Deployment Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayScaleDeploymentBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      security:
        - BearerAuth: []
  # Custom `:undelete` action on a deployment. The required POST body is a
  # GatewayUndeleteDeploymentBody; the response is a gatewayDeployment.
  /v1/accounts/{account_id}/deployments/{deployment_id}:undelete:
    servers:
      - url: https://api.fireworks.ai
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Undelete Deployment
      operationId: Gateway_UndeleteDeployment
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: deployment_id
          in: path
          description: The Deployment Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayUndeleteDeploymentBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeployment'
      security:
        - BearerAuth: []
  # Developer pass collection of an account: GET lists, POST creates.
  /v1/accounts/{account_id}/developerPasses:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Developer Passes
      operationId: Gateway_ListDeveloperPasses
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListDeveloperPassesResponse'
      parameters:
        # Pagination controls.
        - name: pageSize
          description: Maximum number of developer passes to return.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: Page token from a previous ListDeveloperPasses call.
          in: query
          required: false
          schema:
            type: string
        # Filtering, ordering and field selection.
        - name: filter
          description: Filter expression (e.g., "state=ACTIVE")
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: Order by expression (e.g., "create_time desc")
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      # Request and response bodies both use the gatewayDeveloperPass schema.
      summary: Create Developer Pass
      operationId: Gateway_CreateDeveloperPass
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeveloperPass'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayDeveloperPass'
        description: The developer pass to create.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single developer pass: GET reads it, PATCH updates it.
  # NOTE: the path template name `developer_passe_id` is kept as-is; renaming a
  # path parameter would change the names seen by generated clients.
  /v1/accounts/{account_id}/developerPasses/{developer_passe_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: Get Developer Pass
      operationId: Gateway_GetDeveloperPass
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeveloperPass'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: developer_passe_id
          in: path
          required: true
          description: The Developer Pass Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    patch:
      summary: Update Developer Pass
      operationId: Gateway_UpdateDeveloperPass
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDeveloperPass'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: developer_passe_id
          in: path
          required: true
          description: The Developer Pass Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              # Inline body schema. Of the properties below, only `autoRenew`
              # and `state` are not marked readOnly.
              type: object
              properties:
                autoRenew:
                  type: boolean
                  description: >-
                    Whether the developer pass will automatically renew upon
                    expiry.
                endTime:
                  type: string
                  format: date-time
                  description: The time at which the developer pass ends.
                  readOnly: true
                createTime:
                  type: string
                  format: date-time
                  description: Creation timestamp.
                  readOnly: true
                updateTime:
                  type: string
                  format: date-time
                  description: Last update timestamp.
                  readOnly: true
                state:
                  $ref: '#/components/schemas/gatewayDeveloperPassState'
                  description: State of the developer pass (active or expired).
                lastRenewTime:
                  type: string
                  format: date-time
                  description: The time at which the developer pass was last renewed.
                  readOnly: true
              title: The developer pass to update.
        description: The developer pass to update.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # DPO job collection of an account: GET lists, POST creates.
  /v1/accounts/{account_id}/dpoJobs:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Dpo Jobs
      operationId: Gateway_ListDpoJobs
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListDpoJobsResponse'
      parameters:
        # Pagination controls.
        - name: pageSize
          description: >-
            The maximum number of dpo jobs to return. The maximum page_size is
            200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListDpoJobs call. Provide
            this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListDpoJobs must match the call that provided the page

            token.
          in: query
          required: false
          schema:
            type: string
        # Filtering, ordering and field selection.
        - name: filter
          description: >-
            Filter criteria for the returned jobs. See
            https://google.aip.dev/160 for the filter syntax specification.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      # Request and response bodies both use the gatewayDpoJob schema.
      summary: Create Dpo Job
      operationId: Gateway_CreateDpoJob
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDpoJob'
      parameters:
        - name: dpoJobId
          description: ID of the DPO job, a random ID will be generated if not specified.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayDpoJob'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single DPO job: GET reads it, DELETE removes it.
  /v1/accounts/{account_id}/dpoJobs/{dpo_job_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: Get Dpo Job
      operationId: Gateway_GetDpoJob
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDpoJob'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: dpo_job_id
          in: path
          required: true
          description: The Dpo Job Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    delete:
      summary: Delete Dpo Job
      operationId: Gateway_DeleteDpoJob
      responses:
        # Success returns an empty JSON object.
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: dpo_job_id
          in: path
          required: true
          description: The Dpo Job Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Custom `:cancel` action on a DPO job. The required POST body is a
  # GatewayCancelDpoJobBody; success returns an empty JSON object.
  /v1/accounts/{account_id}/dpoJobs/{dpo_job_id}:cancel:
    servers:
      - url: https://api.fireworks.ai
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Cancel Dpo Job
      operationId: Gateway_CancelDpoJob
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: dpo_job_id
          in: path
          description: The Dpo Job Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCancelDpoJobBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      security:
        - BearerAuth: []
  # Custom `:getMetricsFileEndpoint` action on a DPO job; the response schema
  # is gatewayGetDpoJobMetricsFileResponse.
  /v1/accounts/{account_id}/dpoJobs/{dpo_job_id}:getMetricsFileEndpoint:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: Get Dpo Job Metrics File Endpoint
      operationId: Gateway_GetDpoJobMetricsFileEndpoint
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetDpoJobMetricsFileResponse'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: dpo_job_id
          in: path
          required: true
          description: The Dpo Job Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Custom `:resume` action on a DPO job. The required POST body is a
  # GatewayResumeDpoJobBody; the response is a gatewayDpoJob.
  /v1/accounts/{account_id}/dpoJobs/{dpo_job_id}:resume:
    servers:
      - url: https://api.fireworks.ai
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Resume Dpo Job
      operationId: Gateway_ResumeDpoJob
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: dpo_job_id
          in: path
          description: The Dpo Job Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayResumeDpoJobBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayDpoJob'
      security:
        - BearerAuth: []
  # Evaluation job collection of an account: GET lists, POST creates.
  /v1/accounts/{account_id}/evaluationJobs:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Evaluation Jobs
      operationId: Gateway_ListEvaluationJobs
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListEvaluationJobsResponse'
      parameters:
        # NOTE(review): the descriptions on pageSize, pageToken, filter and
        # orderBy follow the wording of the other List* operations in this
        # spec; confirm limits and filter grammar against the service.
        - name: pageSize
          description: The maximum number of evaluation jobs to return.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListEvaluationJobs call.
            Provide this to retrieve the subsequent page.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: >-
            Only evaluation jobs satisfying the provided filter (if specified)
            will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: Specifies the ordering of the returned evaluation jobs.
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      # Body is a GatewayCreateEvaluationJobBody; the response is the
      # gatewayEvaluationJob resource.
      summary: Create Evaluation Job
      operationId: Gateway_CreateEvaluationJob
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluationJob'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateEvaluationJobBody'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single evaluation job: GET reads it, DELETE removes it.
  /v1/accounts/{account_id}/evaluationJobs/{evaluation_job_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Evaluation Job
      operationId: Gateway_GetEvaluationJob
      parameters:
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response.
            If empty or "*", all fields will be returned.
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: evaluation_job_id
          in: path
          description: The Evaluation Job Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluationJob'
      security:
        - BearerAuth: []
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: Delete Evaluation Job
      operationId: Gateway_DeleteEvaluationJob
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: evaluation_job_id
          in: path
          description: The Evaluation Job Id
          required: true
          schema:
            type: string
      responses:
        # Success returns an empty JSON object.
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      security:
        - BearerAuth: []
  # Custom `:getExecutionLogEndpoint` action on an evaluation job.
  /v1/accounts/{account_id}/evaluationJobs/{evaluation_job_id}:getExecutionLogEndpoint:
    servers:
      - url: https://api.fireworks.ai
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Evaluation Job execution logs (stream log endpoint + tracing IDs).
      operationId: Gateway_GetEvaluationJobExecutionLogEndpoint
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: evaluation_job_id
          in: path
          description: The Evaluation Job Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetEvaluationJobExecutionLogEndpointResponse'
      security:
        - BearerAuth: []
  # Evaluation collection of an account: GET lists, POST creates.
  /v1/accounts/{account_id}/evaluations:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Evaluations
      operationId: Gateway_ListEvaluations
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListEvaluationsResponse'
      parameters:
        # NOTE(review): the descriptions on pageSize, pageToken, filter and
        # orderBy follow the wording of the other List* operations in this
        # spec; confirm limits and filter grammar against the service.
        - name: pageSize
          description: The maximum number of evaluations to return.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListEvaluations call.
            Provide this to retrieve the subsequent page.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: >-
            Only evaluations satisfying the provided filter (if specified)
            will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: Specifies the ordering of the returned evaluations.
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      # Body is a GatewayCreateEvaluationBody; the response is the
      # gatewayEvaluation resource.
      summary: Create Evaluation
      operationId: Gateway_CreateEvaluation
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluation'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateEvaluationBody'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single evaluation: GET reads it, DELETE removes it.
  # TODO: add an Update Evaluation (PATCH) operation for this path.
  /v1/accounts/{account_id}/evaluations/{evaluation_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: Get Evaluation
      operationId: Gateway_GetEvaluation
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluation'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluation_id
          in: path
          required: true
          description: The Evaluation Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    delete:
      summary: Delete Evaluation
      operationId: Gateway_DeleteEvaluation
      responses:
        # Success returns an empty JSON object.
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluation_id
          in: path
          required: true
          description: The Evaluation Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # ":preview" action on one evaluation. POST a GatewayPreviewEvaluationBody
  # and receive a gatewayPreviewEvaluationResponse.
  /v1/accounts/{account_id}/evaluations/{evaluation_id}:preview:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_PreviewEvaluation
      summary: Preview an evaluation with sample data
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Both path segments are required string identifiers.
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluation_id
          in: path
          required: true
          description: The Evaluation Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayPreviewEvaluationBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayPreviewEvaluationResponse'
  # ":validateAssertions" action on the account's evaluations collection.
  # POST a GatewayValidateAssertionsBody and receive a
  # gatewayValidateAssertionsResponse.
  /v1/accounts/{account_id}/evaluations:validateAssertions:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_ValidateAssertions
      summary: Validate evaluation assertions
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayValidateAssertionsBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayValidateAssertionsResponse'
  # Evaluator collection for an account.
  # GET lists evaluators (paginated); POST is the deprecated create call.
  /v1/accounts/{account_id}/evaluators:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Evaluators
      description: Lists all evaluators for an account with pagination support.
      operationId: Gateway_ListEvaluators
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListEvaluatorsResponse'
      parameters:
        # NOTE(review): pageSize, pageToken, filter and orderBy carry no
        # descriptions here, unlike the other list operations in this file;
        # limits and defaults are not stated - confirm and document upstream.
        - name: pageSize
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
        - name: filter
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      summary: Create Evaluator (Deprecated)
      description: 'Deprecated: Use CreateEvaluatorV2 instead'
      operationId: Gateway_CreateEvaluator
      # Machine-readable counterpart of the "Deprecated" wording above, so that
      # documentation renderers and client generators can flag the operation.
      deprecated: true
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluator'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateEvaluatorBody'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single-evaluator resource, addressed by account_id and evaluator_id.
  # GET reads it, PATCH updates it, DELETE removes it.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetEvaluator
      summary: Get Evaluator
      description: >-
        Retrieves an evaluator by name. Use this to monitor build progress after

        creation (**step 6** in the [Create
        Evaluator](/api-reference/create-evaluator) workflow).


        Possible states:


        - `BUILDING` - Environment is being prepared

        - `ACTIVE` - Evaluator is ready to use

        - `BUILD_FAILED` - Check build logs via [Get Evaluator Build Log
        Endpoint](/api-reference/get-evaluator-build-log-endpoint)
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional field mask, followed by the two required path identifiers.
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluator'
    patch:
      operationId: Gateway_UpdateEvaluator
      summary: Update Evaluator
      description: >-
        Updates evaluator metadata (display_name, description, default_dataset).

        Changing `requirements` or `entry_point` triggers a rebuild. To upload
        new

        source code, set `prepare_code_upload: true` then follow the upload
        flow.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # NOTE(review): the text below refers to update_mask, but this operation
        # declares no updateMask parameter - confirm whether one is missing.
        - name: prepareCodeUpload
          in: query
          required: false
          description: >-
            If true, prepare a new code upload/build attempt by transitioning
            the

            evaluator to BUILDING state. Can be used without update_mask.
          schema:
            type: boolean
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      requestBody:
        description: The evaluator to update. The name must be set.
        required: true
        content:
          application/json:
            # Inline evaluator schema; properties flagged readOnly are
            # server-populated fields.
            schema:
              type: object
              properties:
                displayName:
                  type: string
                description:
                  type: string
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                createdBy:
                  type: string
                  readOnly: true
                updateTime:
                  type: string
                  format: date-time
                  readOnly: true
                state:
                  $ref: '#/components/schemas/gatewayEvaluatorState'
                  readOnly: true
                criteria:
                  type: array
                  items:
                    type: object
                    $ref: '#/components/schemas/gatewayCriterion'
                  title: >-
                    Criteria for the evaluator, it should produce a score for
                    the metric (name of criteria)

                    Used for eval3 with UI upload path
                requirements:
                  type: string
                  title: Content for the requirements.txt for package installation
                entryPoint:
                  type: string
                  title: >-
                    entry point of the evaluator inside the codebase. In
                    module::function or path::function format
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  title: >-
                    Status of the evaluator, used to expose build status to the
                    user
                  readOnly: true
                commitHash:
                  type: string
                  title: >-
                    Commit hash of this evaluator from the user's original
                    codebase
                source:
                  $ref: '#/components/schemas/EvaluatorSource'
                  description: Source information for the evaluator codebase.
                defaultDataset:
                  type: string
                  title: Default dataset that is associated with the evaluator
              title: The evaluator to update. The name must be set.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluator'
    delete:
      operationId: Gateway_DeleteEvaluator
      summary: Delete Evaluator
      description: Deletes an evaluator and its associated versions and build artifacts.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      responses:
        # Success carries an empty JSON object.
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Version (revision) collection of a single evaluator; GET lists the entries.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}/versions:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListEvaluatorVersions
      summary: List Evaluator Revisions
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional list controls first, required path identifiers last.
        - name: pageSize
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
        - name: filter
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          description: >-
            Default order should be reverse chronological (newest first) per
            AIP-162.
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListEvaluatorVersionsResponse'
  # CRUD APIs for evaluator revisions (AIP-162).
  # Single evaluator revision, addressed by account_id, evaluator_id and
  # version_id. GET returns the revision; DELETE removes it.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}/versions/{version_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      # The section heading "CRUD APIs for evaluator revisions (AIP-162)." used
      # to be the first line of this summary; it now lives in the comment on
      # the path so the summary is just the operation title.
      summary: Get Evaluator Revision
      operationId: Gateway_GetEvaluatorVersion
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluatorVersion'
      parameters:
        # Optional field mask; empty or "*" returns every field.
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
        - name: version_id
          in: path
          required: true
          description: The Version Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    delete:
      summary: >-
        Delete Evaluator Revision (deletes alias if name is an alias per
        AIP-162)
      operationId: Gateway_DeleteEvaluatorVersion
      responses:
        # Success carries an empty JSON object.
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
        - name: version_id
          in: path
          required: true
          description: The Version Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # ":alias" action on one evaluator revision. POST a
  # GatewayAliasEvaluatorVersionBody; the response is a gatewayEvaluatorVersion.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}/versions/{version_id}:alias:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_AliasEvaluatorVersion
      summary: Alias Evaluator Revision - assign or update a user-specified alias
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # All three path segments are required string identifiers.
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
        - name: version_id
          in: path
          required: true
          description: The Version Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayAliasEvaluatorVersionBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluatorVersion'
  # ":rollback" action on one evaluator revision. POST a
  # GatewayRollbackEvaluatorBody; the response is a gatewayEvaluatorVersion.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}/versions/{version_id}:rollback:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_RollbackEvaluator
      summary: Rollback Evaluator to a specific revision
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # All three path segments are required string identifiers.
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
        - name: version_id
          in: path
          required: true
          description: The Version Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayRollbackEvaluatorBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluatorVersion'
  # ":getBuildLogEndpoint" action on one evaluator; GET returns a
  # gatewayGetEvaluatorBuildLogEndpointResponse.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}:getBuildLogEndpoint:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetEvaluatorBuildLogEndpoint
      summary: Get Evaluator Build Log Endpoint
      description: |-
        Returns a signed URL to download the evaluator's build logs. Useful for
        debugging `BUILD_FAILED` state.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional field mask, followed by the two required path identifiers.
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetEvaluatorBuildLogEndpointResponse'
  # ":getSourceCodeSignedUrl" action on one evaluator; GET returns a
  # gatewayGetEvaluatorSourceCodeEndpointResponse.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}:getSourceCodeSignedUrl:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetEvaluatorSourceCodeEndpoint
      summary: Get Evaluator Source Code Endpoint
      description: |-
        Returns a signed URL to download the evaluator's source code archive.
        Useful for debugging or reviewing the uploaded code.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional field mask, followed by the two required path identifiers.
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetEvaluatorSourceCodeEndpointResponse'
  # ":getUploadEndpoint" action on one evaluator. POST a
  # GatewayGetEvaluatorUploadEndpointBody; the response is a
  # gatewayGetEvaluatorUploadEndpointResponse.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}:getUploadEndpoint:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_GetEvaluatorUploadEndpoint
      summary: Get Evaluator Upload Endpoint
      description: >-
        Returns signed URLs for uploading evaluator source code (**step 3** in
        the

        [Create Evaluator](/api-reference/create-evaluator) workflow). After
        receiving

        the signed URL, upload your `.tar.gz` archive using HTTP `PUT` with

        `Content-Type: application/octet-stream` header.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayGetEvaluatorUploadEndpointBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetEvaluatorUploadEndpointResponse'
  # ":validateUpload" action on one evaluator. POST a
  # GatewayValidateEvaluatorUploadBody; success returns an empty JSON object.
  /v1/accounts/{account_id}/evaluators/{evaluator_id}:validateUpload:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_ValidateEvaluatorUpload
      summary: Validate Evaluator Upload
      description: >-
        Triggers server-side validation of the uploaded source code (**step 5**
        in

        the [Create Evaluator](/api-reference/create-evaluator) workflow). The
        server

        extracts and processes the archive, then builds the evaluator
        environment.

        Poll [Get Evaluator](/api-reference/get-evaluator) to monitor progress.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: evaluator_id
          in: path
          required: true
          description: The Evaluator Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayValidateEvaluatorUploadBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # ":previewEvaluator" action on the account's evaluators collection. POST a
  # GatewayPreviewEvaluatorBody; the response is a
  # gatewayPreviewEvaluatorResponse.
  /v1/accounts/{account_id}/evaluators:previewEvaluator:
    servers:
      - url: https://api.fireworks.ai
    post:
      # The upstream definition supplied no summary, so this title is derived
      # from the operationId.
      # NOTE(review): replace with a fuller description once the intended
      # behaviour is documented at the source.
      summary: Preview Evaluator
      operationId: Gateway_PreviewEvaluator
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayPreviewEvaluatorResponse'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayPreviewEvaluatorBody'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # V2 evaluator creation endpoint. POST a GatewayCreateEvaluatorV2Body; the
  # response is a gatewayEvaluator.
  /v1/accounts/{account_id}/evaluatorsV2:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_CreateEvaluatorV2
      summary: Create Evaluator
      description: >-
        Creates a custom evaluator for scoring model outputs. Evaluators use the

        [Eval Protocol](https://evalprotocol.io) to define test cases, run model

        inference, and score responses. They are used with evaluation jobs and

        Reinforcement Fine-Tuning (RFT).


        ## Source Code Requirements


        Your project should contain:

        - `requirements.txt` - Python dependencies for your evaluator

        - `test_*.py` - Pytest test file(s) with
          [`@evaluation_test`](https://evalprotocol.io/reference/evaluation-test)
          decorated functions
        - Any additional code/modules your evaluator needs


        ## Workflow


        **Recommended:** Use the [`ep
        upload`](https://evalprotocol.io/reference/cli#ep-upload)

        CLI command to handle all these steps automatically.


        If using the API directly:


        1. Call this endpoint to create the evaluator resource

        2. Package your source directory as a `.tar.gz` (respecting
        `.gitignore`)

        3. Call [Get Evaluator Upload
        Endpoint](/api-reference/get-evaluator-upload-endpoint) to get a signed
        upload URL

        4. `PUT` the tar.gz file to the signed URL

        5. Call [Validate Evaluator
        Upload](/api-reference/validate-evaluator-upload) to trigger server-side
        validation

        6. Poll [Get Evaluator](/api-reference/get-evaluator) until ready


        Once active, reference the evaluator in [Create Evaluation
        Job](/api-reference/create-evaluation-job) or [Create Reinforcement
        Fine-tuning Job](/api-reference/create-reinforcement-fine-tuning-job).
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateEvaluatorV2Body'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEvaluator'
  # Generic deployment type collection for an account.
  # GET lists the types (paginated); POST creates one.
  /v1/accounts/{account_id}/genericDeploymentTypes:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListGenericDeploymentTypes
      summary: List the generic deployment types under an account
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional list controls first, required path identifier last.
        - name: pageSize
          in: query
          required: false
          description: >-
            The maximum number of generic deployment types to return. The
            maximum page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          description: >-
            A page token, received from a previous
            ListGenericDeploymentTypeRequest call. Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListGenericDeploymentTypeRequest must match the call
            that provided the page

            token.
          schema:
            type: string
        - name: filter
          in: query
          required: false
          description: >-
            Only generic deployments satisfying the provided filter (if
            specified) will be

            returned. See https://google.aip.dev/160 for the filter grammar.
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "create_time".
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListGenericDeploymentTypesResponse'
    post:
      operationId: Gateway_CreateGenericDeploymentType
      summary: Create Generic Deployment Type
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # The new type's id is a required query parameter, not a body field.
        - name: genericDeploymentTypeId
          in: query
          required: true
          description: The id of the generic deployment type
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        description: The generic deployment type being created
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayGenericDeploymentType'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGenericDeploymentTypeVersion'
  /v1/accounts/{account_id}/genericDeploymentTypes/{generic_deployment_type_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List all generic deployment type version
      operationId: Gateway_ListGenericDeploymentTypeVersions
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayListGenericDeploymentTypeVersionsResponse
      parameters:
        - name: pageSize
          description: >-
            The maximum number of generic deployment type versions to return.
            The maximum page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous
            ListGenericDeploymentTypeVersionRequest call. Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListGenericDeploymentTypeVersionRequest must match the
            call that provided the page

            token.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: >-
            Only generic deployment type versions satisfying the provided filter
            (if specified) will be

            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "create_time".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: generic_deployment_type_id
          in: path
          required: true
          description: The Generic Deployment Type Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # PATCH operation that updates a generic deployment type, addressed by the
    # account_id and generic_deployment_type_id path parameters.
    # NOTE(review): the 200 response references
    # gatewayGenericDeploymentTypeVersion although the operation is named
    # UpdateGenericDeploymentType - confirm against the service definition.
    patch:
      operationId: Gateway_UpdateGenericDeploymentType
      summary: Update Generic Deployment Type
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: generic_deployment_type_id
          in: path
          description: The Generic Deployment Type Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        description: The updated generic deployment type
        content:
          application/json:
            schema:
              title: The updated generic deployment type
              type: object
              properties:
                # Both timestamps are declared readOnly.
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: The creation time of the generic deployment type.
                updateTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: The update time for the generic deployment type.
                imageTag:
                  title: The image tag of the helm chart associated with the resource
                  type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGenericDeploymentTypeVersion'
    # DELETE operation for a generic deployment type. The optional `hard`
    # query flag requests a hard deletion; the 200 response schema is an
    # object that declares no properties.
    delete:
      operationId: Gateway_DeleteGenericDeploymentType
      summary: Delete Generic Deployment Type
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: hard
          in: query
          description: If true, this will perform a hard deletion.
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: generic_deployment_type_id
          in: path
          description: The Generic Deployment Type Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  /v1/accounts/{account_id}/genericDeploymentTypes/{generic_deployment_type_id}/versions/{version_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET a single generic deployment type version, identified by the
    # account_id, generic_deployment_type_id and version_id path parameters.
    # `readMask` optionally restricts which fields are returned.
    get:
      operationId: Gateway_GetGenericDeploymentTypeVersion
      summary: Get generic deployment type version
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: generic_deployment_type_id
          in: path
          description: The Generic Deployment Type Id
          required: true
          schema:
            type: string
        - name: version_id
          in: path
          description: The Version Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGenericDeploymentTypeVersion'
    # DELETE a single generic deployment type version. The optional `hard`
    # query flag requests a hard deletion; the 200 response schema is an
    # object that declares no properties.
    delete:
      operationId: Gateway_DeleteGenericDeploymentTypeVersion
      summary: Delete the generic deployment type version
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: hard
          in: query
          description: If true, this will perform a hard deletion.
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: generic_deployment_type_id
          in: path
          description: The Generic Deployment Type Id
          required: true
          schema:
            type: string
        - name: version_id
          in: path
          description: The Version Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  /v1/accounts/{account_id}/genericDeployments:
    servers:
      - url: https://api.fireworks.ai
    # GET the paginated list of generic deployments under an account.
    # Supports pageSize/pageToken pagination plus filter, orderBy and readMask
    # query parameters. Multi-line descriptions use literal block scalars so
    # each source line is one line of the rendered text.
    get:
      operationId: Gateway_ListGenericDeployments
      summary: List all the generic deployments under an account
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: pageSize
          in: query
          description: |-
            The maximum number of generic deployments to return. The maximum page_size is 200,
            values above 200 will be coerced to 200.
            If unspecified, the default is 50.
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          description: |-
            A page token, received from a previous ListGenericDeploymentRequest call. Provide this
            to retrieve the subsequent page. When paginating, all other parameters
            provided to ListGenericDeploymentRequest must match the call that provided the page
            token.
          required: false
          schema:
            type: string
        - name: filter
          in: query
          description: |-
            Only generic deployments satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          description: |-
            A comma-separated list of fields to order by. e.g. "foo,bar"
            The default sort order is ascending. To specify a descending order for a
            field, append a " desc" suffix. e.g. "foo desc,bar"
            Subfields are specified with a "." character. e.g. "foo.bar"
            If not specified, the default order is by "create_time".
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListGenericDeploymentsResponse'
    # POST that creates a generic deployment under an account. The request
    # body and the 200 response share the gatewayGenericDeployment schema.
    # `validateOnly` requests a dry run, per its description.
    post:
      operationId: Gateway_CreateGenericDeployment
      summary: Create Generic Deployment
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: genericDeploymentId
          in: query
          description: 'The ID of the deployment. If not specified, a random ID will be generated.'
          required: false
          schema:
            type: string
        - name: validateOnly
          in: query
          description: |-
            If true, this will not create the deployment, but will return the deployment
            that would be created.
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        description: The properties of the deployment being created.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayGenericDeployment'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGenericDeployment'
  /v1/accounts/{account_id}/genericDeployments/{generic_deployment_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET a single generic deployment, identified by the account_id and
    # generic_deployment_id path parameters. `readMask` optionally restricts
    # which fields are returned.
    get:
      operationId: Gateway_GetGenericDeployment
      summary: Get Generic Deployment
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: generic_deployment_id
          in: path
          description: The Generic Deployment Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGenericDeployment'
    # PATCH that updates an existing generic deployment, identified by the
    # account_id and generic_deployment_id path parameters. The request body
    # carries the updated values; the 200 response is a
    # gatewayGenericDeployment.
    patch:
      # Summary corrected: this is the Update operation (see operationId),
      # not Create.
      summary: Update Generic Deployment
      operationId: Gateway_UpdateGenericDeployment
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGenericDeployment'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: generic_deployment_id
          in: path
          required: true
          description: The Generic Deployment Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                releaseValues:
                  type: object
                  title: Helm file fields with values for the generic deployment
                placement:
                  $ref: '#/components/schemas/gatewayPlacement'
                  title: Regions where the deployment should be placed
                # createTime, updateTime and state are declared readOnly.
                createTime:
                  type: string
                  format: date-time
                  description: The creation time of the generic deployment.
                  readOnly: true
                updateTime:
                  type: string
                  format: date-time
                  description: The update time for the generic deployment.
                  readOnly: true
                genericDeploymentType:
                  type: string
                  title: >-
                    Name of the GenericDeploymentType or
                    GenericDeploymentTypeVersion.

                    If the value for the field carries the resource name of
                    GenericDeploymentType then the latest version will be used
                state:
                  $ref: '#/components/schemas/gatewayGenericDeploymentState'
                  readOnly: true
              title: The updated values for generic deployment
              # Properties that every request body must include.
              required:
                - releaseValues
                - placement
                - genericDeploymentType
        description: The updated values for generic deployment
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # DELETE a generic deployment. `hard` requests a hard deletion and
    # `ignoreChecks` forces deletion of a deployment that is in use, per the
    # parameter descriptions. The 200 response schema is an object that
    # declares no properties.
    delete:
      operationId: Gateway_DeleteGenericDeployment
      summary: Delete Generic Deployment
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: hard
          in: query
          description: If true, this will perform a hard deletion.
          required: false
          schema:
            type: boolean
        - name: ignoreChecks
          in: query
          description: |-
            If true, this will ignore checks and force the deletion of a deployment that is currently
            deployed and is in use.
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: generic_deployment_id
          in: path
          description: The Generic Deployment Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  /v1/accounts/{account_id}/identityProviders:
    servers:
      - url: https://api.fireworks.ai
    # GET the list of identity providers for an account, with the usual
    # pageSize/pageToken/filter/orderBy/readMask query parameters.
    get:
      operationId: Gateway_ListIdentityProviders
      summary: List Identity Providers
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: pageSize
          in: query
          description: Page size
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          description: Page token
          required: false
          schema:
            type: string
        - name: filter
          in: query
          description: Filter expression
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          description: Order by
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListIdentityProvidersResponse'
    # POST that creates an identity provider under an account. The request
    # body and the 200 response share the gatewayIdentityProvider schema.
    post:
      operationId: Gateway_CreateIdentityProvider
      summary: Create Identity Provider
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        description: Identity provider to create
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayIdentityProvider'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayIdentityProvider'
  /v1/accounts/{account_id}/identityProviders/{identity_provider_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET a single identity provider, identified by the account_id and
    # identity_provider_id path parameters. `readMask` optionally restricts
    # which fields are returned.
    get:
      operationId: Gateway_GetIdentityProvider
      summary: Get Identity Provider
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: identity_provider_id
          in: path
          description: The Identity Provider Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayIdentityProvider'
    # PATCH that updates an identity provider, identified by the account_id
    # and identity_provider_id path parameters. The inline request schema
    # lists the provider's fields; several are declared readOnly. Multi-line
    # titles/descriptions use literal block scalars so each source line is one
    # line of the rendered text.
    patch:
      operationId: Gateway_UpdateIdentityProvider
      summary: Update Identity Provider
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: identity_provider_id
          in: path
          description: The Identity Provider Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        description: Identity provider to update
        content:
          application/json:
            schema:
              title: Identity provider to update
              type: object
              properties:
                displayName:
                  title: Display name for the identity provider
                  type: string
                createTime:
                  title: Creation timestamp
                  type: string
                  format: date-time
                  readOnly: true
                updateTime:
                  title: Last update timestamp
                  type: string
                  format: date-time
                  readOnly: true
                # Protocol-specific configuration blocks.
                samlConfig:
                  $ref: '#/components/schemas/gatewaySamlConfig'
                oidcConfig:
                  $ref: '#/components/schemas/gatewayOidcConfig'
                tenantDomains:
                  title: |-
                    List of allowed domains for this identity provider
                    Example: ["example.com", "example.co.uk", "example.de"]
                    If not provided, domain will be derived from the account email
                  type: array
                  items:
                    type: string
                state:
                  title: Current state of the identity provider
                  $ref: '#/components/schemas/gatewayIdentityProviderState'
                  readOnly: true
                status:
                  description: Contains information about the identity provider status.
                  $ref: '#/components/schemas/gatewayStatus'
                  readOnly: true
                domainUrl:
                  description: The domain URL.
                  type: string
                  readOnly: true
                issuerUrl:
                  description: The OIDC issuer URL.
                  type: string
                  readOnly: true
                clientId:
                  description: The OIDC client ID.
                  type: string
                  readOnly: true
                # Just-In-Time provisioning settings.
                enableJitUserProvisioning:
                  description: |-
                    Enable Just-In-Time (JIT) user provisioning.
                    When enabled, users are automatically created in the account on first SSO login
                    if they don't already exist. When disabled, users must be pre-provisioned
                    before they can authenticate via SSO.
                  type: boolean
                jitDefaultRole:
                  description: |-
                    Default role assigned to JIT-provisioned users.
                    Valid values: "admin", "user", "contributor", "inference-user".
                    Only applies when enable_jit_user_provisioning is true and RBAC V2 is enabled.
                    If empty or unset, defaults to "inference-user" (least privilege).
                    If RBAC V2 is not enabled for the account, JIT users always get "user" role.
                  type: string
                enforceSso:
                  title: |-
                    Enforce SSO authentication and restrict account access to users with approved email domains.
                    When enabled:
                    - Users with email domains matching tenant_domains must authenticate via this identity provider
                    - Users with other email domains are blocked (tenant_domains becomes an allowlist)
                    - Superusers and API keys are exempt from this enforcement
                  type: boolean
                enableIdpInitiatedSso:
                  description: |-
                    Enable IdP-initiated SAML (Security Assertion Markup Language) single sign-on.
                    When enabled, users can start the login flow from their identity provider's
                    portal (e.g., Okta app launcher) instead of from the Fireworks login page.
                    Only supported for SAML identity providers.
                  type: boolean
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayIdentityProvider'
    # DELETE an identity provider, identified by the account_id and
    # identity_provider_id path parameters. The 200 response schema is an
    # object that declares no properties.
    # NOTE(review): a readMask query parameter is declared here even though
    # the response declares no fields - confirm it is intentional.
    delete:
      operationId: Gateway_DeleteIdentityProvider
      summary: Delete Identity Provider
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: identity_provider_id
          in: path
          description: The Identity Provider Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  /v1/accounts/{account_id}/models:
    servers:
      - url: https://api.fireworks.ai
    # GET the paginated list of models under an account. Supports
    # pageSize/pageToken pagination plus filter, orderBy and readMask query
    # parameters; the 200 response is a gatewayListModelsResponse.
    get:
      summary: List Models
      operationId: Gateway_ListModels
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListModelsResponse'
      parameters:
        - name: pageSize
          description: >-
            The maximum number of models to return. The maximum page_size is
            200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: >-
            A page token, received from a previous ListModels call. Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListModels must match the call that provided the page

            token.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          # Wording fixed to the plural "models", matching the other list
          # operations' filter descriptions.
          description: |-
            Only models satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # POST that creates a model under an account. The request body uses the
    # GatewayCreateModelBody schema; the 200 response is a gatewayModel.
    post:
      operationId: Gateway_CreateModel
      summary: Create Model
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateModelBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayModel'
  /v1/accounts/{account_id}/models/{model_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET a single model, identified by the account_id and model_id path
    # parameters. `readMask` optionally restricts which fields are returned.
    get:
      operationId: Gateway_GetModel
      summary: Get Model
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: model_id
          in: path
          description: The Model Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayModel'
    patch:
      summary: Update Model
      operationId: Gateway_UpdateModel
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayModel'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: model_id
          in: path
          required: true
          description: The Model Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                displayName:
                  type: string
                  description: |-
                    Human-readable display name of the model. e.g. "My Model"
                    Must be fewer than 64 characters long.
                description:
                  type: string
                  description: >-
                    The description of the model. Must be fewer than 1000
                    characters long.
                createTime:
                  type: string
                  format: date-time
                  description: The creation time of the model.
                  readOnly: true
                state:
                  $ref: '#/components/schemas/gatewayModelState'
                  description: The state of the model.
                  readOnly: true
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  description: >-
                    Contains detailed message when the last model operation
                    fails.
                  readOnly: true
                kind:
                  $ref: '#/components/schemas/ModelKind'
                  description: |-
                    The kind of model.
                    If not specified, the default is HF_PEFT_ADDON.
                githubUrl:
                  type: string
                  description: The URL to GitHub repository of the model.
                huggingFaceUrl:
                  type: string
                  description: The URL to the Hugging Face model.
                baseModelDetails:
                  $ref: '#/components/schemas/gatewayBaseModelDetails'
                  description: >-
                    Base model details.

                    Required if kind is HF_BASE_MODEL. Must not be set
                    otherwise.
                peftDetails:
                  $ref: '#/components/schemas/gatewayPEFTDetails'
                  description: |-
                    PEFT addon details.
                    Required if kind is HF_PEFT_ADDON or HF_TEFT_ADDON.
                teftDetails:
                  $ref: '#/components/schemas/gatewayTEFTDetails'
                  description: >-
                    TEFT addon details.

                    Required if kind is HF_TEFT_ADDON. Must not be set
                    otherwise.
                public:
                  type: boolean
                  description: If true, the model will be publicly readable.
                conversationConfig:
                  $ref: '#/components/schemas/gatewayConversationConfig'
                  description: >-
                    If set, the Chat Completions API will be enabled for this
                    model.
                contextLength:
                  type: integer
                  format: int32
                  description: The maximum context length supported by the model.
                supportsImageInput:
                  type: boolean
                  description: If set, images can be provided as input to the model.
                supportsTools:
                  type: boolean
                  description: >-
                    If set, tools (i.e. functions) can be provided as input to
                    the model,

                    and the model may respond with one or more tool calls.
                importedFrom:
                  type: string
                  description: >-
                    The name of the model from which this was imported. This
                    field is empty

                    if the model was not imported.
                  readOnly: true
                fineTuningJob:
                  type: string
                  description: >-
                    If the model was created from a fine-tuning job, this is the
                    fine-tuning

                    job name.
                  readOnly: true
                defaultDraftModel:
                  type: string
                  description: >-
                    The default draft model to use when creating a deployment.
                    If empty,

                    speculative decoding is disabled by default.
                defaultDraftTokenCount:
                  type: integer
                  format: int32
                  description: >-
                    The default draft token count to use when creating a
                    deployment.

                    Must be specified if default_draft_model is specified.
                deployedModelRefs:
                  type: array
                  items:
                    type: object
                    $ref: '#/components/schemas/gatewayDeployedModelRef'
                  description: Populated from GetModel API call only.
                  readOnly: true
                cluster:
                  type: string
                  description: >-
                    The resource name of the BYOC cluster to which this model
                    belongs.

                    e.g. accounts/my-account/clusters/my-cluster. Empty if it
                    belongs to

                    a Fireworks cluster.
                  readOnly: true
                deprecationDate:
                  $ref: '#/components/schemas/typeDate'
                  description: >-
                    If specified, this is the date when the serverless
                    deployment of the model will be taken down.
                calibrated:
                  type: boolean
                  description: >-
                    If true, the model is calibrated and can be deployed to
                    non-FP16 precisions.
                  readOnly: true
                tunable:
                  type: boolean
                  description: >-
                    Deprecated: V1 training stack only — LoRA only, limited
                    architecture support.

                    If the model has use_training_v2=true and your account has
                    AllowTrainingV2,

                    use supervised_lora_tunable and
                    supervised_full_parameter_tunable instead.
                  readOnly: true
                supportsLora:
                  type: boolean
                  description: Whether this model supports LoRA.
                useHfApplyChatTemplate:
                  type: boolean
                  description: >-
                    If true, the model will use the Hugging Face
                    apply_chat_template API to apply the chat template.
                updateTime:
                  type: string
                  format: date-time
                  description: The update time for the model.
                  readOnly: true
                defaultSamplingParams:
                  type: object
                  additionalProperties:
                    type: number
                    format: float
                  description: >-
                    A json object that contains the default sampling parameters
                    for the model.
                  readOnly: true
                rlTunable:
                  type: boolean
                  description: >-
                    Deprecated: V1 training stack only — LoRA only, limited
                    architecture support.

                    If the model has use_training_v2=true and your account has
                    AllowTrainingV2,

                    use rl_lora_tunable and rl_full_parameter_tunable instead.
                  readOnly: true
                trainingContextLength:
                  type: integer
                  format: int32
                  description: The maximum context length supported by the model.
                snapshotType:
                  $ref: '#/components/schemas/ModelSnapshotType'
                supportsServerless:
                  type: boolean
                  description: If true, the model has a serverless deployment.
                  readOnly: true
                supervisedLoraTunable:
                  type: boolean
                  description: >-
                    V2 only. Whether the model supports LoRA supervised
                    fine-tuning and DPO (lora_rank > 0).

                    True when a validated LORA_TRAINER training shape exists.
                  readOnly: true
                supervisedFullParameterTunable:
                  type: boolean
                  description: >-
                    V2 only. Whether the model supports full-parameter
                    supervised fine-tuning and DPO (lora_rank = 0).

                    True when a validated POLICY_TRAINER training shape exists.
                  readOnly: true
                rlLoraTunable:
                  type: boolean
                  description: >-
                    V2 only. Whether the model supports LoRA reinforcement
                    learning (lora_rank > 0).

                    True when a validated LORA_TRAINER training shape exists
                    plus a deployment shape.
                  readOnly: true
                rlFullParameterTunable:
                  type: boolean
                  description: >-
                    V2 only. Whether the model supports full-parameter
                    reinforcement learning (lora_rank = 0).

                    True when validated POLICY_TRAINER + FORWARD_ONLY training
                    shapes exist plus a deployment shape.
                  readOnly: true
              title: |-
                The properties of the model being updated. `model.name` must
                be populated with the updated resource's name.
        description: |-
          The properties of the model being updated. `model.name` must
          be populated with the updated resource's name.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # DeleteModel: DELETE on the model addressed by the two path parameters.
    delete:
      operationId: Gateway_DeleteModel
      summary: Delete Model
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
      responses:
        # The success body is an object with no declared properties.
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Model-version collection under a single model.
  /v1/accounts/{account_id}/models/{model_id}/versions:
    servers:
      - url: https://api.fireworks.ai
    post:
      # `summary` is a short, single-line title. The previous value also
      # carried a section header ("CRUD APIs for model versions.") that
      # described a group of operations rather than this one.
      summary: Create Model Version
      operationId: Gateway_CreateModelVersion
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayModelVersion'
      parameters:
        # Optional query parameter; no description is declared for it.
        - name: versionId
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: model_id
          in: path
          required: true
          description: The Model Id
          schema:
            type: string
      # Request and response bodies share the same schema.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayModelVersion'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Deploy action on one model version. The success body follows the
  # `googlelongrunningOperation` schema.
  /v1/accounts/{account_id}/models/{model_id}/versions/{version_id}:deploy:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_DeployModelVersion
      summary: Deploy Model Version
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
        - name: version_id
          in: path
          required: true
          schema:
            type: string
          description: The Version Id
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayDeployModelVersionBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/googlelongrunningOperation'
  # Count of model versions for one model; takes only the two path parameters.
  /v1/accounts/{account_id}/models/{model_id}/versions:count:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetModelVersionCount
      summary: Returns the number of model versions.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetModelVersionCountResponse'
  # Download-endpoint lookup for one model, with an optional `readMask`
  # query parameter.
  /v1/accounts/{account_id}/models/{model_id}:getDownloadEndpoint:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetModelDownloadEndpoint
      summary: Get Model Download Endpoint
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          schema:
            type: string
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetModelDownloadEndpointResponse'
  # Upload-endpoint lookup for one model. Unlike the download variant this
  # is a POST with a required JSON body.
  /v1/accounts/{account_id}/models/{model_id}:getUploadEndpoint:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_GetModelUploadEndpoint
      summary: Get Model Upload Endpoint
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayGetModelUploadEndpointBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayGetModelUploadEndpointResponse'
  # Import action on one model. The success body follows the
  # `googlelongrunningOperation` schema.
  /v1/accounts/{account_id}/models/{model_id}:import:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_ImportModel
      summary: Transfer model from S3 to GCP storage
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayImportModelBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/googlelongrunningOperation'
  # Prepare action on one model. The success body is an object with no
  # declared properties.
  /v1/accounts/{account_id}/models/{model_id}:prepare:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_PrepareModel
      summary: Prepare Model for different precisions
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayPrepareModelBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Upload validation for one model. Three optional boolean query flags
  # control which validation steps are skipped or relaxed.
  /v1/accounts/{account_id}/models/{model_id}:validateUpload:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ValidateModelUpload
      summary: Validate Model Upload
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: skipHfConfigValidation
          in: query
          required: false
          schema:
            type: boolean
          description: If true, skip the Hugging Face config validation.
        - name: trustRemoteCode
          in: query
          required: false
          schema:
            type: boolean
          description: If true, trusts remote code when validating the Hugging Face config.
        - name: configOnly
          in: query
          required: false
          schema:
            type: boolean
          description: If true, skip tokenizer and parameter name validation.
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: model_id
          in: path
          required: true
          schema:
            type: string
          description: The Model Id
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayValidateModelUploadResponse'
  # Per-account PolicySettings singleton: GET reads it, PATCH updates it.
  # Both operations respond with the `gatewayPolicySettings` schema.
  /v1/accounts/{account_id}/policySettings:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetPolicySettings
      summary: Returns the singleton PolicySettings for the given account.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayPolicySettings'
    patch:
      operationId: Gateway_UpdatePolicySettings
      summary: Updates the per-account PolicySettings singleton.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
      requestBody:
        required: true
        description: |-
          The policy settings to update. `policy_settings.name` must be populated.
          Format: accounts/{account}/policySettings
        content:
          application/json:
            # Inline body schema; `updateTime` is marked read-only.
            schema:
              title: |-
                The policy settings to update. `policy_settings.name` must be populated.
                Format: accounts/{account}/policySettings
              type: object
              properties:
                rules:
                  description: 'Full model allowlist (governance doc §1); empty means default-deny for all models.'
                  type: array
                  items:
                    type: object
                    $ref: '#/components/schemas/PolicySettingsModelAccessRule'
                updateTime:
                  description: The update time for the policy settings resource.
                  type: string
                  format: date-time
                  readOnly: true
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayPolicySettings'
  # Pricing-plan collection for an account, with the paging, filtering,
  # ordering and read-mask query parameters declared below.
  /v1/accounts/{account_id}/pricingPlans:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListPricingPlans
      summary: List Pricing Plans
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: pageSize
          in: query
          required: false
          schema:
            type: integer
            format: int32
          description: Maximum number of pricing plans to return
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
          description: Page token from a previous ListPricingPlans call
        - name: filter
          in: query
          required: false
          schema:
            type: string
          description: Filter expression (e.g., "state=READY")
        - name: orderBy
          in: query
          required: false
          schema:
            type: string
          description: Order by expression (e.g., "create_time desc")
        - name: readMask
          in: query
          required: false
          schema:
            type: string
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListPricingPlansResponse'
  # Single pricing plan addressed by account and plan id.
  /v1/accounts/{account_id}/pricingPlans/{pricing_plan_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      # `summary` is a short, single-line title. The previous value also
      # carried a section header ("CRUD APIs for pricing plans.") that
      # described a group of operations rather than this one.
      summary: Get Pricing Plan
      operationId: Gateway_GetPricingPlan
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayPricingPlan'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: pricing_plan_id
          in: path
          required: true
          description: The Pricing Plan Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Quota collection for an account, with paging, filtering, ordering and
  # read-mask query parameters.
  /v1/accounts/{account_id}/quotas:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Quotas
      description: Lists all quotas for an account.
      operationId: Gateway_ListQuotas
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListQuotasResponse'
      parameters:
        - name: pageSize
          description: |-
            The maximum number of quotas to return. The maximum page_size is 200,
            values above 200 will be coerced to 200.
            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: |-
            A page token, received from a previous ListQuotas call. Provide this
            to retrieve the subsequent page. When paginating, all other parameters
            provided to ListQuotas must match the call that provided the page
            token.
          in: query
          required: false
          schema:
            type: string
        # Wording fixed: "Only quota satisfying" -> "Only quotas satisfying".
        - name: filter
          description: |-
            Only quotas satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: |-
            A comma-separated list of fields to order by. e.g. "foo,bar"
            The default sort order is ascending. To specify a descending order for a
            field, append a " desc" suffix. e.g. "foo desc,bar"
            Subfields are specified with a "." character. e.g. "foo.bar"
            If not specified, the default order is by "name".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single quota resource: GET reads it, PATCH updates it. Both operations
  # respond with the `gatewayQuota` schema.
  /v1/accounts/{account_id}/quotas/{quota_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetQuota
      summary: Get Quota
      description: Gets a single quota by resource name.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          schema:
            type: string
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: quota_id
          in: path
          required: true
          schema:
            type: string
          description: The Quota Id
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayQuota'
    patch:
      operationId: Gateway_UpdateQuota
      summary: Update Quota
      description: Updates a quota.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: allowMissing
          in: query
          required: false
          schema:
            type: boolean
          description: If true, and the quota does not exist, it will be created.
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: quota_id
          in: path
          required: true
          schema:
            type: string
          description: The Quota Id
      requestBody:
        required: true
        description: |-
          The properties of the quota being updated. `quota.name` must
          be populated with the updated resource's name.
        content:
          application/json:
            # Inline body schema; `usage` and `updateTime` are marked
            # read-only. The int64 fields are declared as strings.
            schema:
              title: |-
                The properties of the quota being updated. `quota.name` must
                be populated with the updated resource's name.
              type: object
              properties:
                value:
                  description: |-
                    The value of the quota being enforced. This may be lower than the max_value
                    if the user manually lowers it.
                  type: string
                  format: int64
                maxValue:
                  description: The maximum approved value.
                  type: string
                  format: int64
                usage:
                  description: The usage of the quota.
                  type: number
                  format: double
                  readOnly: true
                updateTime:
                  description: The update time for the quota.
                  type: string
                  format: date-time
                  readOnly: true
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayQuota'
  # Reinforcement fine-tuning job collection: GET lists jobs, POST creates one.
  /v1/accounts/{account_id}/reinforcementFineTuningJobs:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Reinforcement Fine-tuning Jobs
      operationId: Gateway_ListReinforcementFineTuningJobs
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListReinforcementFineTuningJobsResponse'
      parameters:
        - name: pageSize
          description: >-
            The maximum number of fine-tuning jobs to return. The maximum
            page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        # The call name below matches this operation's id
        # (ListReinforcementFineTuningJobs); the previous text named
        # "ListReinforcementLearningFineTuningJobs", which this operation's
        # id does not use.
        - name: pageToken
          description: >-
            A page token, received from a previous
            ListReinforcementFineTuningJobs call. Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListReinforcementFineTuningJobs must match the
            call that provided the page

            token.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: >-
            Filter criteria for the returned jobs. See
            https://google.aip.dev/160 for the filter syntax specification.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      summary: Create Reinforcement Fine-tuning Job
      operationId: Gateway_CreateReinforcementFineTuningJob
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayReinforcementFineTuningJob'
      parameters:
        - name: reinforcementFineTuningJobId
          description: >-
            ID of the reinforcement fine-tuning job, a random UUID will be
            generated if not specified.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      # Request and response bodies share the same schema.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayReinforcementFineTuningJob'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  /v1/accounts/{account_id}/reinforcementFineTuningJobs/{reinforcement_fine_tuning_job_id}:
    servers:
      - url: https://api.fireworks.ai
    # GetReinforcementFineTuningJob: reads one job, with an optional
    # `readMask` query parameter.
    get:
      operationId: Gateway_GetReinforcementFineTuningJob
      summary: Get Reinforcement Fine-tuning Job
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          schema:
            type: string
          description: 'The fields to be returned in the response. If empty or "*", all fields will be returned.'
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: reinforcement_fine_tuning_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Reinforcement Fine-tuning Job Id
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayReinforcementFineTuningJob'
    patch:
      summary: Update Reinforcement Fine-tuning Job
      operationId: Gateway_UpdateReinforcementFineTuningJob
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayReinforcementFineTuningJob'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: reinforcement_fine_tuning_job_id
          in: path
          required: true
          description: The Reinforcement Fine-tuning Job Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                displayName:
                  type: string
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                completedTime:
                  type: string
                  format: date-time
                  description: The completed time for the reinforcement fine-tuning job.
                  readOnly: true
                dataset:
                  type: string
                  description: The name of the dataset used for training.
                evaluationDataset:
                  type: string
                  description: The name of a separate dataset to use for evaluation.
                evalAutoCarveout:
                  type: boolean
                  description: Whether to auto-carve the dataset for eval.
                state:
                  $ref: '#/components/schemas/gatewayJobState'
                  readOnly: true
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  readOnly: true
                createdBy:
                  type: string
                  description: >-
                    The email address of the user who initiated this fine-tuning
                    job.
                  readOnly: true
                trainingConfig:
                  $ref: '#/components/schemas/gatewayBaseTrainingConfig'
                  description: Common training configurations.
                evaluator:
                  type: string
                  description: The evaluator resource name to use for RLOR fine-tuning job.
                wandbConfig:
                  $ref: '#/components/schemas/gatewayWandbConfig'
                  description: >-
                    The Weights & Biases team/user account for logging training
                    progress.
                awsS3Config:
                  $ref: '#/components/schemas/gatewayAwsS3Config'
                  description: The AWS configuration for S3 dataset access.
                azureBlobStorageConfig:
                  $ref: '#/components/schemas/gatewayAzureBlobStorageConfig'
                  description: The Azure configuration for Blob Storage dataset access.
                outputStats:
                  type: string
                  description: >-
                    The output dataset's aggregated stats for the evaluation
                    job.
                  readOnly: true
                jobProgress:
                  $ref: '#/components/schemas/gatewayJobProgress'
                  description: Job progress.
                  readOnly: true
                inferenceParameters:
                  $ref: >-
                    #/components/schemas/gatewayReinforcementFineTuningJobInferenceParameters
                  description: RFT inference parameters.
                chunkSize:
                  type: integer
                  format: int32
                  description: >-
                    Data chunking for rollout, default size 200, enabled when
                    dataset > 300. Valid range is 1-10,000.
                outputMetrics:
                  type: string
                  readOnly: true
                maxInferenceReplicaCount:
                  type: integer
                  format: int32
                  title: |-
                    Maximum number of replicas to use for the deployment.
                    Default is 1
                nodeCount:
                  type: integer
                  format: int32
                  description: |-
                    The number of nodes to use for the fine-tuning job.
                    If not specified, the default is 1.
                lossConfig:
                  $ref: '#/components/schemas/gatewayReinforcementLearningLossConfig'
                  description: >-
                    Reinforcement learning loss method + hyperparameters for the
                    underlying trainers.
                trainerLogsSignedUrl:
                  type: string
                  description: >-
                    The signed URL for the trainer logs file (stdout/stderr).

                    Only populated if the account has trainer log reading
                    enabled.
                  readOnly: true
                acceleratorSeconds:
                  type: object
                  additionalProperties:
                    type: string
                    format: int64
                  description: >-
                    Accelerator seconds used by the job, keyed by accelerator
                    type (e.g., "NVIDIA_H100_80GB"). Updated when job completes
                    or is cancelled.
                  readOnly: true
                maxConcurrentRollouts:
                  type: integer
                  format: int32
                  description: Maximum number of concurrent rollouts during the RFT job.
                maxConcurrentEvaluations:
                  type: integer
                  format: int32
                  description: Maximum number of concurrent evaluations during the RFT job.
                purpose:
                  $ref: '#/components/schemas/gatewayPurpose'
                  description: Scheduling purpose for this job.
              title: The reinforcement fine-tuning job to update.
              required:
                - dataset
                - evaluator
        description: The reinforcement fine-tuning job to update.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    delete:
      summary: Delete Reinforcement Fine-tuning Job
      operationId: Gateway_DeleteReinforcementFineTuningJob
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: reinforcement_fine_tuning_job_id
          in: path
          required: true
          description: The Reinforcement Fine-tuning Job Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Cancel action on a reinforcement fine-tuning job (POST only).
  /v1/accounts/{account_id}/reinforcementFineTuningJobs/{reinforcement_fine_tuning_job_id}:cancel:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_CancelReinforcementFineTuningJob
      summary: Cancel Reinforcement Fine-tuning Job
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Both identifiers are taken from the URL path.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: reinforcement_fine_tuning_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Reinforcement Fine-tuning Job Id
      # A JSON body is mandatory; its shape is defined by the referenced schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCancelReinforcementFineTuningJobBody'
      # Success returns an object schema with no declared properties.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Debug action on a reinforcement fine-tuning job (POST only).
  /v1/accounts/{account_id}/reinforcementFineTuningJobs/{reinforcement_fine_tuning_job_id}:debug:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_DebugReinforcementFineTuningJob
      summary: Debug Reinforcement Fine-tuning Job
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Both identifiers are taken from the URL path.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: reinforcement_fine_tuning_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Reinforcement Fine-tuning Job Id
      # A JSON body is mandatory; its shape is defined by the referenced schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayDebugReinforcementFineTuningJobBody'
      # Success returns the dedicated debug response schema.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayDebugReinforcementFineTuningJobResponse
  # Read-only action that returns the metrics-file response for a
  # reinforcement fine-tuning job.
  /v1/accounts/{account_id}/reinforcementFineTuningJobs/{reinforcement_fine_tuning_job_id}:getMetricsFileEndpoint:
    servers:
      - url: https://api.fireworks.ai
    get:
      # Summary added so this operation is labelled like its neighbours; the
      # wording is derived from the operationId.
      summary: Get Reinforcement Fine-tuning Job Metrics File Endpoint
      operationId: Gateway_GetReinforcementFineTuningJobMetricsFileEndpoint
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayGetReinforcementFineTuningJobMetricsFileResponse
      # Both identifiers are taken from the URL path; there is no request body.
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: reinforcement_fine_tuning_job_id
          in: path
          required: true
          description: The Reinforcement Fine-tuning Job Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Resume action on a reinforcement fine-tuning job (POST only).
  /v1/accounts/{account_id}/reinforcementFineTuningJobs/{reinforcement_fine_tuning_job_id}:resume:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_ResumeReinforcementFineTuningJob
      summary: Resume Reinforcement Fine-tuning Job
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Both identifiers are taken from the URL path.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: reinforcement_fine_tuning_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Reinforcement Fine-tuning Job Id
      # A JSON body is mandatory; its shape is defined by the referenced schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayResumeReinforcementFineTuningJobBody'
      # Success returns the job resource.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayReinforcementFineTuningJob'
  # Collection of RLOR trainer jobs under an account: list (GET) and create
  # (POST).
  /v1/accounts/{account_id}/rlorTrainerJobs:
    servers:
      - url: https://api.fireworks.ai
    # Paginated listing with filter, ordering and read-mask query parameters.
    get:
      summary: List Reinforcement Fine-tuning Steps
      operationId: Gateway_ListRlorTrainerJobs
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListRlorTrainerJobsResponse'
      parameters:
        - name: pageSize
          description: >-
            The maximum number of fine-tuning jobs to return. The maximum
            page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        # The description names this operation (ListRlorTrainerJobs); it
        # previously referred to a "ListRlorTuningJobs" call, which does not
        # match the operationId.
        - name: pageToken
          description: >-
            A page token, received from a previous ListRlorTrainerJobs call.
            Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListRlorTrainerJobs must match the call that provided
            the page

            token.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: >-
            Filter criteria for the returned jobs. See
            https://google.aip.dev/160 for the filter syntax specification.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # Request and response bodies both use the gatewayRlorTrainerJob schema;
    # the job id and training shape are optional query parameters.
    post:
      summary: Create Reinforcement Fine-tuning Step
      operationId: Gateway_CreateRlorTrainerJob
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRlorTrainerJob'
      parameters:
        - name: rlorTrainerJobId
          description: >-
            ID of the RLOR trainer job, a random UUID will be generated if not
            specified.
          in: query
          required: false
          schema:
            type: string
        - name: trainingShape
          description: >-
            Optional validated training-shape selector for service-mode
            launches.

            Accepted formats:

            - accounts/{account}/trainingShapes/{shape}

            - accounts/{account}/trainingShapes/{shape}/versions/{version}

            - accounts/{account}/trainingShapes/{shape}/versions/latest

            When a shape (without /versions/*) is provided, the latest validated
            version is used.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayRlorTrainerJob'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single RLOR trainer job, addressed by account id and job id: get and delete.
  /v1/accounts/{account_id}/rlorTrainerJobs/{rlor_trainer_job_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetRlorTrainerJob
      summary: Get Reinforcement Fine-tuning Step
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # One optional query parameter followed by the two path identifiers.
      parameters:
        - name: readMask
          in: query
          required: false
          schema:
            type: string
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: rlor_trainer_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Rlor Trainer Job Id
      # Success returns the trainer job resource.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRlorTrainerJob'
    delete:
      operationId: Gateway_DeleteRlorTrainerJob
      summary: Delete Reinforcement Fine-tuning Step
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Only the two path identifiers; no query parameters or request body.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: rlor_trainer_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Rlor Trainer Job Id
      # Success returns an object schema with no declared properties.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Checkpoint listing for one RLOR trainer job (GET only, paginated).
  /v1/accounts/{account_id}/rlorTrainerJobs/{rlor_trainer_job_id}/checkpoints:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListRlorTrainerJobCheckpoints
      summary: List checkpoints for an RLOR Trainer Job
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Two optional pagination query parameters, then the path identifiers.
      parameters:
        - name: pageSize
          in: query
          required: false
          schema:
            type: integer
            format: int32
          description: Maximum number of checkpoints to return. Default is 50, max is 200.
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
          description: Page token from a previous call.
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: rlor_trainer_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Rlor Trainer Job Id
      # Success returns the dedicated list-checkpoints response schema.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayListRlorTrainerJobCheckpointsResponse
  # Public trainer logs lookup for one RLOR trainer job (GET only).
  /v1/accounts/{account_id}/rlorTrainerJobs/{rlor_trainer_job_id}/publicLogs:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetRlorTrainerJobPublicLogs
      summary: Get a signed URL for the public trainer logs of an RLOR Trainer Job.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Only the two path identifiers; no query parameters or request body.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: rlor_trainer_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Rlor Trainer Job Id
      # Success returns the dedicated public-logs response schema.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayGetRlorTrainerJobPublicLogsResponse
  # Cancel action on an RLOR trainer job (POST only).
  /v1/accounts/{account_id}/rlorTrainerJobs/{rlor_trainer_job_id}:cancel:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_CancelRlorTrainerJob
      summary: Cancel Rlor Trainer Job
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Both identifiers are taken from the URL path.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: rlor_trainer_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Rlor Trainer Job Id
      # A JSON body is mandatory; its shape is defined by the referenced schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCancelRlorTrainerJobBody'
      # Success returns an object schema with no declared properties.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Execute-train-step action on an RLOR trainer job (POST only).
  /v1/accounts/{account_id}/rlorTrainerJobs/{rlor_trainer_job_id}:executeTrainStep:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_ExecuteRlorTrainStep
      summary: Execute one training step for keep-alive Reinforcement Fine-tuning Step
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Both identifiers are taken from the URL path.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: rlor_trainer_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Rlor Trainer Job Id
      # A JSON body is mandatory; its shape is defined by the referenced schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayExecuteRlorTrainStepBody'
      # Success returns an object schema with no declared properties.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
  # Resume action on an RLOR trainer job (POST only).
  /v1/accounts/{account_id}/rlorTrainerJobs/{rlor_trainer_job_id}:resume:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_ResumeRlorTrainerJob
      summary: Resume Rlor Trainer Job
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Both identifiers are taken from the URL path.
      parameters:
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
        - name: rlor_trainer_job_id
          in: path
          required: true
          schema:
            type: string
          description: The Rlor Trainer Job Id
      # A JSON body is mandatory; its shape is defined by the referenced schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayResumeRlorTrainerJobBody'
      # Success returns the trainer job resource.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRlorTrainerJob'
  # Collection of routers under an account: list (GET) and create (POST).
  /v1/accounts/{account_id}/routers:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListRouters
      summary: List Routers
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Five optional query parameters (pagination, filter, ordering, read
      # mask) followed by the account path identifier.
      parameters:
        - name: pageSize
          in: query
          required: false
          schema:
            type: integer
            format: int32
          description: >-
            The maximum number of routers to return. The maximum page_size is
            200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
          description: >-
            A page token, received from a previous ListRouters call. Provide
            this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListRouters must match the call that provided the page

            token.
        - name: filter
          in: query
          required: false
          schema:
            type: string
          description: >-
            Filter criteria for the returned routers. See
            https://google.aip.dev/160 for the filter syntax specification.
        - name: orderBy
          in: query
          required: false
          schema:
            type: string
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
        - name: readMask
          in: query
          required: false
          schema:
            type: string
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
      # Success returns the list-routers response schema.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListRoutersResponse'
    post:
      operationId: Gateway_CreateRouter
      summary: Create Router
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      # Optional router id in the query string, account id in the path.
      parameters:
        - name: routerId
          in: query
          required: false
          schema:
            type: string
          description: ID of the router.
        - name: account_id
          in: path
          required: true
          schema:
            type: string
          description: The Account Id
      # Request and response bodies both use the gatewayRouter schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayRouter'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRouter'
  # Single router, addressed by account id and router id. Exposes get, patch
  # and delete.
  /v1/accounts/{account_id}/routers/{router_id}:
    servers:
      - url: https://api.fireworks.ai
    # Responds with a gatewayRouter; the optional `readMask` query parameter
    # selects which fields are returned.
    get:
      # The summary previously began with a stray section heading
      # ("CRUD APIs for routers."), which is not part of this operation's name.
      summary: Get Router
      operationId: Gateway_GetRouter
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRouter'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: router_id
          in: path
          required: true
          description: The Router Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # The request body is an inline object schema (not a $ref); the response is
    # a gatewayRouter.
    patch:
      summary: Update Router
      operationId: Gateway_UpdateRouter
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayRouter'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: router_id
          in: path
          required: true
          description: The Router Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              # Properties flagged `readOnly: true` are declared here but are
              # not meant to be supplied by the client.
              properties:
                displayName:
                  type: string
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                createdBy:
                  type: string
                  readOnly: true
                state:
                  $ref: '#/components/schemas/gatewayRouterState'
                  description: The state of the router.
                  readOnly: true
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  readOnly: true
                deployments:
                  type: array
                  items:
                    type: string
                  description: The deployment names to be covered by the router.
                model:
                  type: string
                  description: |-
                    The model name to route requests to.
                    model is only applicable to single-region deployments.
                    For multi-region deployments, model must be empty.
                weightedRandom:
                  $ref: '#/components/schemas/gatewayWeightedRandomStrategy'
                evenLoad:
                  $ref: '#/components/schemas/gatewayEvenLoadStrategy'
                aliases:
                  type: array
                  items:
                    type: string
                  # Multi-paragraph explanation lives in `description`, matching
                  # the sibling properties; `title` is for a short label.
                  description: >-
                    Aliases for the router. These are the alias names that can
                    be used to

                    route requests through the router.

                    By default, these aliases are generated by the system.

                    Examples:
                      "accounts/foo/deployments/my-deployment" or "foo/my-deployment"
                      "accounts/foo/routers/my-router"
                      "accounts/foo/models/my-base-model" - only if this is the first deployment using this model
                  readOnly: true
                autoGenerated:
                  type: boolean
                  description: >-
                    auto_generated indicates this router was generated by the
                    system for a deployment.

                    Auto-generated routers cannot be updated or deleted by the
                    user.
                  readOnly: true
                public:
                  type: boolean
                  description: >-
                    True if the router is public (any account can query the
                    underlying workload), false if the router is private

                    (only the account that owns the router can query the
                    underlying workload).
              title: |-
                The properties of the router being updated. `router.name` must
                be populated with the updated resource's name.
        description: |-
          The properties of the router being updated. `router.name` must
          be populated with the updated resource's name.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # The 200 response is an object schema with no declared properties.
    delete:
      summary: Delete Router
      operationId: Gateway_DeleteRouter
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: router_id
          in: path
          required: true
          description: The Router Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Secrets collection of an account: list the stored secrets or create a
  # new one.
  /v1/accounts/{account_id}/secrets:
    servers:
      - url: https://api.fireworks.ai
    # GET: lists the account's secrets. Per the operation description, the
    # secret `value` is omitted from the response.
    get:
      summary: List Secrets
      description: >-
        Lists all secrets for an account. Note that the `value` field is not
        returned in the response for security reasons. Only the `name` and
        `key_name` fields are included for each secret.
      operationId: Gateway_ListSecrets
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListSecretsResponse'
      parameters:
        - name: pageSize
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
        # filter / orderBy are accepted but documented as unused.
        - name: filter
          description: Unused but required to use existing ListRequest functionality.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: Unused but required to use existing ListRequest functionality.
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # POST: creates a secret. Request body and 200 response both use the
    # gatewaySecret schema.
    post:
      summary: Create Secret
      operationId: Gateway_CreateSecret
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewaySecret'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewaySecret'
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Single secret resource: read, update, or delete the secret identified by
  # account_id and secret_id.
  /v1/accounts/{account_id}/secrets/{secret_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns the secret's metadata. Per the operation description, the
    # secret `value` is omitted from the response.
    get:
      summary: Get Secret
      description: >-
        Retrieves a secret by name. Note that the `value` field is not returned
        in the response for security reasons. Only the `name` and `key_name`
        fields are included.
      operationId: Gateway_GetSecret
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewaySecret'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: secret_id
          in: path
          required: true
          description: The Secret Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # PATCH: updates the secret. The inline body schema requires `keyName`;
    # `value` is optional.
    patch:
      summary: Update Secret
      operationId: Gateway_UpdateSecret
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewaySecret'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: secret_id
          in: path
          required: true
          description: The Secret Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                keyName:
                  type: string
                  title: >-
                    name of the key. In this case, it can be
                    WOLFRAM_ALPHA_API_KEY
                value:
                  type: string
                  example: sk-1234567890abcdef
                  description: >-
                    The secret value. This field is INPUT_ONLY and will not be
                    returned in GET or LIST responses

                    for security reasons. The value is only accepted when
                    creating or updating secrets.
              required:
                - keyName
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # DELETE: removes the secret. The 200 response schema is an object with
    # no declared properties.
    delete:
      summary: Delete Secret
      operationId: Gateway_DeleteSecret
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: secret_id
          in: path
          required: true
          description: The Secret Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Read-only view of serverless rate limits for an account, optionally
  # narrowed to one deployment and a time range.
  /v1/accounts/{account_id}/serverlessRateLimits:
    servers:
      - url: https://api.fireworks.ai
    get:
      # `summary` is kept short; the longer explanation lives in `description`.
      summary: List Account Serverless Rate Limits
      description: >-
        Lists effective global rate limits for shared serverless pool
        deployments (fireworks-owned).

        Values reflect configured limits as observed in production monitoring.
      operationId: Gateway_ListAccountServerlessRateLimits
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayListAccountServerlessRateLimitsResponse
      parameters:
        - name: deployment
          description: If set, only return limits for this deployment resource name.
          in: query
          required: false
          schema:
            type: string
        # start / end / interval shape the returned time series.
        - name: start
          description: >-
            Start of the time range for time series data. Defaults to 30 days
            before `end`.
          in: query
          required: false
          schema:
            type: string
            format: date-time
        - name: end
          description: >-
            End of the time range for time series data. Defaults to the request
            time.
          in: query
          required: false
          schema:
            type: string
            format: date-time
        - name: interval
          description: >-
            Step size for the time series: each point is the peak effective
            limit observed within that

            window. Defaults to 4 hours.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Read-only serverless token-throughput metrics for an account, bucketed by
  # time and base model.
  /v1/accounts/{account_id}/serverlessTokenUsage:
    servers:
      - url: https://api.fireworks.ai
    get:
      # `summary` is kept short; the longer explanation lives in `description`.
      summary: Get Account Serverless Token Usage
      description: >-
        Serverless pool token throughput (tokens per minute) by time bucket and
        base model, from

        production usage metrics.
      operationId: Gateway_GetAccountServerlessTokenUsage
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: >-
                  #/components/schemas/gatewayGetAccountServerlessTokenUsageResponse
      parameters:
        - name: start
          description: Start of the time range. Defaults to 30 days before `end`.
          in: query
          required: false
          schema:
            type: string
            format: date-time
        - name: end
          description: End of the time range. Defaults to the request time.
          in: query
          required: false
          schema:
            type: string
            format: date-time
        - name: interval
          description: >-
            Step size for each point (peak or average TPM within each window).
            Defaults to 4 hours.
          in: query
          required: false
          schema:
            type: string
        # The two include* flags select which sections the response carries;
        # at least one of them has to be true.
        - name: includePeakTokensPerMinuteByBaseModel
          description: >-
            Whether to include each section in the response. At least one must
            be true; otherwise the

            request fails with INVALID_ARGUMENT.
          in: query
          required: false
          schema:
            type: boolean
        - name: includeAverageTokensPerMinuteByBaseModel
          description: >-
            Whether to include the average tokens-per-minute section in the
            response. At least one of `includePeakTokensPerMinuteByBaseModel`
            and `includeAverageTokensPerMinuteByBaseModel` must be true;
            otherwise the request fails with INVALID_ARGUMENT.
          in: query
          required: false
          schema:
            type: boolean
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Supervised fine-tuning jobs of an account: list them or create a new one.
  /v1/accounts/{account_id}/supervisedFineTuningJobs:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns a page of jobs. Paging, filtering, ordering and read-mask
    # are all optional query parameters.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List Supervised Fine-tuning Jobs
      operationId: Gateway_ListSupervisedFineTuningJobs
      security:
        - BearerAuth: []
      parameters:
        - name: pageSize
          in: query
          description: >-
            The maximum number of fine-tuning jobs to return. The maximum
            page_size is 200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          required: false
          schema:
            format: int32
            type: integer
        - name: pageToken
          in: query
          description: >-
            A page token, received from a previous ListSupervisedFineTuningJobs
            call. Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListSupervisedFineTuningJobs must match the call that
            provided the page

            token.
          required: false
          schema:
            type: string
        - name: filter
          in: query
          description: >-
            Filter criteria for the returned jobs. See
            https://google.aip.dev/160 for the filter syntax specification.
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          description: >-
            A comma-separated list of fields to order by. e.g. "foo,bar"

            The default sort order is ascending. To specify a descending order
            for a

            field, append a " desc" suffix. e.g. "foo desc,bar"

            Subfields are specified with a "." character. e.g. "foo.bar"

            If not specified, the default order is by "name".
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListSupervisedFineTuningJobsResponse'
    # POST: creates a job from a gatewaySupervisedFineTuningJob body. The job
    # ID may be supplied as a query parameter.
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Create Supervised Fine-tuning Job
      operationId: Gateway_CreateSupervisedFineTuningJob
      security:
        - BearerAuth: []
      parameters:
        - name: supervisedFineTuningJobId
          in: query
          description: >-
            ID of the supervised fine-tuning job, a random UUID will be
            generated if not specified.
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewaySupervisedFineTuningJob'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewaySupervisedFineTuningJob'
  # Single supervised fine-tuning job: fetch or delete the job identified by
  # account_id and supervised_fine_tuning_job_id.
  /v1/accounts/{account_id}/supervisedFineTuningJobs/{supervised_fine_tuning_job_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns the job as a gatewaySupervisedFineTuningJob.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Supervised Fine-tuning Job
      operationId: Gateway_GetSupervisedFineTuningJob
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: supervised_fine_tuning_job_id
          in: path
          description: The Supervised Fine-tuning Job Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewaySupervisedFineTuningJob'
    # DELETE: the 200 response schema is an object with no declared
    # properties.
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: Delete Supervised Fine-tuning Job
      operationId: Gateway_DeleteSupervisedFineTuningJob
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: supervised_fine_tuning_job_id
          in: path
          description: The Supervised Fine-tuning Job Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                properties: {}
                type: object
  # Custom `:cancel` action on a supervised fine-tuning job.
  /v1/accounts/{account_id}/supervisedFineTuningJobs/{supervised_fine_tuning_job_id}:cancel:
    servers:
      - url: https://api.fireworks.ai
    # POST: a request body is required. The 200 response schema is an object
    # with no declared properties.
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Cancel Supervised Fine-tuning Job
      operationId: Gateway_CancelSupervisedFineTuningJob
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: supervised_fine_tuning_job_id
          in: path
          description: The Supervised Fine-tuning Job Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCancelSupervisedFineTuningJobBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                properties: {}
                type: object
  # Custom `:resume` action on a supervised fine-tuning job.
  /v1/accounts/{account_id}/supervisedFineTuningJobs/{supervised_fine_tuning_job_id}:resume:
    servers:
      - url: https://api.fireworks.ai
    # POST: a request body is required. The 200 response carries a
    # gatewaySupervisedFineTuningJob.
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Resume Supervised Fine-tuning Job
      operationId: Gateway_ResumeSupervisedFineTuningJob
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: supervised_fine_tuning_job_id
          in: path
          description: The Supervised Fine-tuning Job Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayResumeSupervisedFineTuningJobBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewaySupervisedFineTuningJob'
  # Collection-level `:estimateCost` action for supervised fine-tuning jobs.
  /v1/accounts/{account_id}/supervisedFineTuningJobs:estimateCost:
    servers:
      - url: https://api.fireworks.ai
    # POST: takes a gatewaySupervisedFineTuningJob body and returns a
    # gatewayEstimateSupervisedFineTuningJobCostResponse.
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Estimate the cost of a Supervised Fine-tuning Job
      operationId: Gateway_EstimateSupervisedFineTuningJobCost
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        description: >-
          uses the dataset, base model, and number of epochs to determine the
          estimated cost of the fine-tuning job.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewaySupervisedFineTuningJob'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayEstimateSupervisedFineTuningJobCostResponse'
  # Trails collection of an account: list the trails or create a new one.
  /v1/accounts/{account_id}/trails:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns a page of trails. Paging, filtering, ordering and read-mask
    # are all optional query parameters.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List Trails
      operationId: Gateway_ListTrails
      security:
        - BearerAuth: []
      parameters:
        - name: pageSize
          in: query
          description: >-
            The maximum number of trails to return. The maximum page_size is
            200,

            values above 200 will be coerced to 200.

            If unspecified, the default is 50.
          required: false
          schema:
            format: int32
            type: integer
        - name: pageToken
          in: query
          description: >-
            A page token, received from a previous ListTrails call. Provide this

            to retrieve the subsequent page. When paginating, all other
            parameters

            provided to ListTrails must match the call that provided the page
            token.
          required: false
          schema:
            type: string
        - name: filter
          in: query
          description: >-
            Filter criteria for the returned trails. See
            https://google.aip.dev/160

            for the filter syntax specification.
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          description: >-
            A comma-separated list of fields to order by. e.g.
            "create_time,display_name"

            The default sort order is ascending. To specify descending order for
            a

            field, append a " desc" suffix. e.g. "create_time desc"

            If not specified, the default order is by "create_time desc".
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListTrailsResponse'
    # POST: creates a trail from a gatewayTrail body. The trail ID may be
    # supplied as a query parameter.
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Create Trail
      operationId: Gateway_CreateTrail
      security:
        - BearerAuth: []
      parameters:
        - name: trailId
          in: query
          description: >-
            Optional ID for the trail. If not specified, a random UUID will be
            generated.
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        description: The trail to create
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayTrail'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrail'
  # Single trail resource: read, update, or delete the trail identified by
  # account_id and trail_id.
  /v1/accounts/{account_id}/trails/{trail_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns the trail as a gatewayTrail.
    get:
      summary: Get Trail
      operationId: Gateway_GetTrail
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrail'
      parameters:
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: trail_id
          in: path
          required: true
          description: The Trail Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # PATCH: updates the trail. The inline body schema lists the trail's
    # properties; several of them are marked readOnly.
    patch:
      summary: Update Trail
      operationId: Gateway_UpdateTrail
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrail'
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: trail_id
          in: path
          required: true
          description: The Trail Id
          schema:
            type: string
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                displayName:
                  type: string
                  title: Human-readable display name for the trail
                createTime:
                  type: string
                  format: date-time
                  title: Creation timestamp
                  readOnly: true
                updateTime:
                  type: string
                  format: date-time
                  title: Last update timestamp
                  readOnly: true
                description:
                  type: string
                  title: Optional description of what this trail is tracking
                createdBy:
                  type: string
                  title: The email address of the user who created this trail
                  readOnly: true
                langfuseConfig:
                  type: string
                  description: >-
                    Langfuse configuration for this trail, inherited from the
                    account at creation time.

                    Contains API credentials for a Project within the customer's
                    Langfuse Organization.

                    Serialized JSON format:
                    {"public_key":"pk-lf-xxx","secret_key":"sk-lf-xxx","host":"https://langfuse-prod.fireworks.ai"}

                    If empty, the default Langfuse Organization/Project will be
                    used.
                  readOnly: true
                defaultModel:
                  type: string
                  description: >-
                    Default model for requests through this trail.

                    Format: "{provider}/{model_id}" (e.g.,
                    "anthropic/claude-3-5-sonnet-20240620").

                    Can be overridden per request.
                providerKey:
                  type: string
                  description: >-
                    Provider API key for this trail.

                    When creating a trail: provide the raw API key (e.g.,
                    "sk-ant-api03-xxxx...")

                    After creation: this field contains a secret reference
                    (e.g.,
                    "accounts/{account_id}/secrets/trail-xxx-provider-key")

                    The LiteLLM gateway retrieves the actual key from Secret
                    Manager using this reference.

                    Can be overridden by specifying api_key in the request body.
              title: |-
                The properties of the trail being updated. `trail.name` must
                be populated with the updated resource's name.
        description: |-
          The properties of the trail being updated. `trail.name` must
          be populated with the updated resource's name.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    # DELETE: removes the trail. The 200 response schema is an object with no
    # declared properties.
    delete:
      summary: Delete Trail
      operationId: Gateway_DeleteTrail
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                type: object
                properties: {}
      parameters:
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: trail_id
          in: path
          required: true
          description: The Trail Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Custom `:getMetrics` read action on a single trail.
  /v1/accounts/{account_id}/trails/{trail_id}:getMetrics:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns a gatewayTrailMetrics payload for the trail.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get Trail Metrics (trace counts, token usage from tracing service)
      operationId: Gateway_GetTrailMetrics
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: trail_id
          in: path
          description: The Trail Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrailMetrics'
  # Training session jobs of an account: list them or create a new one.
  /v1/accounts/{account_id}/trainingSessionJobs:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns a gatewayListTrainingSessionJobsResponse. The five query
    # parameters are all optional and carry no descriptions in this spec.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: List training session jobs for an account.
      operationId: Gateway_ListTrainingSessionJobs
      security:
        - BearerAuth: []
      parameters:
        - name: pageSize
          in: query
          required: false
          schema:
            format: int32
            type: integer
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
        - name: filter
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListTrainingSessionJobsResponse'
    # POST: creates a job from a gatewayTrainingSessionJob body;
    # trainingSessionJobId is an optional query parameter.
    post:
      tags:
        - gateway.openapi_Gateway
      summary: Create a training session job that binds an account to a shared trainer.
      operationId: Gateway_CreateTrainingSessionJob
      security:
        - BearerAuth: []
      parameters:
        - name: trainingSessionJobId
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayTrainingSessionJob'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingSessionJob'
  # Single training session job: fetch or delete the job identified by
  # account_id and training_session_job_id.
  /v1/accounts/{account_id}/trainingSessionJobs/{training_session_job_id}:
    servers:
      - url: https://api.fireworks.ai
    # GET: returns the job as a gatewayTrainingSessionJob.
    get:
      tags:
        - gateway.openapi_Gateway
      summary: Get a training session job.
      operationId: Gateway_GetTrainingSessionJob
      security:
        - BearerAuth: []
      parameters:
        - name: readMask
          in: query
          required: false
          schema:
            type: string
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_session_job_id
          in: path
          description: The Training Session Job Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingSessionJob'
    # DELETE: per the summary, also cleans up child sessions and routes. The
    # 200 response schema is an object with no declared properties.
    delete:
      tags:
        - gateway.openapi_Gateway
      summary: >-
        Delete a training session job and cascade-clean its child sessions and
        routes.
      operationId: Gateway_DeleteTrainingSessionJob
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_session_job_id
          in: path
          description: The Training Session Job Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                properties: {}
                type: object
  # Training sessions nested under one training session job:
  # GET lists them, POST creates a new one.
  /v1/accounts/{account_id}/trainingSessionJobs/{training_session_job_id}/trainingSessions:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListTrainingSessions
      summary: List training sessions under a training session job.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional list controls, all sent in the query string.
        - name: pageSize
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          required: false
          schema:
            type: string
        - name: filter
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_session_job_id
          in: path
          description: The Training Session Job Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListTrainingSessionsResponse'
    post:
      operationId: Gateway_CreateTrainingSession
      summary: Create a training session under a training session job.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameter.
        - name: trainingSessionId
          in: query
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_session_job_id
          in: path
          description: The Training Session Job Id
          required: true
          schema:
            type: string
      # The request and the 200 response share the gatewayTrainingSession schema.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayTrainingSession'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingSession'
  # Training session item endpoint: GET reads one session of a job.
  /v1/accounts/{account_id}/trainingSessionJobs/{training_session_job_id}/trainingSessions/{training_session_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetTrainingSession
      summary: Get a training session.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameter.
        - name: readMask
          in: query
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_session_job_id
          in: path
          description: The Training Session Job Id
          required: true
          schema:
            type: string
        - name: training_session_id
          in: path
          description: The Training Session Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingSession'
  # Custom ":loadState" action on a training session, invoked with POST.
  /v1/accounts/{account_id}/trainingSessionJobs/{training_session_job_id}/trainingSessions/{training_session_id}:loadState:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_ExecuteTrainingSessionLoadState
      summary: Load or switch the LoRA adapter state for a training session.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_session_job_id
          in: path
          description: The Training Session Job Id
          required: true
          schema:
            type: string
        - name: training_session_id
          in: path
          description: The Training Session Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayExecuteTrainingSessionLoadStateBody'
      # The 200 response uses the gatewayTrainingSession schema.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingSession'
  # Training shape collection: GET lists shapes, POST creates one.
  /v1/accounts/{account_id}/trainingShapes:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: List Training Shapes
      operationId: Gateway_ListTrainingShapes
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListTrainingShapesResponse'
      parameters:
        # Optional list controls, all sent in the query string.
        - name: pageSize
          description: >-
            The maximum number of resources to return. Max page_size is 200;
            values above 200 are coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: A page token from a previous ListTrainingShapes call.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: Filter per AIP-160.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: Order by fields, default "create_time".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        # Required path parameter for the {account_id} placeholder.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
    post:
      # Title-only summary, in the same form as the GET operation above.
      summary: Create Training Shape
      operationId: Gateway_CreateTrainingShape
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingShape'
      parameters:
        - name: trainingShapeId
          description: >-
            The ID of the training shape. If not specified, a random ID will be
            generated.

            Must follow AIP-122 segment format and start with a letter.
          in: query
          required: false
          schema:
            type: string
        # Required path parameter for the {account_id} placeholder.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
      # The request and the 200 response share the gatewayTrainingShape schema.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayTrainingShape'
        description: The properties of the training shape being created.
        required: true
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Training shape item endpoint: GET reads, PATCH updates, DELETE removes.
  /v1/accounts/{account_id}/trainingShapes/{training_shape_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetTrainingShape
      summary: Get Training Shape
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameter.
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_shape_id
          in: path
          description: The Training Shape Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingShape'
    patch:
      operationId: Gateway_UpdateTrainingShape
      summary: Update Training Shape
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_shape_id
          in: path
          description: The Training Shape Id
          required: true
          schema:
            type: string
      requestBody:
        description: >-
          The properties of the training shape being updated;
          `training_shape.name` must be populated.
        required: true
        content:
          application/json:
            # Inline object schema for the update payload.
            schema:
              type: object
              title: >-
                The properties of the training shape being updated;
                `training_shape.name` must be populated.
              required:
                - baseModel
                - trainerImageTag
              properties:
                displayName:
                  type: string
                  description: |-
                    Human-readable display name of the training shape. e.g. "Llama3 70B H200 BF16"
                    Must be fewer than 64 characters long.
                description:
                  type: string
                  description: >-
                    The description of the training shape. Must be fewer than
                    1000 characters long.
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: The creation time of the training shape.
                updateTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: The update time for the training shape.
                baseModel:
                  type: string
                  title: >-
                    The base model name. e.g.
                    accounts/fireworks/models/llama-3-70b
                deploymentShapeVersion:
                  type: string
                  title: |-
                    The validated deployment shape version used for numerics verification.
                    e.g. accounts/fireworks/deploymentShapes/rft-qwen3-4b/versions/abcd1234
                trainerImageTag:
                  type: string
                  description: >-
                    The validated trainer runtime image tag used for numerics
                    verification.
                trainerMode:
                  $ref: '#/components/schemas/TrainingShapeTrainerMode'
                  description: Trainer mode used for the validated launch profile.
                nodeCount:
                  type: integer
                  format: int32
                  description: Node count validated for the launch profile.
                trainerShardingScheme:
                  $ref: '#/components/schemas/gatewayTrainerShardingScheme'
                  description: >-
                    Structured sharding/parallelism profile validated for the
                    trainer launch.
                modelType:
                  type: string
                  readOnly: true
                  description: The model type of the base model (e.g. "llama", "qwen").
                parameterCount:
                  # int64 value carried as a JSON string.
                  type: string
                  format: int64
                  readOnly: true
                  description: >-
                    The parameter count of the base model, used for
                    billion-parameter bucket matching.
                acceleratorType:
                  $ref: '#/components/schemas/gatewayAcceleratorType'
                  description: The type of accelerator to use.
                acceleratorCount:
                  type: integer
                  format: int32
                  description: Total number of accelerators used by the job.
                baseModelWeightPrecision:
                  $ref: '#/components/schemas/gatewayWeightPrecision'
                  description: >-
                    Default precision for base weights during training (prefer
                    BF16; QLoRA/QAT omitted).
                maxSupportedContextLength:
                  type: integer
                  format: int32
                  description: Capacity limits validated for this shape.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingShape'
    delete:
      operationId: Gateway_DeleteTrainingShape
      summary: Delete Training Shape
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_shape_id
          in: path
          description: The Training Shape Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              # The success body is an object with no declared properties.
              schema:
                type: object
                properties: {}
  # Version collection of one training shape: GET lists its versions.
  /v1/accounts/{account_id}/trainingShapes/{training_shape_id}/versions:
    servers:
      - url: https://api.fireworks.ai
    get:
      # Singular "Shape", in line with the operationId below.
      summary: List Training Shape Versions
      operationId: Gateway_ListTrainingShapeVersions
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListTrainingShapeVersionsResponse'
      parameters:
        # Optional list controls, all sent in the query string.
        - name: pageSize
          description: >-
            The maximum number of versions to return. Max page_size is 200;
            values above 200 are coerced to 200.

            If unspecified, the default is 50.
          in: query
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          description: A page token from a previous ListTrainingShapeVersions call.
          in: query
          required: false
          schema:
            type: string
        - name: filter
          description: Filter per AIP-160.
          in: query
          required: false
          schema:
            type: string
        - name: orderBy
          description: Order by fields, default "create_time".
          in: query
          required: false
          schema:
            type: string
        - name: readMask
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          in: query
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: training_shape_id
          in: path
          required: true
          description: The Training Shape Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Training shape version item endpoint: GET reads, PATCH updates.
  /v1/accounts/{account_id}/trainingShapes/{training_shape_id}/versions/{version_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetTrainingShapeVersion
      summary: Get Training Shape Version
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameter.
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_shape_id
          in: path
          description: The Training Shape Id
          required: true
          schema:
            type: string
        - name: version_id
          in: path
          description: The Version Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingShapeVersion'
    patch:
      operationId: Gateway_UpdateTrainingShapeVersion
      summary: Update Training Shape Version
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: training_shape_id
          in: path
          description: The Training Shape Id
          required: true
          schema:
            type: string
        - name: version_id
          in: path
          description: The Version Id
          required: true
          schema:
            type: string
      requestBody:
        description: >-
          The properties of the version being updated;
          `training_shape_version.name` must be populated.
        required: true
        content:
          application/json:
            # Inline object schema for the update payload.
            schema:
              type: object
              title: >-
                The properties of the version being updated;
                `training_shape_version.name` must be populated.
              properties:
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                  title: Creation time (used for ordering desc by default)
                snapshot:
                  $ref: '#/components/schemas/gatewayTrainingShape'
                  readOnly: true
                  description: Full snapshot of the TrainingShape at this version.
                validated:
                  type: boolean
                  description: |-
                    Whether this version has been validated through capacity tests.
                    Only superusers can set this flag.
                public:
                  type: boolean
                  description: If true, this version will be publicly readable.
                latestValidated:
                  type: boolean
                  readOnly: true
                  description: >-
                    If true, this version is the latest validated version (at
                    most one per shape).
                updateTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: Last update time of mutable fields such as validated/public.
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayTrainingShapeVersion'
  # User collection of an account: GET lists users, POST creates one.
  /v1/accounts/{account_id}/users:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListUsers
      summary: List Users
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional list controls, all sent in the query string.
        - name: pageSize
          in: query
          description: |-
            The maximum number of users to return. The maximum page_size is 200,
            values above 200 will be coerced to 200.
            If unspecified, the default is 50.
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          description: |-
            A page token, received from a previous ListUsers call. Provide this
            to retrieve the subsequent page. When paginating, all other parameters
            provided to ListUsers must match the call that provided the page
            token.
          required: false
          schema:
            type: string
        - name: filter
          in: query
          description: |-
            Only users satisfying the provided filter (if specified) will be
            returned. See https://google.aip.dev/160 for the filter grammar.
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          description: |-
            A comma-separated list of fields to order by. e.g. "foo,bar"
            The default sort order is ascending. To specify a descending order for a
            field, append a " desc" suffix. e.g. "foo desc,bar"
            Subfields are specified with a "." character. e.g. "foo.bar"
            If not specified, the default order is by "name".
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        # Required path parameter for the {account_id} placeholder.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListUsersResponse'
    post:
      operationId: Gateway_CreateUser
      summary: Create User
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameter.
        - name: userId
          in: query
          description: |-
            The user ID to use in the user name. e.g. my-user
            If not specified, a default ID is generated from user.email.
          required: false
          schema:
            type: string
        # Required path parameter for the {account_id} placeholder.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      # The request and the 200 response share the gatewayUser schema.
      requestBody:
        description: The properties of the user being created.
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/gatewayUser'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayUser'
  # User item endpoint: GET reads one user, PATCH updates it.
  /v1/accounts/{account_id}/users/{user_id}:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_GetUser
      summary: Get User
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional query parameter.
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: user_id
          in: path
          description: The User Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayUser'
    patch:
      operationId: Gateway_UpdateUser
      summary: Update User
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: user_id
          in: path
          description: The User Id
          required: true
          schema:
            type: string
      requestBody:
        description: |-
          The properties of the User being updated. `user.name` must
          be populated with the updated resource's name.
        required: true
        content:
          application/json:
            # Inline object schema for the update payload.
            schema:
              type: object
              title: |-
                The properties of the User being updated. `user.name` must
                be populated with the updated resource's name.
              required:
                - role
              properties:
                displayName:
                  type: string
                  description: |-
                    Human-readable display name of the user. e.g. "Alice"
                    Must be fewer than 64 characters long.
                serviceAccount:
                  type: boolean
                  title: >-
                    Whether this user is a service account (can only be set by
                    admins)
                createTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: The creation time of the user.
                role:
                  type: string
                  description: |-
                    The user's role: admin, user, contributor, inference-user, or custom.
                    When set to "custom", the user's permissions are governed by permission_preset.
                email:
                  type: string
                  description: The user's email address.
                state:
                  $ref: '#/components/schemas/gatewayUserState'
                  readOnly: true
                  description: The state of the user.
                status:
                  $ref: '#/components/schemas/gatewayStatus'
                  readOnly: true
                  description: Contains information about the user status.
                updateTime:
                  type: string
                  format: date-time
                  readOnly: true
                  description: The update time for the user.
                permissionPreset:
                  type: string
                  description: >-
                    The permission preset for this user. Only valid when role is
                    "custom".
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayUser'
  # API key collection of a user: GET lists keys, POST creates one.
  /v1/accounts/{account_id}/users/{user_id}/apiKeys:
    servers:
      - url: https://api.fireworks.ai
    get:
      operationId: Gateway_ListApiKeys
      summary: List API Keys
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Optional list controls, all sent in the query string.
        - name: pageSize
          in: query
          description: >-
            Number of API keys to return in the response. Pagination support to
            be added.
          required: false
          schema:
            type: integer
            format: int32
        - name: pageToken
          in: query
          description: >-
            Token for fetching the next page of results. Pagination support to
            be added.
          required: false
          schema:
            type: string
        - name: filter
          in: query
          description: Field for filtering results.
          required: false
          schema:
            type: string
        - name: orderBy
          in: query
          description: Field for ordering results.
          required: false
          schema:
            type: string
        - name: readMask
          in: query
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          required: false
          schema:
            type: string
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: user_id
          in: path
          description: The User Id
          required: true
          schema:
            type: string
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayListApiKeysResponse'
    post:
      operationId: Gateway_CreateApiKey
      summary: Create API Key
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        # Required path parameters, one per placeholder in the URL template.
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: user_id
          in: path
          description: The User Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayCreateApiKeyBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayApiKey'
  # API key item endpoint: GET reads one API key of a user.
  # The URL template names each path parameter exactly once, and every
  # `in: path` parameter declared below (keyId, account_id, user_id) has a
  # matching placeholder in it.
  # NOTE(review): the `/apiKeys/{keyId}` segment is inferred from the declared
  # parameters; confirm it against the gateway route for Gateway_GetApiKey.
  /v1/accounts/{account_id}/users/{user_id}/apiKeys/{keyId}:
    servers:
      - url: https://api.fireworks.ai
    get:
      summary: Get API Key
      operationId: Gateway_GetApiKey
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayApiKey'
      parameters:
        # Required path parameters, one per placeholder in the URL template.
        - name: keyId
          description: The key ID for the API key.
          in: path
          required: true
          schema:
            type: string
        - name: account_id
          in: path
          required: true
          description: The Account Id
          schema:
            type: string
        - name: user_id
          in: path
          required: true
          description: The User Id
          schema:
            type: string
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
  # Delete API Key: exposed as POST on the `:delete` suffix. The request body
  # is a GatewayDeleteApiKeyBody; the 200 payload is an empty object.
  /v1/accounts/{account_id}/users/{user_id}/apiKeys:delete:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_DeleteApiKey
      summary: Delete API Key
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
        - name: user_id
          in: path
          description: The User Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayDeleteApiKeyBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                # No fields are defined for the success payload.
                properties: {}
                type: object
  # Test Evaluation: POST a GatewayTestEvaluationBody; the 200 payload reuses
  # gatewayPreviewEvaluationResponse.
  /v1/accounts/{account_id}:testeval:
    servers:
      - url: https://api.fireworks.ai
    post:
      operationId: Gateway_TestEvaluation
      summary: >-
        Similar to preview evaluation, but no need to create the evaluation
        entry first.
      tags:
        - gateway.openapi_Gateway
      security:
        - BearerAuth: []
      parameters:
        - name: account_id
          in: path
          description: The Account Id
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GatewayTestEvaluationBody'
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/gatewayPreviewEvaluationResponse'
  # Upload Dataset Files: single-request multipart upload. The form carries
  # one binary `file` part; a 200 response is a FileUploadResponse.
  /v1/accounts/{account_id}/datasets/{dataset_id}:upload:
    servers:
      - url: https://api.fireworks.ai
    post:
      summary: Upload Dataset Files
      # Size limit is stated in megabytes ("MB"); "Mb" would read as megabits.
      description: >
        Provides a streamlined way to upload a dataset file in a single API
        request. This path can handle file sizes up to 150 MB. For larger file
        sizes use [Get Dataset Upload Endpoint](get-dataset-upload-endpoint).
      operationId: Gateway_UploadDatasetFile
      responses:
        '200':
          description: A successful response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FileUploadResponse'
        # Error responses are declared without a body schema.
        '400':
          description: Bad Request
        '401':
          description: Unauthorized
        '404':
          description: Not Found
        '500':
          description: Internal Server Error
      parameters:
        - name: account_id
          in: path
          required: true
          description: The account id
          schema:
            type: string
        - name: dataset_id
          in: path
          required: true
          description: The dataset id
          schema:
            type: string
      requestBody:
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  type: string
                  format: binary
        required: true
      tags:
        - gateway-extra.openapi_Gateway
      security:
        - BearerAuth: []
  # Responses collection: POST creates a model response, GET lists responses.
  # Both are served from the /inference base URL and use BearerAuth.
  /v1/responses:
    servers:
      - url: https://api.fireworks.ai/inference
    post:
      summary: Create Response
      description: >-
        Creates a model response, optionally interacting with custom tools via
        the Model Context Protocol (MCP). This endpoint supports conversational
        continuation and streaming.


        Explore our cookbooks for detailed examples:


        - [Basic MCP
        Usage](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/fireworks_mcp_examples.ipynb)

        - [Streaming with
        MCP](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/fireworks_mcp_with_streaming.ipynb)

        - [Conversational History with
        `previous_response_id`](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/fireworks_previous_response_cookbook.ipynb)

        - [Basic
        Streaming](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/fireworks_streaming_example.ipynb)

        - [Controlling Response
        Storage](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/mcp_server_with_store_false_argument.ipynb)
      operationId: create_response_v1_responses_post
      security:
        - BearerAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateResponse'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Response'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      tags:
        - responses.openapi_other
    get:
      summary: List Responses
      description: |-
        Get a list of all responses for the authenticated account.

        Args:
            limit: Maximum number of responses to return (default: 20, max: 100)
            after: Cursor for pagination - return responses after this ID
            before: Cursor for pagination - return responses before this ID
      operationId: list_responses_v1_responses_get
      security:
        - BearerAuth: []
      # Parameter descriptions mirror the "Args" section of the operation
      # description so they surface on each parameter in generated docs/SDKs.
      parameters:
        - name: limit
          in: query
          required: false
          description: 'Maximum number of responses to return (default: 20, max: 100).'
          schema:
            type: integer
            default: 20
            title: Limit
        - name: after
          in: query
          required: false
          description: Cursor for pagination - return responses after this ID.
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: After
        - name: before
          in: query
          required: false
          description: Cursor for pagination - return responses before this ID.
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: Before
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ResponseList'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      tags:
        - responses.openapi_other
  # Single response resource: GET fetches it, DELETE removes it. Both take
  # the required `response_id` path parameter.
  /v1/responses/{response_id}:
    servers:
      - url: https://api.fireworks.ai/inference
    get:
      summary: Get Response
      operationId: get_response_v1_responses__response_id__get
      security:
        - BearerAuth: []
      parameters:
        - name: response_id
          in: path
          required: true
          schema:
            type: string
            title: Response Id
          # Added so GET documents its parameter the same way DELETE does.
          description: The ID of the response to retrieve
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Response'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      tags:
        - responses.openapi_other
    delete:
      summary: Delete Response
      description: >-
        Deletes a model response by its ID. Once deleted, the response data will
        be gone immediately and permanently.


        The response cannot be recovered and any conversations that reference
        this response ID will no longer be able to access it.
      operationId: delete_response_v1_responses__response_id__delete
      security:
        - BearerAuth: []
      parameters:
        - name: response_id
          in: path
          required: true
          schema:
            type: string
            description: The ID of the response to delete
            title: Response Id
          description: The ID of the response to delete
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeleteResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      tags:
        - responses.openapi_other
  # Create Completion: POST a CompletionRequest. A 200 response is declared
  # both as JSON (CompletionResponse) and as an event stream
  # (CompletionStreamResponse).
  /v1/completions:
    servers:
      - url: https://api.fireworks.ai/inference
    post:
      operationId: create_completion_v1_completions_post
      summary: Create Completion
      description: >-
        Create a completion for the provided prompt and parameters.


        For RL / agent rollouts, Fireworks inference exposes additional

        rollout-specific features:

        [`x-session-affinity` and
        `x-multi-turn-session-id`](https://docs.fireworks.ai/guides/rollout-inference#session-affinity)

        for multi-turn trajectories, and

        [MoE Router Replay
        (R3)](https://docs.fireworks.ai/guides/rollout-inference#moe-router-replay)

        for MoE expert tracing during rollouts.
      tags:
        - text-completion.openapi_other
      security:
        - BearerAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/CompletionStreamResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # Create Chat Completion: POST a ChatCompletionRequest. A 200 response is
  # declared both as JSON (ChatCompletionResponse) and as an event stream
  # (ChatCompletionStreamResponse).
  /v1/chat/completions:
    servers:
      - url: https://api.fireworks.ai/inference
    post:
      operationId: create_chat_completion_v1_chat_completions_post
      summary: Create Chat Completion
      description: >-
        Create a completion for the provided prompt and parameters.


        For RL / agent rollouts, Fireworks inference exposes additional

        rollout-specific features:

        [`x-session-affinity` and
        `x-multi-turn-session-id`](https://docs.fireworks.ai/guides/rollout-inference#session-affinity)

        for multi-turn trajectories, and

        [MoE Router Replay
        (R3)](https://docs.fireworks.ai/guides/rollout-inference#moe-router-replay)

        for MoE expert tracing during rollouts.
      tags:
        - text-completion.openapi_other
      security:
        - BearerAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ChatCompletionStreamResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  # Create a Message: POST an AnthropicCreateMessageParams body. A 200
  # response is an AnthropicMessage; any 4XX is an AnthropicErrorResponse.
  /v1/messages:
    servers:
      - url: https://api.fireworks.ai/inference
    post:
      summary: Create a Message
      description: >-
        **Anthropic-compatible endpoint.**


        Send a structured list of input messages with text and/or image content,
        and the model will generate the next message in the conversation.


        The Messages API can be used for either single queries or stateless
        multi-turn conversations.


        **Fireworks Quickstarts:**

        - [Serverless Quickstart](/getting-started/quickstart)

        - [Deployments Quickstart](/getting-started/ondemand-quickstart)
      operationId: messages_post
      responses:
        '200':
          description: Message object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AnthropicMessage'
        # Range key quoted like the other status-code keys, as OpenAPI requires
        # for JSON/YAML compatibility.
        '4XX':
          description: >-
            Error response.


            See the [errors documentation](/guides/inference-error-codes) for
            more details.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AnthropicErrorResponse'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AnthropicCreateMessageParams'
        required: true
      tags:
        - anthropic-messages.openapi_other
      security:
        - BearerAuth: []
components:
  securitySchemes:
    # HTTP bearer scheme; operations opt in via `security: - BearerAuth: []`.
    BearerAuth:
      description: >-
        Bearer authentication using your Fireworks API key. Format: Bearer
        <API_KEY>
      type: http
      scheme: bearer
      bearerFormat: API_KEY
  schemas:
    # String enum of account types; defaults to the unspecified value.
    AccountAccountType:
      type: string
      default: ACCOUNT_TYPE_UNSPECIFIED
      enum:
        - ACCOUNT_TYPE_UNSPECIFIED
        - ENTERPRISE
    # String enum naming a rate-limit metric; defaults to METRIC_UNSPECIFIED.
    AccountRateLimitMetric:
      description: Which rate-limit metric this row describes.
      type: string
      default: METRIC_UNSPECIFIED
      enum:
        - METRIC_UNSPECIFIED
        - TOKENS_GENERATED
        - TOKENS_PROMPT
        - TOKENS_CACHE_ADJUSTED_PROMPT
    # String enum of account suspension states; defaults to UNSUSPENDED.
    AccountSuspendState:
      type: string
      default: UNSUSPENDED
      enum:
        - UNSUSPENDED
        - FAILED_PAYMENTS
        - CREDIT_DEPLETED
        - MONTHLY_SPEND_LIMIT_EXCEEDED
        - BLOCKED_BY_ABUSE_RULE
    # One usage bucket for a dedicated deployment. The int64 counter is typed
    # as a string with `format: int64`.
    AccountUsageDedicatedDeploymentUsage:
      description: One dedicated deployment usage aggregation bucket.
      type: object
      properties:
        deploymentId:
          title: The deployment ID
          type: string
        acceleratorType:
          title: GPU type / accelerator type
          type: string
        acceleratorSeconds:
          title: Accelerator seconds
          type: string
          format: int64
        startTime:
          description: >-
            Start timestamp of the usage.

            Note: Dedicated deployment usage events are aggregated daily. Only
            the date portion (YYYY-MM-DD) is used;

            the time portion is ignored.
          type: string
          format: date-time
        endTime:
          description: >-
            End timestamp of the usage.

            Note: Dedicated deployment usage events are aggregated daily. Only
            the date portion (YYYY-MM-DD) is used;

            the time portion is ignored.
          type: string
          format: date-time
        baseModel:
          title: Base model of the deployment
          type: string
        usageType:
          title: Usage type to distinguish between different deployment types
          type: string
        placement:
          description: >-
            Deployment placement as a string (e.g. US_IOWA_1, GLOBAL, US,
            EUROPE).

            Set to REGION_UNSPECIFIED when unknown.
          type: string
        group:
          description: >-
            Group-by dimension key-values (deployment_name, accelerator_type,
            team, project, environment).
          type: object
          additionalProperties:
            type: string
    # One usage bucket for serverless inference. `modelName` and `apiKeyId`
    # are marked deprecated in favour of the matching `group` entries.
    AccountUsageServerlessUsage:
      type: object
      properties:
        modelName:
          type: string
          description: 'Deprecated: use group["model_name"] instead.'
        promptTokens:
          type: string
          format: int64
          title: Number of prompt tokens (for text inference)
        completionTokens:
          type: string
          format: int64
          title: Number of completion tokens (for text inference)
        # startTime/endTime: the multi-paragraph note lives in `description`
        # (as in AccountUsageDedicatedDeploymentUsage) instead of `title`, and
        # the sentence no longer ends on a dangling semicolon.
        startTime:
          type: string
          format: date-time
          description: >-
            Start timestamp of the usage.

            Note: Serverless usage events are aggregated daily. Only the date
            portion (YYYY-MM-DD) is used.
        endTime:
          type: string
          format: date-time
          description: >-
            End timestamp of the usage.

            Note: Serverless usage events are aggregated daily. Only the date
            portion (YYYY-MM-DD) is used.
        audioInputSeconds:
          type: number
          format: double
          title: Audio input seconds (for audio inference)
        usageType:
          type: string
          title: Usage type to distinguish between different inference types
        apiKeyId:
          type: string
          description: 'Deprecated: use group["api_key_id"] instead.'
        group:
          type: object
          additionalProperties:
            type: string
          description: >-
            Group-by dimension key-values (model_name, api_key_id, team,
            project, environment).
      description: One serverless usage aggregation bucket.
    # String enum of assertion types (unspecified, LLM, code).
    AssertionAssertionType:
      type: string
      default: ASSERTION_TYPE_UNSPECIFIED
      enum:
        - ASSERTION_TYPE_UNSPECIFIED
        - ASSERTION_TYPE_LLM
        - ASSERTION_TYPE_CODE
    # String enum of checkpoint formats; defaults to the unspecified value.
    BaseModelDetailsCheckpointFormat:
      type: string
      default: CHECKPOINT_FORMAT_UNSPECIFIED
      enum:
        - CHECKPOINT_FORMAT_UNSPECIFIED
        - NATIVE
        - HUGGINGFACE
        - UNINITIALIZED
    # Execution settings: timeout, memory limit and environment variables.
    CodeAssertionExecutionOptions:
      title: Options for execution
      type: object
      properties:
        timeoutMs:
          title: Timeout in milliseconds, max 5 minutes
          type: integer
          format: int32
        memoryLimitMb:
          title: Memory limit in MB, max 1GB
          type: integer
          format: int32
        envVars:
          title: Environment variables
          type: object
          additionalProperties:
            type: string
    # String enum of dataset formats; defaults to FORMAT_UNSPECIFIED.
    DatasetFormat:
      type: string
      default: FORMAT_UNSPECIFIED
      enum:
        - FORMAT_UNSPECIFIED
        - CHAT
        - COMPLETION
        - RL
    # String enum of hot-load bucket types; defaults to the unspecified value.
    DeploymentHotLoadBucketType:
      title: '- FW_HOSTED: Fireworks hosted bucket'
      type: string
      default: BUCKET_TYPE_UNSPECIFIED
      enum:
        - BUCKET_TYPE_UNSPECIFIED
        - MINIO
        - S3
        - NEBIUS
        - FW_HOSTED
    # String enum of deployment precisions; defaults to PRECISION_UNSPECIFIED.
    DeploymentPrecision:
      title: >-
        - PRECISION_UNSPECIFIED: if left unspecified we will treat this as a
        legacy model created before

        self serve
      type: string
      default: PRECISION_UNSPECIFIED
      enum:
        - PRECISION_UNSPECIFIED
        - FP16
        - FP8
        - FP8_MM
        - FP8_AR
        - FP8_MM_KV_ATTN
        - FP8_KV
        - FP8_MM_V2
        - FP8_V2
        - FP8_MM_KV_ATTN_V2
        - NF4
        - FP4
        - BF16
        - FP4_BLOCKSCALED_MM
        - FP4_MX_MOE
    # String enum of deployment shape presets; each value except the default
    # is explained in `title`.
    DeploymentShapePresetType:
      title: |-
        - MINIMAL: Preset for cheapest & most minimal type of deployment
         - FAST: Preset for fastest generation & TTFT deployment
         - THROUGHPUT: Preset for best throughput deployment
         - FULL_PRECISION: Preset for deployment with full precision for training & most accurate numerics
         - AGENTIC_CODING: Preset for autonomous code generation and analysis for development workflows
         - CHAT: Preset for interactive conversational AI for customer engagement
         - SUMMARIZATION: Preset for efficient document and content summarization
      type: string
      default: PRESET_TYPE_UNSPECIFIED
      enum:
        - PRESET_TYPE_UNSPECIFIED
        - MINIMAL
        - FAST
        - THROUGHPUT
        - FULL_PRECISION
        - AGENTIC_CODING
        - CHAT
        - SUMMARIZATION
    # String enum of training styles; defaults to the unspecified value.
    EagleTrainingJobTrainingStyle:
      type: string
      default: TRAINING_STYLE_UNSPECIFIED
      enum:
        - TRAINING_STYLE_UNSPECIFIED
        - EAGLE
        - EAGLE3
    # Where evaluator source code comes from: a type plus an optional GitHub
    # repository name.
    EvaluatorSource:
      type: object
      properties:
        type:
          description: Identifies how the evaluator source code is provided.
          $ref: '#/components/schemas/EvaluatorSourceType'
        githubRepositoryName:
          description: >-
            Normalized GitHub repository name (e.g. owner/repository) when the
            source is GitHub.
          type: string
    # String enum of evaluator source kinds; defaults to TYPE_UNSPECIFIED.
    EvaluatorSourceType:
      title: |-
        - TYPE_UPLOAD: Source code is uploaded by the user
         - TYPE_GITHUB: Source code is from a GitHub repository
         - TYPE_TEMPORARY: Source code is a temporary UI uploaded code
      type: string
      default: TYPE_UNSPECIFIED
      enum:
        - TYPE_UNSPECIFIED
        - TYPE_UPLOAD
        - TYPE_GITHUB
        - TYPE_TEMPORARY
    # Request body with a single required field, `aliasId`.
    GatewayAliasEvaluatorVersionBody:
      description: Assigns or updates a user-specified alias for a given revision.
      type: object
      required:
        - aliasId
      properties:
        aliasId:
          description: The alias ID (e.g., "current", a semver tag, etc.).
          type: string
    # Cancel request bodies: each is declared as a bare object with no
    # properties.
    GatewayCancelDpoJobBody:
      type: object
    GatewayCancelReinforcementFineTuningJobBody:
      type: object
    GatewayCancelRlorTrainerJobBody:
      type: object
    GatewayCancelSupervisedFineTuningJobBody:
      type: object
    # Request body wrapping the required `apiKey` object.
    GatewayCreateApiKeyBody:
      type: object
      required:
        - apiKey
      properties:
        apiKey:
          description: The API key to be created.
          $ref: '#/components/schemas/gatewayApiKey'
    # Request body: the cluster definition plus the ID to give it; both are
    # required.
    GatewayCreateClusterBody:
      type: object
      required:
        - cluster
        - clusterId
      properties:
        cluster:
          description: The properties of the cluster being created.
          $ref: '#/components/schemas/gatewayCluster'
        clusterId:
          title: The cluster ID to use in the cluster name. e.g. my-cluster
          type: string
    # Request body: `dataset` and `datasetId` are required; `sourceDatasetId`
    # and `filter` are optional.
    GatewayCreateDatasetBody:
      type: object
      required:
        - dataset
        - datasetId
      properties:
        dataset:
          $ref: '#/components/schemas/gatewayDataset'
        datasetId:
          type: string
        sourceDatasetId:
          title: >-
            If set, indicates we are creating a new dataset by filtering this
            existing dataset ID
          type: string
        filter:
          title: >-
            Filter condition (SQL-like WHERE clause) to apply to the source
            dataset
          type: string
    # Request body: required `evaluation` object, optional `evaluationId`.
    GatewayCreateEvaluationBody:
      type: object
      required:
        - evaluation
      properties:
        evaluation:
          $ref: '#/components/schemas/gatewayEvaluation'
        evaluationId:
          type: string
    # Request body: required `evaluationJob` object, optional `evaluationJobId`.
    GatewayCreateEvaluationJobBody:
      type: object
      required:
        - evaluationJob
      properties:
        evaluationJob:
          $ref: '#/components/schemas/gatewayEvaluationJob'
        evaluationJobId:
          type: string
    # Request body: required `evaluator` object, optional `evaluatorId`.
    GatewayCreateEvaluatorBody:
      type: object
      required:
        - evaluator
      properties:
        evaluator:
          $ref: '#/components/schemas/gatewayEvaluator'
        evaluatorId:
          type: string
    # Request body with the same shape as GatewayCreateEvaluatorBody: required
    # `evaluator`, optional `evaluatorId`.
    GatewayCreateEvaluatorV2Body:
      type: object
      required:
        - evaluator
      properties:
        evaluator:
          $ref: '#/components/schemas/gatewayEvaluator'
        evaluatorId:
          type: string
    # Request body: only `modelId` is required; `model` and `cluster` are
    # optional.
    GatewayCreateModelBody:
      type: object
      required:
        - modelId
      properties:
        model:
          description: The properties of the Model being created.
          $ref: '#/components/schemas/gatewayModel'
        modelId:
          description: ID of the model.
          type: string
        cluster:
          description: |-
            The resource name of the BYOC cluster to which this model belongs.
            e.g. accounts/my-account/clusters/my-cluster. Empty if it belongs to
            a Fireworks cluster.
          type: string
    # Declared as a bare object with no properties.
    GatewayDebugReinforcementFineTuningJobBody:
      type: object
    # Request body naming the API key to delete via the required `keyId`.
    GatewayDeleteApiKeyBody:
      type: object
      required:
        - keyId
      properties:
        keyId:
          description: The key ID for the API key.
          type: string
    # Request body: target deployment name and rollout strategy, both required.
    GatewayDeployModelVersionBody:
      type: object
      required:
        - deployment
        - rolloutStrategy
      properties:
        deployment:
          title: The deployment name to deploy the model version to
          type: string
        rolloutStrategy:
          title: The rollout strategy to use when deploying the model version
          $ref: '#/components/schemas/gatewayRolloutStrategy'
    # Request body: input dataset and output model, both required strings.
    GatewayExecuteRlorTrainStepBody:
      type: object
      required:
        - dataset
        - outputModel
      properties:
        dataset:
          description: Dataset to process for this iteration.
          type: string
        outputModel:
          description: Output model to materialize when training completes.
          type: string
    # Request body with a single required `path` string.
    GatewayExecuteTrainingSessionLoadStateBody:
      type: object
      required:
        - path
      properties:
        path:
          description: Adapter checkpoint path to load into the training session.
          type: string
    # Request body: required file-name-to-size map (sizes are int64 encoded as
    # strings) plus an optional read mask.
    GatewayGetDatasetUploadEndpointBody:
      type: object
      required:
        - filenameToSize
      properties:
        filenameToSize:
          description: A mapping from the file name to its size in bytes.
          type: object
          additionalProperties:
            type: string
            format: int64
        readMask:
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          type: string
    # Request body for obtaining evaluator upload endpoints. Field descriptions
    # match those on the identically shaped Dataset/Model upload-endpoint
    # bodies, which previously were the only ones documented.
    GatewayGetEvaluatorUploadEndpointBody:
      type: object
      properties:
        filenameToSize:
          type: object
          additionalProperties:
            type: string
            format: int64
          description: A mapping from the file name to its size in bytes.
        readMask:
          type: string
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
      required:
        - filenameToSize
    # Request body: required file-name-to-size map, optional resumable-upload
    # flag and read mask.
    GatewayGetModelUploadEndpointBody:
      type: object
      required:
        - filenameToSize
      properties:
        filenameToSize:
          description: A mapping from the file name to its size in bytes.
          type: object
          additionalProperties:
            type: string
            format: int64
        enableResumableUpload:
          description: If true, enable resumable upload instead of PUT.
          type: boolean
        readMask:
          description: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.
          type: string
    # Request body offering two source descriptors (AWS S3, Azure Blob);
    # neither is listed as required by the schema.
    GatewayImportModelBody:
      type: object
      properties:
        awsS3Source:
          description: |-
            AWS S3 source details.
            Must be set when importing from AWS S3.
          $ref: '#/components/schemas/gatewayAwsS3ModelSource'
        azureBlobSource:
          description: |-
            Azure Blob Storage source details.
            Must be set when importing from Azure Blob Storage.
          $ref: '#/components/schemas/gatewayAzureBlobModelSource'
    # Request body: target precision plus an optional read mask.
    GatewayPrepareModelBody:
      type: object
      properties:
        precision:
          title: the precision with which the model will be prepared
          $ref: '#/components/schemas/DeploymentPrecision'
        readMask:
          title: >-
            The fields to be returned in the response. If empty or "*", all
            fields will be returned.

            This is added as is used in getResource()
          type: string
    # Request body: required `sampleData` string, optional `maxSamples` cap.
    GatewayPreviewEvaluationBody:
      title: Request to preview an evaluation with sample data
      type: object
      required:
        - sampleData
      properties:
        sampleData:
          title: Sample data in JSON format (array of samples)
          type: string
        maxSamples:
          title: >-
            Maximum number of samples to evaluate (optional, default will be a
            small number like 5)
          type: integer
          format: int32
    # Request body for previewing an evaluator: the evaluator object and an
    # array of sample strings are required; `maxSamples` is optional.
    GatewayPreviewEvaluatorBody:
      type: object
      properties:
        evaluator:
          $ref: '#/components/schemas/gatewayEvaluator'
          title: The evaluator object to run preview on
        sampleData:
          type: array
          items:
            type: string
          # Typo fix: was "json formated".
          title: Sample data in JSON format (array of JSON-formatted samples)
        maxSamples:
          type: integer
          format: int32
          title: Maximum number of samples to evaluate, default is 10
      required:
        - evaluator
        - sampleData
    # Request body: `outputModel`, `trainerJobId` and `baseModel` are required;
    # `hotLoadDeploymentId` is optional.
    GatewayPromoteCheckpointBody:
      description: |-
        Promote a checkpoint to a model. When base_model is provided, the
        checkpoint GCS path is reconstructed from account metadata and the
        trainer/deployment ID — no trainer job database lookup is needed.
      type: object
      required:
        - outputModel
        - trainerJobId
        - baseModel
      properties:
        outputModel:
          title: |-
            The desired output model resource name.
            Format: accounts/{account}/models/{model}
          type: string
        trainerJobId:
          description: |-
            The trainer job that wrote this checkpoint.
            Format: accounts/{account}/rlorTrainerJobs/{rlor_trainer_job}
            Used to construct the GCS path (trainer-keyed bucket) and as a
            source annotation on the promoted model.
          type: string
        baseModel:
          title: >-
            The base model for metadata inheritance (model type, context length,
            etc.).

            Format: accounts/{account}/models/{model}
          type: string
        hotLoadDeploymentId:
          title: |-
            Only needed when the trainer job was created with a deployment ID
            (deployment-first flow). Not needed when the deployment references
            the trainer job (trainer-first flow, recommended).
            Format: accounts/{account}/deployments/{deployment}
          type: string
    # Resume request bodies: each is declared as a bare object with no
    # properties.
    GatewayResumeDpoJobBody:
      type: object
    GatewayResumeReinforcementFineTuningJobBody:
      type: object
    GatewayResumeRlorTrainerJobBody:
      type: object
    GatewayResumeSupervisedFineTuningJobBody:
      type: object
    # Request body with no declared properties; only a description is given.
    GatewayRollbackEvaluatorBody:
      description: >-
        Rolls an evaluator back to the configuration captured by a specific
        revision.
      type: object
    # Request body carrying the target replica count as an int32.
    GatewayScaleDeploymentBody:
      type: object
      properties:
        replicaCount:
          description: The desired number of replicas.
          type: integer
          format: int32
    # Request body for splitting a dataset: chunk size plus parent account ID.
    # NOTE(review): the chunkSize title says "Required" and "minimum 200", but
    # the schema declares neither a `required` list nor `minimum` - confirm
    # whether the server enforces these before tightening the schema.
    GatewaySplitDatasetBody:
      title: Request message for splitting a dataset into chunks
      type: object
      properties:
        chunkSize:
          title: Required. The size of each chunk (minimum 200)
          type: integer
          format: int32
        parent:
          description: The parent account ID of the requester.
          type: string
    # Request body: a draft evaluation and sample data, both required.
    GatewayTestEvaluationBody:
      type: object
      required:
        - evaluation
        - sampleData
      properties:
        evaluation:
          title: The draft evaluation to test
          $ref: '#/components/schemas/gatewayEvaluation'
        sampleData:
          title: Sample data in JSON format (array of samples)
          type: string
    # Object schema with no declared properties. Presumably the (empty)
    # request body of the undelete-deployment operation — confirm against the
    # path that references it.
    GatewayUndeleteDeploymentBody:
      type: object
    # Validate-assertions payload: a required array of gatewayAssertion
    # objects.
    GatewayValidateAssertionsBody:
      description: >-
        Validate the code assertions. Recommended before creating the
        evaluation.
      type: object
      required:
        - assertions
      properties:
        assertions:
          type: array
          items:
            $ref: '#/components/schemas/gatewayAssertion'
            type: object
    # Object schema with no declared properties. Presumably the (empty)
    # request body of the validate-dataset-upload operation — confirm against
    # the path that references it.
    GatewayValidateDatasetUploadBody:
      type: object
    # Object schema with no declared properties. Presumably the (empty)
    # request body of the validate-evaluator-upload operation — confirm
    # against the path that references it.
    GatewayValidateEvaluatorUploadBody:
      type: object
    # Wrapper around a list of strings: the accepted values for a single
    # filter dimension.
    GetAccountUsageRequestFilterValues:
      description: >-
        Allowed values for one filter dimension; multiple values are OR'ed;
        different keys in `filter` are AND'ed.
      type: object
      properties:
        values:
          items:
            type: string
          type: array
    # Per-accelerator-type reservation figures. The two GPU counts are int64
    # values serialized as strings.
    GetReservationDataResponseReservationDataByType:
      title: Reservation data for a specific accelerator type
      type: object
      properties:
        acceleratorType:
          title: The accelerator type
          $ref: '#/components/schemas/gatewayAcceleratorType'
        consumed:
          title: Number of GPUs currently consumed (in use)
          format: int64
          type: string
        reserved:
          title: Number of GPUs reserved (total capacity)
          format: int64
          type: string
    # One cost entry: two typeMoney amounts (before/after credits and
    # discounts) plus the half-open [startTime, endTime) window they cover.
    ListCostsResponseCostDataItem:
      type: object
      properties:
        subtotal:
          description: The amount before any credits and discounts are applied.
          $ref: '#/components/schemas/typeMoney'
        total:
          description: The total amount after any credits and discounts have been applied.
          $ref: '#/components/schemas/typeMoney'
        startTime:
          description: Costs returned are inclusive of `start_time`.
          format: date-time
          type: string
        endTime:
          description: Costs returned are exclusive of `end_time`.
          format: date-time
          type: string
    # Credit-card summary: brand, last four digits, and expiry month/year.
    ListPaymentMethodsResponseCard:
      type: object
      properties:
        brand:
          description: Credit card brand.
          type: string
        last4:
          description: Last 4 digits of the credit card.
          type: string
        expMonth:
          description: Expiration month.
          format: int32
          type: integer
        expYear:
          description: Expiration year.
          format: int32
          type: integer
    # A payment method: its ID plus card and/or US-bank-account details.
    ListPaymentMethodsResponseStripePaymentMethod:
      type: object
      properties:
        id:
          description: Payment method ID.
          type: string
        card:
          description: Card used by the payment method.
          $ref: '#/components/schemas/ListPaymentMethodsResponseCard'
        usBankAccount:
          title: ACH Direct Debit
          $ref: '#/components/schemas/ListPaymentMethodsResponseUsBankAccount'
    # US bank account summary: bank name and the last four account digits.
    ListPaymentMethodsResponseUsBankAccount:
      type: object
      properties:
        bankName:
          type: string
        last4:
          description: Last four digits of the bank account number.
          type: string
    # Enumerates the kinds of model; the description documents each value
    # except KIND_UNSPECIFIED, which is the default.
    ModelKind:
      type: string
      enum:
        - KIND_UNSPECIFIED
        - HF_BASE_MODEL
        - HF_PEFT_ADDON
        - HF_TEFT_ADDON
        - FLUMINA_BASE_MODEL
        - FLUMINA_ADDON
        - DRAFT_ADDON
        - FIRE_AGENT
        - LIVE_MERGE
        - CUSTOM_MODEL
        - EMBEDDING_MODEL
        - SNAPSHOT_MODEL
      default: KIND_UNSPECIFIED
      description: |-
        - HF_BASE_MODEL: An LLM base model.
         - HF_PEFT_ADDON: A parameter-efficient fine-tuned addon.
         - HF_TEFT_ADDON: A token-efficient fine-tuned addon.
         - FLUMINA_BASE_MODEL: A Flumina base model.
         - FLUMINA_ADDON: A Flumina addon.
         - DRAFT_ADDON: A draft model used for speculative decoding in a deployment.
         - FIRE_AGENT: A FireAgent model.
         - LIVE_MERGE: A live-merge model.
         - CUSTOM_MODEL: A customized model.
         - EMBEDDING_MODEL: An Embedding model.
         - SNAPSHOT_MODEL: A snapshot model.
    # Snapshot flavour: full or incremental. Defaults to FULL_SNAPSHOT.
    ModelSnapshotType:
      type: string
      default: FULL_SNAPSHOT
      enum:
        - FULL_SNAPSHOT
        - INCREMENTAL_SNAPSHOT
    # Effect attached to a model-access rule. DENY is the only specified
    # value; the default is UNSPECIFIED.
    PolicySettingsEffect:
      description: >-
        Policy effect for model-access rules (per-row; not a standalone API
        enum).

         - DENY: Block actions that violate the policy.
      type: string
      default: UNSPECIFIED
      enum:
        - UNSPECIFIED
        - DENY
    # A single allowlist row: the model (required), three capability flags,
    # and the policy effect for the row.
    PolicySettingsModelAccessRule:
      title: >-
        One row in the model allowlist: which model and which capabilities are
        permitted.
      type: object
      required:
        - model
      properties:
        model:
          title: >-
            Resource name of the model, e.g.
            accounts/fireworks/models/llama-v3-8b-instruct
          type: string
        allowServerless:
          description: Serverless inference on this model.
          type: boolean
        allowFineTuning:
          description: Create fine-tuning jobs using this model.
          type: boolean
        allowDeployments:
          description: Deploy this model on dedicated infrastructure.
          type: boolean
        effect:
          description: >-
            Policy effect for this rule so allowlisted models can use different
            effects per row.
          $ref: '#/components/schemas/PolicySettingsEffect'
    # Accelerator-hour billing: an accelerator type paired with its hourly
    # price. Both fields are required.
    PricingPlanAcceleratorHourBillingConfig:
      title: Configuration for accelerator-hour billing
      type: object
      required:
        - acceleratorType
        - acceleratorHourPrice
      properties:
        acceleratorType:
          title: Specific accelerator type this applies to
          $ref: '#/components/schemas/gatewayAcceleratorType'
        acceleratorHourPrice:
          title: Price per accelerator hour
          $ref: '#/components/schemas/typeMoney'
    # Billing basis of a pricing plan: token usage or accelerator hours.
    PricingPlanBillingType:
      type: string
      default: BILLING_TYPE_UNSPECIFIED
      enum:
        - BILLING_TYPE_UNSPECIFIED
        - TOKEN_USAGE
        - ACCELERATOR_HOURS
    # Token-based billing: a model name plus per-million-token prices.
    # Only `baseModelName` and `outputTokenPricePerMillion` are required.
    PricingPlanTokenBillingConfig:
      title: Configuration for token-based billing
      type: object
      required:
        - baseModelName
        - outputTokenPricePerMillion
      properties:
        baseModelName:
          title: Model name this applies to
          type: string
        inputTokenPricePerMillion:
          title: Price per million input tokens
          $ref: '#/components/schemas/typeMoney'
        outputTokenPricePerMillion:
          title: Price per million output tokens
          $ref: '#/components/schemas/typeMoney'
        uncachedInputTokenPricePerMillion:
          title: Price per million uncached input tokens
          $ref: '#/components/schemas/typeMoney'
        cachedInputTokenPricePerMillion:
          title: Price per million cached input tokens
          $ref: '#/components/schemas/typeMoney'
    # Loss method selector for reinforcement learning. The per-value notes
    # live in `title` (not `description`) for this schema.
    # NOTE(review): DAPO is commonly expanded as "Decoupled Clip and Dynamic
    # sAmpling Policy Optimization" — confirm the wording used in the title.
    ReinforcementLearningLossConfigMethod:
      title: |-
        - METHOD_UNSPECIFIED: Defaults to GRPO
         - GRPO: Group Relative Policy Optimization (default for preference jobs)
         - DAPO: Decoupled Alignment Preference Optimization
         - DPO: Direct Preference Optimization
         - ORPO: Odds Ratio Preference Optimization (reference-free)
         - GSPO_TOKEN: Group Sequence Policy Optimization (token-level)
      type: string
      default: METHOD_UNSPECIFIED
      enum:
        - METHOD_UNSPECIFIED
        - GRPO
        - DAPO
        - DPO
        - ORPO
        - GSPO_TOKEN
    # Whether a training session currently points at the base model or at a
    # loaded LoRA adapter.
    TrainingSessionReferenceState:
      description: |-
        ReferenceState indicates whether the session currently references the
        base model or a loaded LoRA adapter. Updated automatically when
        ExecuteTrainingSessionLoadState is called.

         - BASE: Session is using the base model (no adapter loaded).
         - ADAPTER: Session has a LoRA adapter loaded.
      type: string
      default: TRAINING_SESSION_REFERENCE_STATE_UNSPECIFIED
      enum:
        - TRAINING_SESSION_REFERENCE_STATE_UNSPECIFIED
        - BASE
        - ADAPTER
    # Trainer execution modes; defaults to TRAINER_MODE_UNSPECIFIED.
    TrainingShapeTrainerMode:
      description: Trainer execution mode used for validated launch-profile matching.
      type: string
      default: TRAINER_MODE_UNSPECIFIED
      enum:
        - TRAINER_MODE_UNSPECIFIED
        - POLICY_TRAINER
        - FORWARD_ONLY
        - LORA_TRAINER
    # Validation error payload: a list of error message strings.
    ValidateAssertionsResponseValidateAssertionError:
      type: object
      properties:
        errorMessages:
          items:
            type: string
          type: array
    # Supported accelerator SKUs (vendor, model, memory size encoded in the
    # value name); defaults to ACCELERATOR_TYPE_UNSPECIFIED.
    gatewayAcceleratorType:
      type: string
      default: ACCELERATOR_TYPE_UNSPECIFIED
      enum:
        - ACCELERATOR_TYPE_UNSPECIFIED
        - NVIDIA_A100_80GB
        - NVIDIA_H100_80GB
        - AMD_MI300X_192GB
        - NVIDIA_A10G_24GB
        - NVIDIA_A100_40GB
        - NVIDIA_L4_24GB
        - NVIDIA_H200_141GB
        - NVIDIA_B200_180GB
        - AMD_MI325X_256GB
        - AMD_MI350X_288GB
        - NVIDIA_B300_288GB
    # Account resource. `email` is the only required field; name, createTime,
    # state, status, suspendState and updateTime are read-only.
    gatewayAccount:
      type: object
      properties:
        name:
          type: string
          title: The resource name of the account. e.g. accounts/my-account
          readOnly: true
        displayName:
          type: string
          # Declares the documented "fewer than 64 characters" limit so that
          # validators and generated clients can enforce it.
          maxLength: 63
          description: |-
            Human-readable display name of the account. e.g. "My Account"
            Must be fewer than 64 characters long.
        createTime:
          type: string
          format: date-time
          description: The creation time of the account.
          readOnly: true
        accountType:
          $ref: '#/components/schemas/AccountAccountType'
          description: The type of the account.
        email:
          type: string
          description: |-
            The primary email for the account. This is used for billing invoices
            and account notifications.
        state:
          $ref: '#/components/schemas/gatewayAccountState'
          description: The state of the account.
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          description: Contains information about the account status.
          readOnly: true
        suspendState:
          $ref: '#/components/schemas/AccountSuspendState'
          readOnly: true
        updateTime:
          type: string
          format: date-time
          description: The update time for the account.
          readOnly: true
        notificationSettings:
          $ref: '#/components/schemas/gatewayNotificationSettings'
          description: Notification settings for this account.
      required:
        - email
    # Rate limit for one (deployment, metric) pair; all three fields are
    # required.
    gatewayAccountRateLimit:
      type: object
      required:
        - deployment
        - metric
        - effectiveLimit
      properties:
        deployment:
          title: >-
            Resource name of the deployment, e.g.
            accounts/my-account/deployments/my-deployment
          type: string
        metric:
          $ref: '#/components/schemas/AccountRateLimitMetric'
        effectiveLimit:
          description: >-
            Enforced limit for this metric (per second; units depend on the
            metric).
          format: double
          type: number
    # Lifecycle states of an account; defaults to STATE_UNSPECIFIED.
    gatewayAccountState:
      type: string
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
        - UPDATING
        - DELETING
    # Usage response split into two lists: serverless and dedicated
    # deployment cost entries.
    gatewayAccountUsage:
      title: Response with model costs by deployment type
      type: object
      properties:
        serverlessCosts:
          title: List of serverless cost data
          type: array
          items:
            $ref: '#/components/schemas/AccountUsageServerlessUsage'
            type: object
        dedicatedCosts:
          title: List of dedicated deployment cost data
          type: array
          items:
            $ref: '#/components/schemas/AccountUsageDedicatedDeploymentUsage'
            type: object
    # API key resource. Only displayName, expireTime and annotations are
    # writable; every other field is marked read-only.
    gatewayApiKey:
      type: object
      properties:
        keyId:
          description: >-
            Unique identifier (Key ID) for the API key, used primarily for
            deletion.
          type: string
          readOnly: true
        displayName:
          description: >-
            Display name for the API key, defaults to "default" if not
            specified.
          type: string
        key:
          description: >-
            The actual API key value, only available upon creation and not
            stored thereafter.
          type: string
          readOnly: true
        createTime:
          description: Timestamp indicating when the API key was created.
          type: string
          format: date-time
          readOnly: true
        secure:
          description: >-
            Indicates whether the plaintext value of the API key is unknown to
            Fireworks.

            If true, Fireworks does not know this API key's plaintext value. If
            false, Fireworks does

            know the plaintext value.
          type: boolean
          readOnly: true
        email:
          description: Email of the user who owns this API key.
          type: string
          readOnly: true
        prefix:
          title: The first few characters of the API key to visually identify it
          type: string
          readOnly: true
        expireTime:
          description: >-
            Timestamp indicating when the API key will expire. If not set, the
            key never expires.
          type: string
          format: date-time
        annotations:
          description: Optional key-value annotations for this API key.
          type: object
          additionalProperties:
            type: string
        lastUsed:
          description: >-
            Timestamp indicating when the API key was last used. Not set if the
            key has never been used.
          type: string
          format: date-time
          readOnly: true
    # An evaluation assertion. `assertionType` is required.
    gatewayAssertion:
      type: object
      properties:
        assertionType:
          $ref: '#/components/schemas/AssertionAssertionType'
        llmAssertion:
          $ref: '#/components/schemas/gatewayLLMAssertion'
        codeAssertion:
          $ref: '#/components/schemas/gatewayCodeAssertion'
        metricName:
          type: string
      # Implementation note, kept out of the published docs: the backing model
      # is auto-generated GORM with a JSON serializer, where oneof does not
      # work, so the variants are modelled as an enum plus flat fields.
      description: >-
        An assertion. `assertionType` identifies the kind of assertion; the
        variant payload is carried in the flat `llmAssertion` /
        `codeAssertion` fields rather than in a oneof.
      required:
        - assertionType
    # One audit log record: who called which gRPC method, on what resource,
    # when, from where, and with what outcome. No field is required.
    gatewayAuditLogEntry:
      type: object
      properties:
        id:
          description: Audit log entry id.
          type: string
        method:
          description: The gRPC method name.
          type: string
        principal:
          description: The email of the principal user who performed this action.
          type: string
        payload:
          description: The payload as JSON.
          type: object
        status:
          description: The response status.
          $ref: '#/components/schemas/gatewayStatus'
        timestamp:
          description: The timestamp when the request was received.
          format: date-time
          type: string
        message:
          title: Optional message describing the audit log entry
          type: string
        resource:
          title: The resource being operated on (e.g. accounts/123)
          type: string
        isAdminAction:
          title: >-
            Whether this action was taken by an admin and should not be shown to
            regular users
          type: boolean
        userAgent:
          description: >-
            The user agent that made the request (e.g. "firectl/1.2.3
            grpc-go/1.73.0").
          type: string
        clientIp:
          description: The client IP when known.
          type: string
        apiKeyId:
          description: >-
            Unkey API key ID when the action was authenticated with an API key
            (empty for bearer/OIDC).
          type: string
    # Auto-tune options; currently a single boolean flag, `longPrompt`.
    gatewayAutoTune:
      type: object
      properties:
        longPrompt:
          description: If true, this deployment is optimized for long prompt lengths.
          type: boolean
    # Autoscaling policy of a deployment: three wait windows, per-metric load
    # targets, and named cron-based scaling schedules.
    # The folded descriptions below rely on their blank lines for paragraph
    # breaks, so their layout is left untouched.
    gatewayAutoscalingPolicy:
      type: object
      properties:
        # The three windows are durations carried as strings (the
        # descriptions quote defaults such as 30s, 10m and 1h).
        scaleUpWindow:
          type: string
          description: >-
            The duration the autoscaler will wait before scaling up a deployment
            after observing

            increased load. Default is 30s. Must be less than or equal to 1
            hour.
        scaleDownWindow:
          type: string
          description: >-
            The duration the autoscaler will wait before scaling down a
            deployment after observing

            decreased load. Default is 10m. Must be less than or equal to 1
            hour.
        scaleToZeroWindow:
          type: string
          description: >-
            The duration after which there are no requests that the deployment
            will be scaled down

            to zero replicas, if min_replica_count==0. Default is 1h.

            This must be at least 5 minutes.
        # Free-form map from metric name to a float target.
        loadTargets:
          type: object
          additionalProperties:
            type: number
            format: float
          title: >-
            Map of load metric names to their target utilization factors.

            Currently only the "default" key is supported, which specifies the
            default target for all metrics.

            If not specified, the default target is 0.8
        # Free-form map from schedule name to a gatewayScalingSchedule.
        # NOTE(review): the documented "Maximum 5 schedules" limit is not
        # declared as `maxProperties` — confirm whether it should be.
        scalingSchedules:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/gatewayScalingSchedule'
          description: >-
            Named scaling schedules that override min_replica_count on a
            time-based cron schedule.

            When multiple schedules are active simultaneously, the highest
            min_replica_count

            across all active schedules is used ("max wins"). When no schedule
            is active, the

            deployment's base min_replica_count applies.

            Maximum 5 schedules per deployment.
    # AWS S3 access settings for training-job datasets: a secret reference
    # holding access keys, and/or an IAM role ARN.
    gatewayAwsS3Config:
      description: |-
        AwsS3Config is the configuration for AWS S3 dataset access which
        will be used by a training job.
      type: object
      properties:
        credentialsSecret:
          title: >-
            Reference to a Secret resource containing AWS access key
            credentials.

            Format: accounts/{account_id}/secrets/{secret_id}

            The secret value must be JSON: {"aws_access_key_id": "AKIA...",
            "aws_secret_access_key": "..."}
          type: string
        iamRoleArn:
          title: >-
            IAM role ARN to assume for accessing S3 datasets via GCP OIDC
            federation.

            Format: arn:aws:iam::account-id:role/role-name
          type: string
    # S3 location of a model plus optional AWS authentication fields.
    # Only `s3Bucket` is required.
    gatewayAwsS3ModelSource:
      type: object
      required:
        - s3Bucket
      properties:
        s3Bucket:
          description: The S3 bucket name.
          type: string
        s3Path:
          description: The S3 path prefix.
          type: string
        roleArn:
          title: AWS role ARN for authentication
          type: string
        accessKeyId:
          title: AWS access key ID for authentication
          type: string
        accessSecret:
          title: AWS access secret for authentication
          type: string
    # Azure Blob location of a model plus optional authentication fields
    # (SAS-token secret, or Azure AD client/tenant IDs).
    # `storageAccount` and `container` are required.
    gatewayAzureBlobModelSource:
      type: object
      required:
        - storageAccount
        - container
      properties:
        storageAccount:
          description: The Azure storage account name.
          type: string
        container:
          description: The Azure blob container name.
          type: string
        path:
          description: The path prefix within the container.
          type: string
        sasTokenSecret:
          description: |-
            Reference to a Fireworks secret containing the Azure SAS token.
            Format: accounts/{account}/secrets/{secret_id}
            The secret's value should contain the SAS token.
            Create a secret first using the Secrets API, then reference it here.
          type: string
        clientId:
          description: |-
            Azure AD client ID for authentication.
            If set, tenant_id must also be set.
          type: string
        tenantId:
          description: |-
            Azure AD tenant ID for authentication.
            If set, client_id must also be set.
          type: string
    # Azure Blob Storage access settings for training-job datasets. The
    # descriptions state that `credentialsSecret` and
    # `managedIdentityClientId` are mutually exclusive.
    gatewayAzureBlobStorageConfig:
      description: >-
        AzureBlobStorageConfig is the configuration for Azure Blob Storage
        dataset access

        which will be used by a training job.
      type: object
      properties:
        credentialsSecret:
          description: >-
            Reference to a Secret resource containing Azure credentials.

            Format: accounts/{account_id}/secrets/{secret_id}

            The secret value must be JSON: {"connection_string": "..."} or
            {"sas_token": "..."} or {"account_key": "..."}

            Mutually exclusive with managed_identity_client_id.
          type: string
        managedIdentityClientId:
          description: >-
            Managed Identity Client ID for GCP-to-Azure Workload Identity
            Federation.

            Format: uuid

            Mutually exclusive with credentials_secret.
          type: string
        tenantId:
          title: |-
            Azure tenant ID for Workload Identity Federation.
            Format: uuid
          type: string
    # Balance wrapper around a single typeMoney amount.
    gatewayBalance:
      type: object
      properties:
        money:
          description: >-
            Current remaining balance of the developer for a particular
            currency.
          $ref: '#/components/schemas/typeMoney'
    # Details specific to base models: serving defaults, checkpoint format,
    # size, and capability flags. No field is required.
    gatewayBaseModelDetails:
      type: object
      properties:
        worldSize:
          type: integer
          format: int32
          description: |-
            The default number of GPUs the model is served with.
            If not specified, the default is 1.
        checkpointFormat:
          $ref: '#/components/schemas/BaseModelDetailsCheckpointFormat'
        huggingfaceFiles:
          type: array
          items:
            type: string
          description: >-
            A list of Hugging Face files associated with this model. Specified
            if and only if

            the checkpoint_format is HUGGINGFACE.
        # int64 serialized as a string.
        parameterCount:
          type: string
          format: int64
          description: >-
            The number of model parameters. For serverless models, this
            determines the

            price per token.
        moe:
          type: boolean
          description: >-
            If true, this is a Mixture of Experts (MoE) model. For serverless
            models,

            this affects the price per token.
        tunable:
          type: boolean
          # Machine-readable counterpart of the "Deprecated:" note in the
          # description, so tooling can flag uses of this field.
          deprecated: true
          description: >-
            Deprecated: V1 training stack only. Use per-category tunable flags
            on Model instead.
        modelType:
          type: string
          description: The type of the model.
        supportsFireattention:
          type: boolean
          description: Whether this model supports fireattention.
        defaultPrecision:
          $ref: '#/components/schemas/DeploymentPrecision'
          description: Default precision of the model.
          readOnly: true
        supportsMtp:
          type: boolean
          description: If true, this model supports MTP.
    # Training hyperparameters shared by the different training job types:
    # model selection, optimizer settings, batching, and LoRA options.
    # No field is required.
    gatewayBaseTrainingConfig:
      title: |-
        BaseTrainingConfig contains common configuration fields shared across
        different training job types.
      type: object
      properties:
        outputModel:
          description: >-
            The model ID to be assigned to the resulting fine-tuned model. If
            not specified, the job ID will be used.
          type: string
        baseModel:
          description: |-
            The name of the base model to be fine-tuned
            Only one of 'base_model' or 'warm_start_from' should be specified.
          type: string
        warmStartFrom:
          description: |-
            The PEFT addon model in Fireworks format to be fine-tuned from
            Only one of 'base_model' or 'warm_start_from' should be specified.
          type: string
        jinjaTemplate:
          title: >-
            The Jinja template for conversation formatting. If not specified,
            defaults to the base model's conversation template configuration
          type: string
        learningRate:
          description: The learning rate used for training.
          format: float
          type: number
        maxContextLength:
          description: The maximum context length to use with the model.
          format: int32
          type: integer
        loraRank:
          description: The rank of the LoRA layers.
          format: int32
          type: integer
        epochs:
          description: The number of epochs to train for.
          format: int32
          type: integer
        batchSize:
          description: >-
            The maximum packed number of tokens per batch for training in
            sequence packing.
          format: int32
          type: integer
        gradientAccumulationSteps:
          title: Number of gradient accumulation steps
          format: int32
          type: integer
        learningRateWarmupSteps:
          title: Number of steps for learning rate warm up
          format: int32
          type: integer
        batchSizeSamples:
          description: The number of samples per gradient batch.
          format: int32
          type: integer
        optimizerWeightDecay:
          description: Weight decay (L2 regularization) for optimizer.
          format: float
          type: number
        trainerShardingScheme:
          description: Structured trainer sharding/parallelism configuration.
          $ref: '#/components/schemas/gatewayTrainerShardingScheme'
        loraAlpha:
          description: |-
            LoRA alpha scaling factor.
            If not specified (or 0), trainer defaults are used.
          format: int32
          type: integer
        loraDropout:
          description: LoRA dropout probability.
          format: float
          type: number
        loraTargetModules:
          description: Optional LoRA target module names (e.g. q_proj, k_proj, v_proj).
          type: array
          items:
            type: string
    # Batch inference job resource. name, createTime, createdBy, state,
    # status, updateTime and jobProgress are read-only; no field is listed
    # as required.
    gatewayBatchInferenceJob:
      type: object
      properties:
        name:
          title: >-
            The resource name of the batch inference job. e.g.
            accounts/my-account/batchInferenceJobs/my-batch-inference-job
          type: string
          readOnly: true
        displayName:
          title: >-
            Human-readable display name of the batch inference job. e.g. "My
            Batch Inference Job"
          type: string
        createTime:
          description: The creation time of the batch inference job.
          type: string
          format: date-time
          readOnly: true
        createdBy:
          description: >-
            The email address of the user who initiated this batch inference
            job.
          type: string
          readOnly: true
        state:
          description: JobState represents the state an asynchronous job can be in.
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        model:
          description: >-
            The name of the model to use for inference. This is required, except
            when continued_from_job_name is specified.
          type: string
        inputDatasetId:
          description: >-
            The name of the dataset used for inference. This is required, except
            when continued_from_job_name is specified.
          type: string
        outputDatasetId:
          description: >-
            The name of the dataset used for storing the results. This will also
            contain the error file.
          type: string
        inferenceParameters:
          description: Parameters controlling the inference process.
          $ref: '#/components/schemas/gatewayBatchInferenceJobInferenceParameters'
        updateTime:
          description: The update time for the batch inference job.
          type: string
          format: date-time
          readOnly: true
        precision:
          description: >-
            The precision with which the model should be served.

            If PRECISION_UNSPECIFIED, a default will be chosen based on the
            model.
          $ref: '#/components/schemas/DeploymentPrecision'
        jobProgress:
          description: Job progress.
          $ref: '#/components/schemas/gatewayJobProgress'
          readOnly: true
        continuedFromJobName:
          description: >-
            The resource name of the batch inference job that this job continues
            from.

            Used for lineage tracking to understand job continuation chains.
          type: string
    # Sampling/generation parameters for a batch inference job. `extraBody`
    # is a JSON document carried as a string.
    gatewayBatchInferenceJobInferenceParameters:
      title: BIJ inference parameters
      type: object
      properties:
        maxTokens:
          description: Maximum number of tokens to generate per response.
          format: int32
          type: integer
        temperature:
          description: Sampling temperature, typically between 0 and 2.
          format: float
          type: number
        topP:
          description: Top-p sampling parameter, typically between 0 and 1.
          format: float
          type: number
        # Quoted so that YAML 1.1 parsers do not read the key as a boolean.
        'n':
          description: Number of response candidates to generate per input.
          format: int32
          type: integer
        extraBody:
          description: |-
            Additional parameters for the inference request as a JSON string.
            For example: "{\"stop\": [\"\\n\"]}".
          type: string
        topK:
          description: >-
            Top-k sampling parameter, limits the token selection to the top k
            tokens.
          format: int32
          type: integer
    # Bill.com invoice: identifiers, issue/due dates (typeDate), monetary
    # amounts (typeMoney), status string, and a payment link.
    gatewayBillcomInvoice:
      type: object
      properties:
        id:
          type: string
          description: The Bill.com invoice ID (e.g. "00e02ZLYJNYUXC216neu").
        invoiceNumber:
          type: string
          title: The Bill.com invoice number (e.g. "167")
        archived:
          type: boolean
          description: Whether this invoice is archived.
        invoiceDate:
          $ref: '#/components/schemas/typeDate'
          description: The date when the invoice was issued (calendar date, no timezone).
        dueDate:
          $ref: '#/components/schemas/typeDate'
          description: The date when payment is due (calendar date, no timezone).
        customerId:
          type: string
          description: The Bill.com customer ID.
        totalAmount:
          $ref: '#/components/schemas/typeMoney'
          description: The total invoice amount.
        dueAmount:
          $ref: '#/components/schemas/typeMoney'
          description: The amount still due (after payments/credits).
        scheduledAmount:
          $ref: '#/components/schemas/typeMoney'
          description: The amount scheduled for payment.
        creditAmount:
          $ref: '#/components/schemas/typeMoney'
          description: The amount of credit applied.
        # Free-form string rather than an enum; the description lists
        # example values only.
        status:
          type: string
          description: The invoice status (DRAFT, OPEN, PAID, VOID, etc.).
        salesTaxTotal:
          $ref: '#/components/schemas/typeMoney'
          description: The total amount of sales tax.
        paymentLink:
          type: string
          description: A shareable payment link for viewing and paying this invoice.
      title: Bill.com invoice (actual invoices for enterprise contracts)
    # Enumerates the kinds of model checkpoints.
    # Per the description below, INFERENCE_BASE and INFERENCE_LORA can be
    # promoted for serving, INFERENCE_ARC_V2 is not directly promotable, and the
    # TRAINING* kinds exist for resuming training.
    gatewayCheckpointType:
      type: string
      enum:
        - CHECKPOINT_TYPE_UNSPECIFIED
        - CHECKPOINT_TYPE_INFERENCE_BASE
        - CHECKPOINT_TYPE_INFERENCE_ARC_V2
        - CHECKPOINT_TYPE_INFERENCE_LORA
        - CHECKPOINT_TYPE_TRAINING
        - CHECKPOINT_TYPE_TRAINING_LORA
      default: CHECKPOINT_TYPE_UNSPECIFIED
      description: >-
        - CHECKPOINT_TYPE_INFERENCE_BASE: Inference-ready checkpoint
        (full-parameter, HuggingFace format).

        Promotable to a model for serving.
         - CHECKPOINT_TYPE_INFERENCE_ARC_V2: Inference-ready checkpoint (XOR delta compressed, ARC v2 format).
        Not directly promotable.
         - CHECKPOINT_TYPE_INFERENCE_LORA: Inference-ready LoRA adapter checkpoint (HuggingFace PEFT format).
        Promotable to an addon model for serving.
         - CHECKPOINT_TYPE_TRAINING: Training checkpoint (full-parameter) with optimizer state.
        Not promotable to a model; used for training resume.
         - CHECKPOINT_TYPE_TRAINING_LORA: Training checkpoint from a LoRA run (adapter weights + optimizer state).
        Not promotable; used for training resume.
    # A cluster resource. name, createTime, state, status and updateTime are
    # read-only; displayName and the backend references are not marked so.
    gatewayCluster:
      type: object
      properties:
        name:
          type: string
          title: >-
            The resource name of the cluster. e.g.
            accounts/my-account/clusters/my-cluster
          readOnly: true
        displayName:
          type: string
          description: |-
            Human-readable display name of the cluster. e.g. "My Cluster"
            Must be fewer than 64 characters long.
        createTime:
          type: string
          format: date-time
          description: The creation time of the cluster.
          readOnly: true
        # NOTE(review): eksCluster and fakeCluster look like alternative
        # backends (possibly a proto oneof) — confirm whether they are
        # mutually exclusive; the schema itself does not enforce it.
        eksCluster:
          $ref: '#/components/schemas/gatewayEksCluster'
        fakeCluster:
          $ref: '#/components/schemas/gatewayFakeCluster'
        state:
          $ref: '#/components/schemas/gatewayClusterState'
          description: The current state of the cluster.
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          description: Detailed information about the current status of the cluster.
          readOnly: true
        updateTime:
          type: string
          format: date-time
          description: The update time for the cluster.
          readOnly: true
    # Information needed to reach a cluster's Kubernetes API server: the
    # endpoint plus the Base64-encoded CA certificate.
    gatewayClusterConnectionInfo:
      type: object
      properties:
        endpoint:
          description: The cluster's Kubernetes API server endpoint.
          type: string
        caData:
          description: Base64-encoded cluster's CA certificate.
          type: string
    # Lifecycle state of a cluster. Per the description, a FAILED cluster has
    # to be deleted and re-created; details are reported via 'status'.
    gatewayClusterState:
      type: string
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
        - DELETING
        - FAILED
      default: STATE_UNSPECIFIED
      description: |-
        - CREATING: The cluster is still being created.
         - READY: The cluster is ready to be used.
         - DELETING: The cluster is being deleted.
         - FAILED: Cluster is not operational.
        Consult 'status' for detailed messaging.
        Cluster needs to be deleted and re-created.
    # Canonical status codes, each documented with its HTTP mapping.
    # Per the title, this mirrors google/rpc/code.proto; the description is a
    # literal block scalar, so its indentation and blank lines are significant.
    gatewayCode:
      type: string
      enum:
        - OK
        - CANCELLED
        - UNKNOWN
        - INVALID_ARGUMENT
        - DEADLINE_EXCEEDED
        - NOT_FOUND
        - ALREADY_EXISTS
        - PERMISSION_DENIED
        - UNAUTHENTICATED
        - RESOURCE_EXHAUSTED
        - FAILED_PRECONDITION
        - ABORTED
        - OUT_OF_RANGE
        - UNIMPLEMENTED
        - INTERNAL
        - UNAVAILABLE
        - DATA_LOSS
      default: OK
      description: |-
        - OK: Not an error; returned on success.

        HTTP Mapping: 200 OK
         - CANCELLED: The operation was cancelled, typically by the caller.

        HTTP Mapping: 499 Client Closed Request
         - UNKNOWN: Unknown error.  For example, this error may be returned when
        a `Status` value received from another address space belongs to
        an error space that is not known in this address space.  Also
        errors raised by APIs that do not return enough error information
        may be converted to this error.

        HTTP Mapping: 500 Internal Server Error
         - INVALID_ARGUMENT: The client specified an invalid argument.  Note that this differs
        from `FAILED_PRECONDITION`.  `INVALID_ARGUMENT` indicates arguments
        that are problematic regardless of the state of the system
        (e.g., a malformed file name).

        HTTP Mapping: 400 Bad Request
         - DEADLINE_EXCEEDED: The deadline expired before the operation could complete. For operations
        that change the state of the system, this error may be returned
        even if the operation has completed successfully.  For example, a
        successful response from a server could have been delayed long
        enough for the deadline to expire.

        HTTP Mapping: 504 Gateway Timeout
         - NOT_FOUND: Some requested entity (e.g., file or directory) was not found.

        Note to server developers: if a request is denied for an entire class
        of users, such as gradual feature rollout or undocumented allowlist,
        `NOT_FOUND` may be used. If a request is denied for some users within
        a class of users, such as user-based access control, `PERMISSION_DENIED`
        must be used.

        HTTP Mapping: 404 Not Found
         - ALREADY_EXISTS: The entity that a client attempted to create (e.g., file or directory)
        already exists.

        HTTP Mapping: 409 Conflict
         - PERMISSION_DENIED: The caller does not have permission to execute the specified
        operation. `PERMISSION_DENIED` must not be used for rejections
        caused by exhausting some resource (use `RESOURCE_EXHAUSTED`
        instead for those errors). `PERMISSION_DENIED` must not be
        used if the caller can not be identified (use `UNAUTHENTICATED`
        instead for those errors). This error code does not imply the
        request is valid or the requested entity exists or satisfies
        other pre-conditions.

        HTTP Mapping: 403 Forbidden
         - UNAUTHENTICATED: The request does not have valid authentication credentials for the
        operation.

        HTTP Mapping: 401 Unauthorized
         - RESOURCE_EXHAUSTED: Some resource has been exhausted, perhaps a per-user quota, or
        perhaps the entire file system is out of space.

        HTTP Mapping: 429 Too Many Requests
         - FAILED_PRECONDITION: The operation was rejected because the system is not in a state
        required for the operation's execution.  For example, the directory
        to be deleted is non-empty, an rmdir operation is applied to
        a non-directory, etc.

        Service implementors can use the following guidelines to decide
        between `FAILED_PRECONDITION`, `ABORTED`, and `UNAVAILABLE`:
         (a) Use `UNAVAILABLE` if the client can retry just the failing call.
         (b) Use `ABORTED` if the client should retry at a higher level. For
             example, when a client-specified test-and-set fails, indicating the
             client should restart a read-modify-write sequence.
         (c) Use `FAILED_PRECONDITION` if the client should not retry until
             the system state has been explicitly fixed. For example, if an "rmdir"
             fails because the directory is non-empty, `FAILED_PRECONDITION`
             should be returned since the client should not retry unless
             the files are deleted from the directory.

        HTTP Mapping: 400 Bad Request
         - ABORTED: The operation was aborted, typically due to a concurrency issue such as
        a sequencer check failure or transaction abort.

        See the guidelines above for deciding between `FAILED_PRECONDITION`,
        `ABORTED`, and `UNAVAILABLE`.

        HTTP Mapping: 409 Conflict
         - OUT_OF_RANGE: The operation was attempted past the valid range.  E.g., seeking or
        reading past end-of-file.

        Unlike `INVALID_ARGUMENT`, this error indicates a problem that may
        be fixed if the system state changes. For example, a 32-bit file
        system will generate `INVALID_ARGUMENT` if asked to read at an
        offset that is not in the range [0,2^32-1], but it will generate
        `OUT_OF_RANGE` if asked to read from an offset past the current
        file size.

        There is a fair bit of overlap between `FAILED_PRECONDITION` and
        `OUT_OF_RANGE`.  We recommend using `OUT_OF_RANGE` (the more specific
        error) when it applies so that callers who are iterating through
        a space can easily look for an `OUT_OF_RANGE` error to detect when
        they are done.

        HTTP Mapping: 400 Bad Request
         - UNIMPLEMENTED: The operation is not implemented or is not supported/enabled in this
        service.

        HTTP Mapping: 501 Not Implemented
         - INTERNAL: Internal errors.  This means that some invariants expected by the
        underlying system have been broken.  This error code is reserved
        for serious errors.

        HTTP Mapping: 500 Internal Server Error
         - UNAVAILABLE: The service is currently unavailable.  This is most likely a
        transient condition, which can be corrected by retrying with
        a backoff. Note that it is not always safe to retry
        non-idempotent operations.

        See the guidelines above for deciding between `FAILED_PRECONDITION`,
        `ABORTED`, and `UNAVAILABLE`.

        HTTP Mapping: 503 Service Unavailable
         - DATA_LOSS: Unrecoverable data loss or corruption.

        HTTP Mapping: 500 Internal Server Error
      title: >-
        Mimics
        [https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto]
    # A code-based assertion: a snippet in a given language, an optional
    # expected output, and execution options. language and code are mandatory.
    gatewayCodeAssertion:
      type: object
      required: [language, code]
      properties:
        language:
          title: Language of the code (python/javascript)
          type: string
        code:
          title: The code to execute
          type: string
        expectedOutput:
          title: Optional expected output
          type: string
        options:
          $ref: '#/components/schemas/CodeAssertionExecutionOptions'
    # A set of code files keyed by file name, together with the language and
    # the entry file / entry function names.
    gatewayCodeSnippets:
      type: object
      properties:
        language:
          type: string
        fileContents:
          title: File name to code snippet, default is main.py
          type: object
          additionalProperties:
            type: string
        entryFile:
          type: string
        entryFunc:
          type: string
    # Chat-template configuration. Only style is mandatory; system and
    # template apply depending on the chosen style (see field descriptions).
    gatewayConversationConfig:
      type: object
      required: [style]
      properties:
        style:
          description: The chat template to use.
          type: string
        system:
          description: The system prompt (if the chat style supports it).
          type: string
        template:
          description: The Jinja template (if style is "jinja").
          type: string
    # Record of a credit code being redeemed. creditCode is the only required
    # (and only writable) field; name and createTime are read-only.
    gatewayCreditRedemption:
      type: object
      properties:
        name:
          type: string
          description: The resource name of the credit redemption.
          readOnly: true
        creditCode:
          type: string
          title: The user-facing code of the credit code being redeemed
        createTime:
          type: string
          format: date-time
          description: >-
            The creation time of the credit redemption (i.e. when the credit
            code was redeemed).
          readOnly: true
      title: >-
        CreditRedemption represents a record of a code redemption. Stored as
        (code, account) pairs
      required:
        - creditCode
    # A single evaluation criterion: its type, a name, a free-text
    # description, and the code snippets that implement it.
    gatewayCriterion:
      type: object
      properties:
        type:
          $ref: '#/components/schemas/gatewayCriterionType'
        name:
          type: string
        description:
          type: string
        codeSnippets:
          title: Criteria for code snippet
          $ref: '#/components/schemas/gatewayCodeSnippets'
    # Kind of criterion; CODE_SNIPPETS is the only non-default value.
    gatewayCriterionType:
      title: '- CODE_SNIPPETS: Code snippets for Sandbox based evaluation'
      type: string
      default: TYPE_UNSPECIFIED
      enum: [TYPE_UNSPECIFIED, CODE_SNIPPETS]
    # A dataset resource. Server-populated fields (name, createTime, state,
    # status, createdBy, updateTime, estimatedTokenCount, averageTurnCount)
    # are marked readOnly.
    gatewayDataset:
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayDatasetState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        # 64-bit integer carried as a JSON string (type: string, format: int64).
        exampleCount:
          type: string
          format: int64
        # NOTE(review): userUploaded, evaluationResult, transformed, splitted
        # and evalProtocol look like alternative dataset origins (possibly a
        # proto oneof) — confirm; the schema does not enforce exclusivity.
        userUploaded:
          $ref: '#/components/schemas/gatewayUserUploaded'
        evaluationResult:
          $ref: '#/components/schemas/gatewayEvaluationResult'
        transformed:
          $ref: '#/components/schemas/gatewayTransformed'
        splitted:
          $ref: '#/components/schemas/gatewaySplitted'
        evalProtocol:
          $ref: '#/components/schemas/gatewayEvalProtocol'
        externalUrl:
          type: string
          title: The external URI of the dataset. e.g. gs://foo/bar/baz.jsonl
        format:
          $ref: '#/components/schemas/DatasetFormat'
        createdBy:
          type: string
          description: The email address of the user who created this dataset.
          readOnly: true
        updateTime:
          type: string
          format: date-time
          description: The update time for the dataset.
          readOnly: true
        sourceJobName:
          type: string
          description: >-
            The resource name of the job that created this dataset (e.g., batch
            inference job).

            Used for lineage tracking to understand dataset provenance.
        # 64-bit integer carried as a JSON string (type: string, format: int64).
        estimatedTokenCount:
          type: string
          format: int64
          description: The estimated number of tokens in the dataset.
          readOnly: true
        averageTurnCount:
          type: number
          format: float
          description: >-
            An estimate of the average number of turns per sample in the
            dataset.
          readOnly: true
    # State of a dataset: unspecified (default), uploading, or ready.
    gatewayDatasetState:
      type: string
      default: STATE_UNSPECIFIED
      enum: [STATE_UNSPECIFIED, UPLOADING, READY]
    # Formats a dataset can be validated against. The description documents
    # each value and embeds example JSONL records; it is a folded block scalar,
    # so line breaks and the extra-indented list entries are significant.
    gatewayDatasetValidationFormat:
      type: string
      enum:
        - DATASET_VALIDATION_FORMAT_UNSPECIFIED
        - DATASET_VALIDATION_FORMAT_CHAT
        - DATASET_VALIDATION_FORMAT_CHAT_RELAXED
        - DATASET_VALIDATION_FORMAT_RLOR
        - DATASET_VALIDATION_FORMAT_BATCH_INFERENCE_STRICT
        - DATASET_VALIDATION_FORMAT_BATCH_INFERENCE_ALLOW_TRAILING_ASSISTANT
        - DATASET_VALIDATION_FORMAT_BATCH_INFERENCE
        - DATASET_VALIDATION_FORMAT_DPO
      default: DATASET_VALIDATION_FORMAT_UNSPECIFIED
      description: >-
        - DATASET_VALIDATION_FORMAT_UNSPECIFIED: Unspecified format.
         - DATASET_VALIDATION_FORMAT_CHAT: Chat dataset format. One or more .jsonl files, in which each line is a JSON object with a "messages" list,
        containing objects with a "role" field and a "content" field.

        {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
        "assistant", "content": "Hello, user!"}]}
         - DATASET_VALIDATION_FORMAT_CHAT_RELAXED: Chat dataset relaxed format. Same as DATASET_VALIDATION_FORMAT_CHAT, but optionally allows the assistant message to be missing, and allows extra fields.
         - DATASET_VALIDATION_FORMAT_RLOR: RLOR dataset format. One or more .jsonl files, in which each line is a JSON object with a "samples" array,
        containing objects with "messages" list and optional metrics keys with
        values ranging from 0.0 to 1.0.

        {"samples": [{"messages": [{"role": "user", "content": "..."}, {"role":
        "assistant", "content": "..."}], "metric_key": 1.0}, ...]}
         - DATASET_VALIDATION_FORMAT_BATCH_INFERENCE_STRICT: Batch inference strict format. Same as chat except no trailing assistant messages allowed.
         - DATASET_VALIDATION_FORMAT_BATCH_INFERENCE_ALLOW_TRAILING_ASSISTANT: Batch inference format with trailing assistant messages allowed. Same as chat except trailing assistant messages are optional.
         - DATASET_VALIDATION_FORMAT_BATCH_INFERENCE: Batch inference format (new). It will allow chat format and also OpenAI format.
         - DATASET_VALIDATION_FORMAT_DPO: DPO format (new). It will allow DPO format.
    # A job that validates one or more datasets against a
    # gatewayDatasetValidationFormat. datasetName and format are required;
    # result, validationError and the usual metadata fields are read-only.
    gatewayDatasetValidationJob:
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        createdBy:
          type: string
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        datasetName:
          type: string
          description: The name of the dataset to validate.
        format:
          $ref: '#/components/schemas/gatewayDatasetValidationFormat'
          description: >-
            The format of the dataset. See DatasetValidationFormat for more
            details.
        result:
          $ref: '#/components/schemas/gatewayDatasetValidationJobResult'
          readOnly: true
        validationError:
          type: string
          readOnly: true
        # NOTE(review): both datasetName (singular, required) and datasetNames
        # (array) exist; how they interact is not visible here — confirm.
        datasetNames:
          type: array
          items:
            type: string
          description: The names of the datasets to validate.
        updateTime:
          type: string
          format: date-time
          description: The update time for the dataset validation job.
          readOnly: true
        rewards:
          type: array
          items:
            type: string
          description: A list of reward metrics to validate.
        baseModel:
          type: string
          description: >-
            Resource name of the base model associated with the parent job. This
            is used to determine the tokenizer.
        skipChatTemplateValidation:
          type: boolean
          description: If set, chat template validation is disabled.
      required:
        - datasetName
        - format
    # Outcome of a dataset validation job: unspecified (default), success,
    # or failure.
    gatewayDatasetValidationJobResult:
      default: DATASET_VALIDATION_JOB_RESULT_UNSPECIFIED
      type: string
      enum:
        - DATASET_VALIDATION_JOB_RESULT_UNSPECIFIED
        - DATASET_VALIDATION_JOB_RESULT_SUCCESS
        - DATASET_VALIDATION_JOB_RESULT_FAILURE
    # Response of the reinforcement fine-tuning job debug call: the job's
    # resource name (required) plus a read-only failedJobName.
    gatewayDebugReinforcementFineTuningJobResponse:
      type: object
      required: [name]
      properties:
        name:
          description: The resource name of the reinforcement fine-tuning job.
          type: string
        failedJobName:
          readOnly: true
          type: string
    # A model deployed onto a base deployment. name, createTime, state, status
    # and updateTime are read-only; no field is listed as required.
    gatewayDeployedModel:
      type: object
      properties:
        name:
          type: string
          title: >-
            The resource name. e.g.
            accounts/my-account/deployedModels/my-deployed-model
          readOnly: true
        displayName:
          type: string
        description:
          type: string
          description: Description of the resource.
        createTime:
          type: string
          format: date-time
          description: The creation time of the resource.
          readOnly: true
        model:
          type: string
          title: |-
            The resource name of the model to be deployed.
            e.g. accounts/my-account/models/my-model
        deployment:
          type: string
          description: The resource name of the base deployment the model is deployed to.
        # "default" here is a property name (a boolean flag), not the JSON
        # Schema `default` keyword.
        default:
          type: boolean
          description: >-
            If true, this is the default target when querying this model without

            the `#<deployment>` suffix.

            The first deployment a model is deployed to will have this field set
            to true.
        state:
          $ref: '#/components/schemas/gatewayDeployedModelState'
          description: The state of the deployed model.
          readOnly: true
        serverless:
          type: boolean
          title: True if the underlying deployment is managed by Fireworks
        status:
          $ref: '#/components/schemas/gatewayStatus'
          description: Contains model deploy/undeploy details.
          readOnly: true
        public:
          type: boolean
          description: If true, the deployed model will be publicly reachable.
        updateTime:
          type: string
          format: date-time
          description: The update time for the deployed model.
          readOnly: true
    # Read-only reference view of a deployed model: every property here is
    # marked readOnly.
    gatewayDeployedModelRef:
      type: object
      properties:
        name:
          type: string
          title: >-
            The resource name. e.g.
            accounts/my-account/deployedModels/my-deployed-model
          readOnly: true
        deployment:
          type: string
          description: The resource name of the base deployment the model is deployed to.
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayDeployedModelState'
          description: The state of the deployed model.
          readOnly: true
        # "default" here is a property name (a boolean flag), not the JSON
        # Schema `default` keyword.
        default:
          type: boolean
          description: >-
            If true, this is the default target when querying this model without

            the `#<deployment>` suffix.

            The first deployment a model is deployed to will have this field set
            to

            true automatically.
          readOnly: true
        public:
          type: boolean
          description: If true, the deployed model will be publicly reachable.
          readOnly: true
    # Lifecycle state of a deployed model. The description is a literal block
    # scalar; the extra-indented entries are part of the rendered text.
    gatewayDeployedModelState:
      type: string
      enum:
        - STATE_UNSPECIFIED
        - UNDEPLOYING
        - DEPLOYING
        - DEPLOYED
        - UPDATING
      default: STATE_UNSPECIFIED
      description: |-
        - UNDEPLOYING: The model is being undeployed.
         - DEPLOYING: The model is being deployed.
         - DEPLOYED: The model is deployed and ready for inference.
         - UPDATING: There are updates happening with the deployed model.
    # A model deployment: replica scaling, accelerator selection, speculative
    # decoding, direct routing, placement and hot-load settings.
    # baseModel is the only required field; server-populated fields are marked
    # readOnly. Many descriptions are folded block scalars whose blank lines
    # become line breaks in the rendered text.
    gatewayDeployment:
      type: object
      properties:
        name:
          type: string
          title: >-
            The resource name of the deployment. e.g.
            accounts/my-account/deployments/my-deployment
          readOnly: true
        displayName:
          type: string
          description: |-
            Human-readable display name of the deployment. e.g. "My Deployment"
            Must be fewer than 64 characters long.
        description:
          type: string
          description: Description of the deployment.
        createTime:
          type: string
          format: date-time
          description: The creation time of the deployment.
          readOnly: true
        expireTime:
          type: string
          format: date-time
          description: >-
            Deprecated: This field is deprecated and no longer causes
            auto-deletion.

            The time at which this deployment will automatically be deleted.
        purgeTime:
          type: string
          format: date-time
          description: The time at which the resource will be hard deleted.
          readOnly: true
        deleteTime:
          type: string
          format: date-time
          description: The time at which the resource will be soft deleted.
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayDeploymentState'
          description: The state of the deployment.
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          description: Detailed status information regarding the most recent operation.
          readOnly: true
        annotations:
          type: object
          additionalProperties:
            type: string
          description: >-
            Annotations to identify deployment properties.

            Key/value pairs may be used by external tools or other services.

            The "image-tag-reason" key is redacted from API responses for
            non-superuser principals.
        minReplicaCount:
          type: integer
          format: int32
          description: |-
            The minimum number of replicas.
            If not specified, the default is 0.
        maxReplicaCount:
          type: integer
          format: int32
          description: |-
            The maximum number of replicas.
            If not specified, the default is max(min_replica_count, 1).
            May be set to 0 to downscale the deployment to 0.
        maxWithRevocableReplicaCount:
          type: integer
          format: int32
          description: >-
            max_with_revocable_replica_count is max replica count including
            revocable capacity.

            The max revocable capacity will be max_with_revocable_replica_count
            - max_replica_count.
        desiredReplicaCount:
          type: integer
          format: int32
          description: >-
            The desired number of replicas for this deployment. This represents
            the target

            replica count that the system is trying to achieve.
          readOnly: true
        replicaCount:
          type: integer
          format: int32
          readOnly: true
        autoscalingPolicy:
          $ref: '#/components/schemas/gatewayAutoscalingPolicy'
        baseModel:
          type: string
          title: The base model name. e.g. accounts/fireworks/models/falcon-7b
        acceleratorCount:
          type: integer
          format: int32
          description: >-
            The number of accelerators used per replica.

            If not specified, the default is the estimated minimum required by
            the

            base model.
        acceleratorType:
          $ref: '#/components/schemas/gatewayAcceleratorType'
          description: The type of accelerator to use.
        precision:
          $ref: '#/components/schemas/DeploymentPrecision'
          description: The precision with which the model should be served.
        cluster:
          type: string
          description: If set, this deployment is deployed to a cloud-premise cluster.
          readOnly: true
        enableAddons:
          type: boolean
          description: If true, PEFT addons are enabled for this deployment.
        draftTokenCount:
          type: integer
          format: int32
          description: >-
            The number of candidate tokens to generate per step for speculative

            decoding.

            Default is the base model's draft_token_count. Set

            CreateDeploymentRequest.disable_speculative_decoding to true to
            disable

            this behavior.
        draftModel:
          type: string
          description: >-
            The draft model name for speculative decoding. e.g.
            accounts/fireworks/models/my-draft-model

            If empty, speculative decoding using a draft model is disabled.

            Default is the base model's default_draft_model. Set

            CreateDeploymentRequest.disable_speculative_decoding to true to
            disable

            this behavior.
        ngramSpeculationLength:
          type: integer
          format: int32
          description: >-
            The length of previous input sequence to be considered for N-gram
            speculation.
        enableSessionAffinity:
          type: boolean
          description: |-
            Whether to apply sticky routing based on `user` field.
            Serverless will be set to true when creating deployment.
        directRouteApiKeys:
          type: array
          items:
            type: string
          description: >-
            The set of API keys used to access the direct route deployment. If
            direct routing is not enabled, this field is unused.
        numPeftDeviceCached:
          type: integer
          format: int32
          title: How many peft adapters to keep on gpu side for caching
        directRouteType:
          $ref: '#/components/schemas/gatewayDirectRouteType'
          description: >-
            If set, this deployment will expose an endpoint that bypasses the
            Fireworks API gateway.
        directRouteHandle:
          type: string
          description: >-
            The handle for calling a direct route. The meaning of the handle
            depends on the

            direct route type of the deployment:
               INTERNET                    -> The host name for accessing the deployment
               GCP_PRIVATE_SERVICE_CONNECT -> The service attachment name used to create the PSC endpoint.
               AWS_PRIVATELINK             -> The service name used to create the VPC endpoint.
          readOnly: true
        deploymentTemplate:
          type: string
          description: |-
            The name of the deployment template to use for this deployment. Only
            available to enterprise accounts.
        autoTune:
          $ref: '#/components/schemas/gatewayAutoTune'
          description: The performance profile to use for this deployment.
        placement:
          $ref: '#/components/schemas/gatewayPlacement'
          description: |-
            The desired geographic region where the deployment must be placed.
            If unspecified, the default is the GLOBAL multi-region.
        region:
          $ref: '#/components/schemas/gatewayRegion'
          description: >-
            The geographic region where the deployment is presently located.
            This region may change

            over time, but within the `placement` constraint.
          readOnly: true
        maxContextLength:
          type: integer
          format: int32
          description: >-
            The maximum context length supported by the model (context window).

            If set to 0 or not specified, the model's default maximum context
            length will be used.
        updateTime:
          type: string
          format: date-time
          description: The update time for the deployment.
          readOnly: true
        disableDeploymentSizeValidation:
          type: boolean
          description: Whether the deployment size validation is disabled.
        enableHotLoad:
          type: boolean
          description: Whether to use hot load for this deployment.
        hotLoadBucketType:
          $ref: '#/components/schemas/DeploymentHotLoadBucketType'
          title: >-
            hot load bucket name, indicate what type of storage to use for hot
            load
        enableHotReloadLatestAddon:
          type: boolean
          description: >-
            Allows up to 1 addon at a time to be loaded, and will merge it into
            the base model.
        deploymentShape:
          type: string
          description: >-
            The name of the deployment shape that this deployment is using.

            On the server side, this will be replaced with the deployment shape
            version name.
        activeModelVersion:
          type: string
          description: >-
            The model version that is currently active and applied to running
            replicas of a deployment.
        targetModelVersion:
          type: string
          description: >-
            The target model version that is being rolled out to the deployment.

            In a ready steady state, the target model version is the same as the
            active model version.
        replicaStats:
          $ref: '#/components/schemas/gatewayReplicaStats'
          description: >-
            Per-replica deployment status counters. Provides visibility into the
            deployment process

            by tracking replicas in different stages of the deployment
            lifecycle.
          readOnly: true
        hotLoadBucketUrl:
          type: string
          title: >-
            For hot load bucket location

            e.g for s3: s3://mybucket/<object_path>; for GCS:
            gs://mybucket/<object_path>, no trailing slash
        pricingPlanId:
          type: string
          description: |-
            Optional pricing plan ID for custom billing configuration.
            If set, this deployment will use the pricing plan's billing rules
            instead of default billing behavior.
        hotLoadTrainerJob:
          type: string
          title: >-
            Trainer job whose hot_load_bucket_url this deployment should use.

            At creation, the trainer's bucket URL is copied into this
            deployment's

            hot_load_bucket_url. The deployment continues working after the
            trainer

            is deleted (snapshot-at-creation semantics).

            Format: accounts/{account}/rlorTrainerJobs/{job}
      required:
        - baseModel
    # One accelerator configuration entry. The field docs label the accelerator
    # type and precision as the "Key" part, and the minimum accelerator count
    # plus the quota regions / multi-regions as the "Value" part.
    gatewayDeploymentAcceleratorConfig:
      type: object
      properties:
        acceleratorType:
          title: 'Key: The accelerator type for this config'
          $ref: '#/components/schemas/gatewayAcceleratorType'
        precision:
          description: 'Key: Supported precisions.'
          $ref: '#/components/schemas/DeploymentPrecision'
        minAcceleratorCount:
          description: 'Value: Minimum number of accelerators required.'
          type: integer
          format: int32
        regions:
          description: >-
            Value: Regions where the account has quota for this
            accelerator type.
          type: array
          items:
            $ref: '#/components/schemas/gatewayRegion'
        multiRegions:
          description: >-
            Value: Multi-regions where the account has quota
            for this accelerator type.
          type: array
          items:
            $ref: '#/components/schemas/gatewayMultiRegion'
    # Wrapper object holding a list of gatewayDeploymentAcceleratorConfig
    # entries.
    gatewayDeploymentPrerequisites:
      type: object
      properties:
        acceleratorConfigs:
          type: array
          items:
            $ref: '#/components/schemas/gatewayDeploymentAcceleratorConfig'
            type: object
    # A deployment shape: the set of parameters (base model, accelerators,
    # precision, speculative decoding, context length, ...) from which
    # deployments are created. `baseModel` is the only required field; `name`,
    # `createTime`, `updateTime`, `modelType` and `parameterCount` are
    # read-only.
    gatewayDeploymentShape:
      type: object
      properties:
        name:
          type: string
          title: >-
            The resource name of the deployment shape. e.g.
            accounts/my-account/deploymentShapes/my-deployment-shape
          readOnly: true
        displayName:
          type: string
          description: >-
            Human-readable display name of the deployment shape. e.g. "My
            Deployment Shape"

            Must be fewer than 64 characters long.
        description:
          type: string
          description: >-
            The description of the deployment shape. Must be fewer than 1000
            characters long.
        createTime:
          type: string
          format: date-time
          description: The creation time of the deployment shape.
          readOnly: true
        updateTime:
          type: string
          format: date-time
          description: The update time for the deployment shape.
          readOnly: true
        baseModel:
          type: string
          title: The base model name. e.g. accounts/fireworks/models/falcon-7b
        modelType:
          type: string
          description: The model type of the base model.
          readOnly: true
        parameterCount:
          # 64-bit integer carried as a string (type: string, format: int64).
          type: string
          format: int64
          description: The parameter count of the base model.
          readOnly: true
        acceleratorCount:
          type: integer
          format: int32
          description: >-
            The number of accelerators used per replica.

            If not specified, the default is the estimated minimum required by
            the base model.
        acceleratorType:
          $ref: '#/components/schemas/gatewayAcceleratorType'
          description: |-
            The type of accelerator to use.
            If not specified, the default is NVIDIA_A100_80GB.
        precision:
          $ref: '#/components/schemas/DeploymentPrecision'
          description: The precision with which the model should be served.
        disableDeploymentSizeValidation:
          type: boolean
          description: If true, the deployment size validation is disabled.
        enableAddons:
          type: boolean
          description: >-
            If true, LORA addons are enabled for deployments created from this
            shape.
        draftTokenCount:
          type: integer
          format: int32
          description: |-
            The number of candidate tokens to generate per step for speculative
            decoding.
            Default is the base model's draft_token_count.
        draftModel:
          type: string
          description: >-
            The draft model name for speculative decoding. e.g.
            accounts/fireworks/models/my-draft-model

            If empty, speculative decoding using a draft model is disabled.

            Default is the base model's default_draft_model.

            Deprecated: set default_draft_model on the base model instead.
          # Machine-readable counterpart of the "Deprecated:" note in the
          # description, so tooling can surface the deprecation.
          deprecated: true
        ngramSpeculationLength:
          type: integer
          format: int32
          description: >-
            The length of previous input sequence to be considered for N-gram
            speculation.
        disableSpeculativeDecoding:
          type: boolean
          description: >-
            If true, speculative decoding is disabled for deployments created
            from this shape,

            even if the base model has default draft model settings.
        enableSessionAffinity:
          type: boolean
          description: Whether to apply sticky routing based on `user` field.
        numLoraDeviceCached:
          type: integer
          format: int32
          title: How many LORA adapters to keep on GPU side for caching
        maxContextLength:
          type: integer
          format: int32
          description: >-
            The maximum context length supported by the model (context window).

            If set to 0 or not specified, the model's default maximum context
            length will be used.
        presetType:
          $ref: '#/components/schemas/DeploymentShapePresetType'
          description: Type of deployment shape for different deployment configurations.
      title: >-
        A deployment shape is a set of parameters that define the shape of a
        deployment.

        Deployments are created from a deployment shape.
      required:
        - baseModel
    # One version of a deployment shape: a read-only snapshot of the shape plus
    # validation / visibility flags.
    gatewayDeploymentShapeVersion:
      type: object
      title: >-
        A deployment shape version is a specific version of a deployment shape.

        Versions are immutable, only created on updates and deleted when the
        deployment shape is deleted.
      properties:
        name:
          title: >-
            The resource name of the deployment shape version. e.g.
            accounts/my-account/deploymentShapes/my-deployment-shape/versions/{version_id}
          type: string
          readOnly: true
        createTime:
          description: >-
            The creation time of the deployment shape version.
            Lists will be ordered by this field.
          type: string
          format: date-time
          readOnly: true
        snapshot:
          description: Full snapshot of the Deployment Shape at this version.
          $ref: '#/components/schemas/gatewayDeploymentShape'
          readOnly: true
        validated:
          description: If true, this version has been validated.
          type: boolean
        public:
          description: If true, this version will be publicly readable.
          type: boolean
        latestValidated:
          description: |-
            If true, this version is the latest validated version.
            Only one version of the shape can be the latest validated version.
          type: boolean
          readOnly: true
    # A deployment shard. Every property declared here is read-only: resource
    # name, timestamps, state, status and per-replica counters.
    gatewayDeploymentShard:
      type: object
      properties:
        name:
          title: >-
            The resource name of the deployment shard. e.g.
            accounts/my-account/deployments/my-deployment/shards/my-deployment-shard
          type: string
          readOnly: true
        createTime:
          description: The creation time of the deployment shard.
          type: string
          format: date-time
          readOnly: true
        updateTime:
          description: The update time for the deployment shard.
          type: string
          format: date-time
          readOnly: true
        state:
          description: The state of the deployment shard.
          $ref: '#/components/schemas/gatewayDeploymentShardState'
          readOnly: true
        status:
          description: Detailed status information regarding the most recent operation.
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        replicaStats:
          description: >-
            Per-replica deployment status counters for this shard.

            Provides visibility into pod states (pending, downloading,
            initializing, ready) across regions.
          $ref: '#/components/schemas/gatewayReplicaStats'
          readOnly: true
    # Lifecycle states of a deployment shard; defaults to STATE_UNSPECIFIED.
    gatewayDeploymentShardState:
      type: string
      description: |-
        - CREATING: The deployment shard is still being created.
         - READY: The deployment shard is ready to be used.
         - DELETING: The deployment shard is being deleted.
         - FAILED: The deployment shard failed to be created. See the `status` field for
        additional details on why it failed.
         - UPDATING: There are in-progress updates happening with the deployment shard.
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
        - DELETING
        - FAILED
        - UPDATING
    # Lifecycle states of a deployment; defaults to STATE_UNSPECIFIED. Compared
    # with gatewayDeploymentShardState this enum has an extra DELETED value.
    gatewayDeploymentState:
      type: string
      description: |-
        - CREATING: The deployment is still being created.
         - READY: The deployment is ready to be used.
         - DELETING: The deployment is being deleted.
         - FAILED: The deployment failed to be created. See the `status` field for
        additional details on why it failed.
         - UPDATING: There are in-progress updates happening with the deployment.
         - DELETED: The deployment is soft-deleted.
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
        - DELETING
        - FAILED
        - UPDATING
        - DELETED
    # A deployment template. `name`, `createTime`, `createdBy` and `updateTime`
    # are read-only; no field is marked required. Many of the configuration
    # fields below carry no description in this spec.
    gatewayDeploymentTemplate:
      type: object
      properties:
        name:
          title: >-
            The resource name of the template. e.g.
            accounts/my-account/deploymentTemplates/my-template
          type: string
          readOnly: true
        displayName:
          description: >-
            Human-readable display name of the deployment template. e.g. "My
            Template"

            Must be fewer than 64 characters long.
          type: string
        description:
          description: Description of the deployment template.
          type: string
        createTime:
          description: The creation time of the deployment template.
          type: string
          format: date-time
          readOnly: true
        createdBy:
          description: The email address of the user who created this deployment template.
          type: string
          readOnly: true

        # Replica scaling.
        minReplicaCount:
          type: integer
          format: int32
        maxReplicaCount:
          type: integer
          format: int32
        autoscalingPolicy:
          $ref: '#/components/schemas/gatewayAutoscalingPolicy'

        # Model and accelerator selection.
        baseModel:
          type: string
        acceleratorCount:
          type: integer
          format: int32
        acceleratorType:
          $ref: '#/components/schemas/gatewayAcceleratorType'
        enableAddons:
          type: boolean

        # Draft model / N-gram speculation settings.
        draftModel:
          type: string
        ngramSpeculationLength:
          type: integer
          format: int32
        draftTokenCount:
          type: integer
          format: int32

        # Routing and placement.
        enableSessionAffinity:
          type: boolean
        region:
          $ref: '#/components/schemas/gatewayRegion'
        directRouteApiKeys:
          type: array
          items:
            type: string
        directRouteType:
          $ref: '#/components/schemas/gatewayDirectRouteType'

        precision:
          $ref: '#/components/schemas/DeploymentPrecision'
        updateTime:
          description: The update time for the deployment template.
          type: string
          format: date-time
          readOnly: true
        maxContextLength:
          description: >-
            The maximum context length supported by the model (context window).

            If set to 0 or not specified, the model's default maximum context
            length will be used.
          type: integer
          format: int32
        disableDeploymentSizeValidation:
          description: Whether the deployment size validation is disabled.
          type: boolean
    # Account-level developer pass. `autoRenew` and `state` are the only
    # properties not marked read-only.
    gatewayDeveloperPass:
      type: object
      title: DeveloperPass represents an account-level usage pass
      properties:
        name:
          title: |-
            The resource name of the developer pass.
            Format: accounts/{account_id}/developerPasses/{developer_pass_id}
          type: string
          readOnly: true
        autoRenew:
          description: Whether the developer pass will automatically renew upon expiry.
          type: boolean
        endTime:
          description: The time at which the developer pass ends.
          type: string
          format: date-time
          readOnly: true
        createTime:
          description: Creation timestamp.
          type: string
          format: date-time
          readOnly: true
        updateTime:
          description: Last update timestamp.
          type: string
          format: date-time
          readOnly: true
        state:
          description: State of the developer pass (active or expired).
          $ref: '#/components/schemas/gatewayDeveloperPassState'
        lastRenewTime:
          description: The time at which the developer pass was last renewed.
          type: string
          format: date-time
          readOnly: true
    # States of a developer pass; defaults to STATE_UNSPECIFIED.
    gatewayDeveloperPassState:
      type: string
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - ACTIVE
        - EXPIRED
    # How a direct route is exposed; defaults to DIRECT_ROUTE_TYPE_UNSPECIFIED
    # (no direct routing).
    gatewayDirectRouteType:
      type: string
      title: |-
        - DIRECT_ROUTE_TYPE_UNSPECIFIED: No direct routing
         - INTERNET: The direct route is exposed via the public internet
         - GCP_PRIVATE_SERVICE_CONNECT: The direct route is exposed via GCP Private Service Connect
         - AWS_PRIVATELINK: The direct route is exposed via AWS PrivateLink
      default: DIRECT_ROUTE_TYPE_UNSPECIFIED
      enum:
        - DIRECT_ROUTE_TYPE_UNSPECIFIED
        - INTERNET
        - GCP_PRIVATE_SERVICE_CONNECT
        - AWS_PRIVATELINK
    # DPO hyperparameters: the beta temperature and two reference-cache knobs.
    gatewayDpoConfig:
      type: object
      description: Hyperparameters for Direct Preference Optimization (DPO) training.
      properties:
        beta:
          description: DPO temperature parameter (beta in the paper).
          type: number
          format: float
        refCacheConcurrency:
          description: Max concurrent reference forward passes during cache warm-up.
          type: integer
          format: int32
        refCacheBatchSize:
          description: >-
            Number of preference pairs per reference forward call
            during caching.
          type: integer
          format: int32
    # A DPO training job. `dataset` is the only required field; `name`,
    # `createTime`, `completedTime`, `state`, `status`, `createdBy` and
    # `trainerLogsSignedUrl` are read-only.
    gatewayDpoJob:
      type: object
      required:
        - dataset
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        completedTime:
          type: string
          format: date-time
          readOnly: true
        dataset:
          description: The name of the dataset used for training.
          type: string
        state:
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        createdBy:
          description: The email address of the user who initiated this dpo job.
          type: string
          readOnly: true
        trainingConfig:
          description: Common training configurations.
          $ref: '#/components/schemas/gatewayBaseTrainingConfig'
        wandbConfig:
          description: The Weights & Biases team/user account for logging job progress.
          $ref: '#/components/schemas/gatewayWandbConfig'
        trainerLogsSignedUrl:
          description: |-
            The signed URL for the trainer logs file (stdout/stderr).
            Only populated if the account has trainer log reading enabled.
          type: string
          readOnly: true
        lossConfig:
          description: |-
            Loss configuration for the training job.
            If not specified, defaults to DPO loss.
            Set method to ORPO for ORPO training.
          $ref: '#/components/schemas/gatewayReinforcementLearningLossConfig'
        awsS3Config:
          description: The AWS configuration for S3 dataset access.
          $ref: '#/components/schemas/gatewayAwsS3Config'
        azureBlobStorageConfig:
          description: The Azure configuration for Azure Blob Storage dataset access.
          $ref: '#/components/schemas/gatewayAzureBlobStorageConfig'
        purpose:
          description: Scheduling purpose for this job.
          $ref: '#/components/schemas/gatewayPurpose'
    # An EAGLE training job. No field is marked required; `name`, `createTime`,
    # `createdBy`, `state`, `status` and `updateTime` are read-only.
    gatewayEagleTrainingJob:
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        createdBy:
          description: The email address of the user who created this EAGLE training job.
          type: string
          readOnly: true
        state:
          description: State of the training job.
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          description: Information describing success or failure of the training job.
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        inputDraftModel:
          description: |-
            The name of a model with Kind=DRAFT_ADDON. Model architecture must
            be either Llama or Mixtral and compatible with the base model.
            Example: "accounts/example-account/models/example-draft-model".
          type: string
        trainingDataset:
          description: |-
            The name of the dataset to use for training the eagle model.
            Example: "accounts/example-account/datasets/example-dataset".
          type: string
        baseModel:
          description: |-
            The name of the base model to use for training the eagle model.
            Model architecture must be either Llama or Mixtral.
            Kind must be HF_BASE_MODEL.
            Example: "accounts/example-account/models/example-base-model".
            If not specified, will be inferred from source_deployment or
            hidden_states_gen_config.deployed_model when available.
          type: string
        outputDraftModel:
          description: |-
            The name of the output draft model that is generated as the
            output of this job.
            Example: "accounts/example-account/models/example-draft-model".
          type: string
        epochsCount:
          description: |-
            Number of epochs to train for. If absent, it will be set
            automatically.
          type: number
          format: float
        learningRate:
          description: |-
            Learning rate for the EAGLE training job. If absent, it will be set
            automatically.
          type: number
          format: float
        wandbConfig:
          description: >-
            The Weights & Biases team/user account for logging
            training progress.
          $ref: '#/components/schemas/gatewayWandbConfig'
        updateTime:
          description: The update time for the eagle training job.
          type: string
          format: date-time
          readOnly: true
        hiddenStatesGenConfig:
          description: Configuration for hidden states generation during data preparation.
          $ref: '#/components/schemas/gatewayHiddenStatesGenConfig'
        intermediateSize:
          description: Intermediate size for model architecture.
          type: integer
          format: int32
        numHiddenLayers:
          description: Number of hidden layers for model architecture.
          type: integer
          format: int32
        maxContextLen:
          description: The maximum context length to use with the model.
          type: integer
          format: int32
        batchSize:
          description: >-
            The maximum number of tokens per batch for training
            in sequence packing.
          type: integer
          format: int32
        skipDataGen:
          description: Whether to skip data generation and use existing data.
          type: boolean
        dataGenOnly:
          description: Whether to only perform data generation without training.
          type: boolean
        trainingStyle:
          description: The training style for the EAGLE training job.
          $ref: '#/components/schemas/EagleTrainingJobTrainingStyle'
    # An Amazon EKS cluster: AWS account / region / cluster identification plus
    # the IAM role ARNs used by the various components. `awsAccountId` and
    # `region` are required.
    gatewayEksCluster:
      type: object
      title: An Amazon Elastic Kubernetes Service cluster.
      required:
        - awsAccountId
        - region
      properties:
        awsAccountId:
          description: The 12-digit AWS account ID where this cluster lives.
          type: string
        fireworksManagerRole:
          title: >-
            The IAM role ARN used to manage Fireworks resources on AWS.

            If not specified, the default is
            arn:aws:iam::<aws_account_id>:role/FireworksManagerRole
          type: string
        region:
          description: >-
            The AWS region where this cluster lives. See
            https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html

            for a list of available regions.
          type: string
        clusterName:
          description: The EKS cluster name.
          type: string
        storageBucketName:
          description: The S3 bucket name.
          type: string
        metricWriterRole:
          description: >-
            The IAM role ARN used by Google Managed Prometheus role that will
            write metrics

            to Fireworks managed Prometheus. The role must be assumable by the

            `system:serviceaccount:gmp-system:collector` service account on the
            EKS cluster.

            If not specified, no metrics will be written to GCP.
          type: string
        loadBalancerControllerRole:
          description: >-
            The IAM role ARN used by the EKS load balancer controller (i.e. the
            load balancer

            automatically created for the k8s gateway resource). If not
            specified, no gateway

            will be created.
          type: string
        workloadIdentityPoolProviderId:
          title: |-
            The ID of the GCP workload identity pool provider in the Fireworks
            project for this cluster. The pool ID is assumed to be "byoc-pool"
          type: string
        inferenceRole:
          description: The IAM role ARN used by the inference pods on the cluster.
          type: string
    # Response carrying a single money amount: the estimated fine-tuning cost.
    gatewayEstimateSupervisedFineTuningJobCostResponse:
      type: object
      properties:
        estimatedCost:
          title: An estimate of the fine-tuning costs
          $ref: '#/components/schemas/typeMoney'
    # Object schema that declares no properties.
    gatewayEvalProtocol:
      type: object
    # Run options for an evaluation: concurrency, repeat count and inter-call
    # delay. All three are optional int32 values.
    gatewayEvaluateOptions:
      type: object
      title: Options for how to run the evaluation
      properties:
        maxConcurrency:
          title: 'Maximum concurrent requests (default: 4)'
          type: integer
          format: int32
        repeat:
          title: 'Number of times to repeat each test case (default: 1)'
          type: integer
          format: int32
        delay:
          title: Delay (in ms) between API calls
          type: integer
          format: int32
    # An evaluation: a list of providers and a list of assertions, plus
    # read-only bookkeeping fields (`name`, `createTime`, `createdBy`,
    # `status`, `updateTime`). `evaluationType`, `providers` and `assertions`
    # are required.
    gatewayEvaluation:
      type: object
      properties:
        name:
          type: string
          # NOTE(review): the generated title here was "Current fields in your
          # proto", a leaked proto source comment. Replaced with a description
          # following the `name` wording used by sibling schemas; confirm.
          description: The resource name of the evaluation.
          readOnly: true
        createTime:
          type: string
          format: date-time
          readOnly: true
        createdBy:
          type: string
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        evaluationType:
          type: string
          # NOTE(review): the generated title here was
          # "string llm_evaluator_prompt = 6;", a leaked commented-out proto
          # field. The set of accepted values is not visible in this spec.
          description: The type of the evaluation.
        description:
          type: string
          title: Optional description of the evaluation
        providers:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayProvider'
          title: One or more providers to use
        assertions:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayAssertion'
          title: One or more assertions to evaluate
        updateTime:
          type: string
          format: date-time
          description: The update time for the evaluation.
          readOnly: true
      required:
        - evaluationType
        - providers
        - assertions
    # An evaluation job: runs an evaluator over an input dataset and produces an
    # output dataset. `evaluator`, `inputDataset` and `outputDataset` are
    # required; `name`, `createTime`, `createdBy`, `state`, `status`, `metrics`
    # and `updateTime` are read-only.
    gatewayEvaluationJob:
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        createdBy:
          type: string
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        evaluator:
          type: string
          # Wording says "Evaluator" (not "Evaluation") to agree with the field
          # name and the evaluators/{evaluator_id} resource path below.
          description: >-
            The fully-qualified resource name of the Evaluator used by this
            job.


            Format: accounts/{account_id}/evaluators/{evaluator_id}
        inputDataset:
          type: string
          description: >-
            The fully-qualified resource name of the input Dataset used by this
            job.


            Format: accounts/{account_id}/datasets/{dataset_id}
        outputDataset:
          type: string
          description: >-
            The fully-qualified resource name of the output Dataset created by
            this job.


            Format: accounts/{account_id}/datasets/{output_dataset_id}
        metrics:
          # Free-form map from string keys to double values.
          type: object
          additionalProperties:
            type: number
            format: double
          readOnly: true
        outputStats:
          type: string
          description: The output dataset's aggregated stats for the evaluation job.
        updateTime:
          type: string
          format: date-time
          description: The update time for the evaluation job.
          readOnly: true
        awsS3Config:
          $ref: '#/components/schemas/gatewayAwsS3Config'
          description: The AWS configuration for S3 dataset access.
      required:
        - evaluator
        - inputDataset
        - outputDataset
    # Object with a single, required string field: the evaluation job ID.
    gatewayEvaluationResult:
      type: object
      required:
        - evaluationJobId
      properties:
        evaluationJobId:
          type: string
    # An evaluator: scoring criteria plus build inputs (requirements, entry
    # point, commit hash, source). `name`, `createTime`, `createdBy`,
    # `updateTime`, `state` and `status` are read-only; nothing is required.
    gatewayEvaluator:
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        description:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        createdBy:
          type: string
          readOnly: true
        updateTime:
          type: string
          format: date-time
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayEvaluatorState'
          readOnly: true
        criteria:
          title: >-
            Criteria for the evaluator, it should produce a score for the metric
            (name of criteria)

            Used for eval3 with UI upload path
          type: array
          items:
            $ref: '#/components/schemas/gatewayCriterion'
            type: object
        requirements:
          title: Content for the requirements.txt for package installation
          type: string
        entryPoint:
          title: >-
            entry point of the evaluator inside the codebase.
            In module::function or path::function format
          type: string
        status:
          title: Status of the evaluator, used to expose build status to the user
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        commitHash:
          title: Commit hash of this evaluator from the user's original codebase
          type: string
        source:
          description: Source information for the evaluator codebase.
          $ref: '#/components/schemas/EvaluatorSource'
        defaultDataset:
          title: Default dataset that is associated with the evaluator
          type: string
    # Build / readiness states of an evaluator; defaults to STATE_UNSPECIFIED.
    gatewayEvaluatorState:
      type: string
      title: |-
        - ACTIVE: The evaluator is ready to use for evaluation
         - BUILDING: The evaluator is being built, i.e. building the e2b template
         - BUILD_FAILED: The evaluator build failed, and it cannot be used for evaluation
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - ACTIVE
        - BUILDING
        - BUILD_FAILED
    # A revision of an evaluator: name, evaluator snapshot, creation timestamp
    # and alias IDs. Every property declared here is read-only.
    gatewayEvaluatorVersion:
      type: object
      properties:
        name:
          description: The name of the evaluator revision.
          type: string
          readOnly: true
        snapshot:
          description: The snapshot of the evaluator at the time the revision was created.
          $ref: '#/components/schemas/gatewayEvaluator'
          readOnly: true
        createTime:
          description: The timestamp that the revision was created.
          type: string
          format: date-time
          readOnly: true
        alternateIds:
          description: >-
            Other revision IDs that share the same snapshot
            (e.g. aliases like "latest").
          type: array
          items:
            type: string
          readOnly: true
    # Marker object with no properties; its meaning is given by the description.
    gatewayEvenLoadStrategy:
      description: >-
        Dynamically adjust traffic allocation to balance the load per replica
        across

        the deployments as much as possible.
      type: object
    # Object with a single optional string property, `content`.
    gatewayExample:
      type: object
      properties:
        content:
          type: string
    # Response listing the signed URL(s) of the exported billing-metrics file(s).
    gatewayExportBillingMetricsResponse:
      type: object
      properties:
        signedUrls:
          description: |-
            The signed URL of the exported file(s).
            There will be exactly one file. This may change in the future.
          type: array
          items:
            type: string
    # Three plain string identifiers; none are required or readOnly.
    gatewayFakeCluster:
      type: object
      title: A fake cluster using https://pkg.go.dev/k8s.io/client-go/kubernetes/fake
      properties:
        projectId:
          type: string
        location:
          type: string
        clusterName:
          type: string
    # Feature flag resource: `name` and `createTime` are readOnly, `value` is not.
    gatewayFeatureFlag:
      type: object
      properties:
        name:
          title: The resource name, e.g. accounts/my-account/featureFlags/my-feature
          type: string
          readOnly: true
        value:
          description: |-
            Additional human-readable feature value if needed.

            If not set, the default value is "true".
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
    # Generic (helm-based) deployment resource. Callers must supply releaseValues,
    # placement and genericDeploymentType; name, timestamps and state are readOnly.
    gatewayGenericDeployment:
      type: object
      title: >-
        The message represent the values for a helm chart used to create a
        release
      required:
        - releaseValues
        - placement
        - genericDeploymentType
      properties:
        name:
          type: string
          readOnly: true
        releaseValues:
          title: Helm file fields with values for the generic deployment
          type: object
        placement:
          title: Regions where the deployment should be placed
          $ref: '#/components/schemas/gatewayPlacement'
        createTime:
          description: The creation time of the generic deployment.
          type: string
          format: date-time
          readOnly: true
        updateTime:
          description: The update time for the generic deployment.
          type: string
          format: date-time
          readOnly: true
        genericDeploymentType:
          title: >-
            Name of the GenericDeploymentType or GenericDeploymentTypeVersion.

            If the value for the field carries the resource name of
            GenericDeploymentType then the latest version will be used
          type: string
        state:
          $ref: '#/components/schemas/gatewayGenericDeploymentState'
          readOnly: true
    # Lifecycle state of a generic deployment (string enum, default STATE_UNSPECIFIED).
    gatewayGenericDeploymentState:
      type: string
      description: |-
        - CREATING: The deployment is still being created.
         - READY: The deployment is ready to be used.
         - FAILED: The deployment failed to be created.
         - UPDATING: There are in-progress updates happening with the deployment.
         - DELETING: The deployment is being deleted
         - DELETED: The deployment has been deleted.
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
        - FAILED
        - UPDATING
        - DELETING
        - DELETED
    # Generic deployment type resource; `imageTag` is the only property not marked readOnly.
    gatewayGenericDeploymentType:
      type: object
      properties:
        name:
          description: The resource name of the generic deployment type.
          type: string
          readOnly: true
        createTime:
          description: The creation time of the generic deployment type.
          type: string
          format: date-time
          readOnly: true
        updateTime:
          description: The update time for the generic deployment type.
          type: string
          format: date-time
          readOnly: true
        imageTag:
          title: The image tag of the helm chart associated with the resource
          type: string
    # Versioned snapshot of a gatewayGenericDeploymentType; all properties are readOnly.
    gatewayGenericDeploymentTypeVersion:
      type: object
      properties:
        name:
          title: >-
            The resource name of the generic deployment type version. e.g.
            accounts/{AccountId}/genericDeploymentTypes/{GenericDeploymentTypeId}/versions/{VersionId}
          type: string
          readOnly: true
        createTime:
          description: >-
            The creation time of the generic deployment type version. Lists will
            be ordered by this field.
          type: string
          format: date-time
          readOnly: true
        snapshot:
          description: Full snapshot of the GenericDeploymentType at this version.
          $ref: '#/components/schemas/gatewayGenericDeploymentType'
          readOnly: true
    # Serverless token-usage response. Every property is either a single
    # gatewayTimeSeries (the total* fields) or an array of them (the *ByBaseModel fields).
    gatewayGetAccountServerlessTokenUsageResponse:
      type: object
      properties:
        averageTokensPerMinuteByBaseModel:
          type: array
          items:
            $ref: '#/components/schemas/gatewayTimeSeries'
            type: object
        totalPeakGeneratedTokensPerMinute:
          description: Account-wide peak TPM by token category.
          $ref: '#/components/schemas/gatewayTimeSeries'
        totalPeakUncachedPromptTokensPerMinute:
          $ref: '#/components/schemas/gatewayTimeSeries'
        totalPeakCachedPromptTokensPerMinute:
          $ref: '#/components/schemas/gatewayTimeSeries'
        peakGeneratedTokensPerMinuteByBaseModel:
          description: Peak TPM per base model and token category.
          type: array
          items:
            $ref: '#/components/schemas/gatewayTimeSeries'
            type: object
        peakUncachedPromptTokensPerMinuteByBaseModel:
          type: array
          items:
            $ref: '#/components/schemas/gatewayTimeSeries'
            type: object
        peakCachedPromptTokensPerMinuteByBaseModel:
          type: array
          items:
            $ref: '#/components/schemas/gatewayTimeSeries'
            type: object
    # Usage-type selector for the filter-options request (string enum,
    # default USAGE_TYPE_UNSPECIFIED).
    gatewayGetAccountUsageFilterOptionsRequestUsageType:
      type: string
      description: >-
        Which usage stream(s) to load distinct filter values for. Mirrors
        GetAccountUsageRequest.UsageType.

        UNSPECIFIED loads filter options for both serverless and dedicated
        usage.

         - SERVERLESS: Serverless filter dimensions only (model_name, api_key_id, annotations.*).
         - DEDICATED_DEPLOYMENT: Dedicated deployment filter dimensions (deployment_name, annotations.team, .project, .environment).
      default: USAGE_TYPE_UNSPECIFIED
      enum:
        - USAGE_TYPE_UNSPECIFIED
        - SERVERLESS
        - DEDICATED_DEPLOYMENT
    # Declared as a bare object: no properties are listed for this response here.
    gatewayGetAccountUsageFilterOptionsResponse:
      type: object
      description: >-
        Distinct filter values for the account/time range (separate from
        GetAccountUsage).
    # Usage-type selector for the account usage request (string enum,
    # default USAGE_TYPE_UNSPECIFIED).
    gatewayGetAccountUsageRequestUsageType:
      type: string
      title: Usage type to query usage for
      description: >-
        - USAGE_TYPE_UNSPECIFIED: Default value. When specified (or when
        usage_type field is not set),

        returns usage data for all deployment types: both serverless requests
        and dedicated deployments.
         - SERVERLESS: Returns only serverless usage data.
        Filters the response to include only usage from serverless API requests.
         - DEDICATED_DEPLOYMENT: Returns only dedicated deployment usage data.
        Filters the response to include only usage from dedicated deployments.
      default: USAGE_TYPE_UNSPECIFIED
      enum:
        - USAGE_TYPE_UNSPECIFIED
        - SERVERLESS
        - DEDICATED_DEPLOYMENT
    # String-to-string map (per the property name: filename -> signed URL) for input upload.
    gatewayGetBatchInferenceJobInputUploadEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          description: Signed URLs for users to upload their input to GCS.
          type: object
          additionalProperties:
            type: string
    # String-to-string map (per the property name: filename -> signed URL) for
    # downloading batch inference job output. The description previously said
    # "input", which contradicted this schema's OutputDownload purpose.
    gatewayGetBatchInferenceJobOutputDownloadEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          type: object
          additionalProperties:
            type: string
          description: Signed URLs for users to download their output from GCS.
    # Wraps the list of gatewayLineItem entries returned by the billing summary endpoint.
    gatewayGetBillingSummaryResponse:
      type: object
      title: Response for billing summary endpoint
      properties:
        lineItems:
          title: Individual billing line items
          type: array
          items:
            $ref: '#/components/schemas/gatewayLineItem'
            type: object
    # String-to-string map (per the property name: filename -> signed URL) for dataset download.
    gatewayGetDatasetDownloadEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          title: Signed URLs for downloading dataset files
          type: object
          additionalProperties:
            type: string
    # String-to-string map (per the property name: filename -> signed URL) for dataset upload.
    gatewayGetDatasetUploadEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          title: Signed URLs for uploading dataset files
          type: object
          additionalProperties:
            type: string
    # `metrics` is an open map whose values are double-precision numbers.
    gatewayGetDeploymentMetricsResponse:
      type: object
      properties:
        metrics:
          title: Map of metric name to metric value for the specified time range
          type: object
          additionalProperties:
            type: number
            format: double
    # Response carrying the signed URL of a DPO job's metrics file.
    # The note about when the URL is populated used to sit in a multi-line
    # object-level `title` even though it talks about "this field"; it is now the
    # `description` of `signedUrl`, the only field of this schema.
    gatewayGetDpoJobMetricsFileResponse:
      type: object
      properties:
        signedUrl:
          type: string
          title: The signed URL for the metrics file
          description: |-
            Populated when the JobMetrics file has been created for the DPO job
            and the file exists; empty otherwise.
    # Signed-URL response for an evaluation job's execution log; contentType and
    # expireTime are documented as set only when the URL itself is present.
    gatewayGetEvaluationJobExecutionLogEndpointResponse:
      type: object
      description: Response carries the stream log URL (for VirtualizedLogViewer).
      properties:
        executionLogSignedUri:
          description: >-
            Short-lived signed URL for the execution log file.

            Empty if the log file has not been created yet (e.g. job not started
            or still initializing).
          type: string
        contentType:
          description: |-
            Content type for the log file (e.g. "text/plain").
            Only set when execution_log_signed_uri is present.
          type: string
        expireTime:
          description: |-
            Expiration time of the signed URL.
            Only set when execution_log_signed_uri is present.
          type: string
          format: date-time
    # Single string property holding the evaluator build log's signed URL.
    gatewayGetEvaluatorBuildLogEndpointResponse:
      type: object
      properties:
        buildLogSignedUri:
          title: Signed URL for the build log
          type: string
    # String-to-string map of filename -> signed URL for evaluator source code download.
    gatewayGetEvaluatorSourceCodeEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          title: Mapping from filename to signed URL for downloading the source code
          type: object
          additionalProperties:
            type: string
    # Undocumented in the spec: a string-to-string map named filenameToSignedUrls
    # (presumably filename -> signed upload URL, as in the sibling *UploadEndpointResponse
    # schemas — TODO confirm and add a description).
    gatewayGetEvaluatorUploadEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          type: object
          additionalProperties:
            type: string
    # Wraps the ledger as an array of gatewayLedgerEntry objects.
    gatewayGetLedgerResponse:
      type: object
      properties:
        ledger:
          description: The contents of the ledger.
          type: array
          items:
            $ref: '#/components/schemas/gatewayLedgerEntry'
            type: object
    # String-to-string map (per the property name: filename -> signed URL) for
    # model download. Fixes the duplicated word ("for for") in the title.
    gatewayGetModelDownloadEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          type: object
          additionalProperties:
            type: string
          title: Signed URLs for downloading model files
    # Two string-to-string maps for model upload: signed URLs and unsigned storage URIs.
    gatewayGetModelUploadEndpointResponse:
      type: object
      properties:
        filenameToSignedUrls:
          title: Signed URLs for uploading model files
          type: object
          additionalProperties:
            type: string
        filenameToUnsignedUris:
          description: >-
            Unsigned URIs (e.g. s3://bucket/key, gs://bucket/key) for uploading
            model files.

            Returned when the caller has permission to upload to the URIs.
          type: object
          additionalProperties:
            type: string
    # Single int32 property, `count` (undocumented in the spec).
    gatewayGetModelVersionCountResponse:
      type: object
      properties:
        count:
          type: integer
          format: int32
    # Three undocumented string properties: issuerUrl, clientId and cognitoDomain.
    gatewayGetOAuthArgumentsResponse:
      type: object
      properties:
        issuerUrl:
          type: string
        clientId:
          type: string
        cognitoDomain:
          type: string
    # Single string property holding the metrics file's signed URL.
    gatewayGetReinforcementFineTuningJobMetricsFileResponse:
      type: object
      properties:
        signedUrl:
          description: The signed URL for the metrics file.
          type: string
    # Wraps an array of GetReservationDataResponseReservationDataByType entries.
    gatewayGetReservationDataResponse:
      type: object
      title: Response with current GPU usage and reservation counts
      properties:
        reservationDataByType:
          title: GPU counts broken down by accelerator type
          type: array
          items:
            $ref: >-
              #/components/schemas/GetReservationDataResponseReservationDataByType
            type: object
    # Single string property holding the signed URL of the public trainer logs file.
    gatewayGetRlorTrainerJobPublicLogsResponse:
      type: object
      properties:
        signedUrl:
          description: >-
            Signed URL for the public trainer logs file (human-readable status
            updates).

            The URL expires after 24 hours and should be fetched dynamically
            when needed.
          type: string
    # Single string property, `message`.
    gatewayGetTerminationMessageResponse:
      type: object
      properties:
        message:
          description: The termination message.
          type: string
    # Single property `spend`, expressed with the shared typeMoney schema.
    gatewayGetTotalHistoricalSpendResponse:
      type: object
      properties:
        spend:
          $ref: '#/components/schemas/typeMoney'
    # Hidden-states dataset generation settings. No property is required; per the
    # descriptions, deployedModel and deploymentShape are alternatives (an empty
    # deployedModel plus a deploymentShape triggers an auto-created deployment).
    gatewayHiddenStatesGenConfig:
      type: object
      title: >-
        Config for generating dataset with hidden states for SFTJ or eagle
        training.
      properties:
        deployedModel:
          description: >-
            The deployed model or deployment to use for hidden states
            generation.

            Can be either a DeployedModel resource name
            (accounts/{account}/deployedModels/{id})

            or a Deployment resource name (accounts/{account}/deployments/{id}).

            When empty and deployment_shape is set, a deployment will be
            auto-created.
          type: string
        maxWorkers:
          type: integer
          format: int32
        maxTokens:
          type: integer
          format: int32
        inputOffset:
          type: integer
          format: int32
        inputLimit:
          type: integer
          format: int32
        regenerateAssistant:
          type: boolean
        outputActivations:
          type: boolean
        apiKey:
          type: string
        deploymentShape:
          description: >-
            Deployment shape to use when auto-creating a deployment for hidden
            states

            generation. When set and deployed_model is empty, the system will

            automatically create a deployment with this shape, wait for it to
            become

            ready, and clean it up after the job completes.
          type: string
        replicaCount:
          description: Number of replicas for the auto-created deployment. Defaults to 1.
          type: integer
          format: int32
    # SSO identity provider resource for an account.
    # readOnly properties: name, createTime, updateTime, state, status, domainUrl,
    # issuerUrl, clientId. All other properties are not marked readOnly.
    # No `required` list is declared, so every property is optional at schema level.
    gatewayIdentityProvider:
      type: object
      properties:
        name:
          type: string
          title: |-
            The resource name of the identity provider.
            Format: accounts/{account}/identityProviders/{identity_provider}
          readOnly: true
        displayName:
          type: string
          title: Display name for the identity provider
        createTime:
          type: string
          format: date-time
          title: Creation timestamp
          readOnly: true
        updateTime:
          type: string
          format: date-time
          title: Last update timestamp
          readOnly: true
        # Protocol-specific configuration. The schema has no oneOf/required here, so
        # it does not itself force exactly one of samlConfig / oidcConfig to be set.
        samlConfig:
          $ref: '#/components/schemas/gatewaySamlConfig'
        oidcConfig:
          $ref: '#/components/schemas/gatewayOidcConfig'
        tenantDomains:
          type: array
          items:
            type: string
          title: |-
            List of allowed domains for this identity provider
            Example: ["example.com", "example.co.uk", "example.de"]
            If not provided, domain will be derived from the account email
        state:
          $ref: '#/components/schemas/gatewayIdentityProviderState'
          title: Current state of the identity provider
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          description: Contains information about the identity provider status.
          readOnly: true
        domainUrl:
          type: string
          description: The domain URL.
          readOnly: true
        issuerUrl:
          type: string
          description: The OIDC issuer URL.
          readOnly: true
        clientId:
          type: string
          description: The OIDC client ID.
          readOnly: true
        enableJitUserProvisioning:
          type: boolean
          description: >-
            Enable Just-In-Time (JIT) user provisioning.

            When enabled, users are automatically created in the account on
            first SSO login

            if they don't already exist. When disabled, users must be
            pre-provisioned

            before they can authenticate via SSO.
        # Plain string rather than an enum: the accepted role names are listed only
        # in the description text below.
        jitDefaultRole:
          type: string
          description: >-
            Default role assigned to JIT-provisioned users.

            Valid values: "admin", "user", "contributor", "inference-user".

            Only applies when enable_jit_user_provisioning is true and RBAC V2
            is enabled.

            If empty or unset, defaults to "inference-user" (least privilege).

            If RBAC V2 is not enabled for the account, JIT users always get
            "user" role.
        # NOTE(review): the enforcement rules below are carried in `title`, not
        # `description`, unlike the neighbouring boolean flags.
        enforceSso:
          type: boolean
          title: >-
            Enforce SSO authentication and restrict account access to users with
            approved email domains.

            When enabled:

            - Users with email domains matching tenant_domains must authenticate
            via this identity provider

            - Users with other email domains are blocked (tenant_domains becomes
            an allowlist)

            - Superusers and API keys are exempt from this enforcement
        enableIdpInitiatedSso:
          type: boolean
          description: >-
            Enable IdP-initiated SAML (Security Assertion Markup Language)
            single sign-on.

            When enabled, users can start the login flow from their identity
            provider's

            portal (e.g., Okta app launcher) instead of from the Fireworks login
            page.

            Only supported for SAML identity providers.
    # String enum, default STATE_UNSPECIFIED; the individual values are not documented.
    gatewayIdentityProviderState:
      type: string
      title: State of the identity provider
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
        - UPDATING
        - DELETING
    # Inference log record; every property is marked readOnly.
    # durationMs is an int64 carried as a string (type: string, format: int64).
    gatewayInferenceLog:
      type: object
      title: Trace resource definition
      properties:
        name:
          title: >-
            Resource name in the format
            "accounts/{account_id}/inference-logs/{inference_log_id}"
          type: string
          readOnly: true
        createTime:
          title: The time the inference log was created
          type: string
          format: date-time
          readOnly: true
        model:
          title: The model used for the inference
          type: string
          readOnly: true
        requestType:
          title: Request type (chat, completion, embedding, etc.)
          type: string
          readOnly: true
        inputContent:
          title: The input content provided in the request
          type: string
          readOnly: true
        outputContent:
          title: The output content generated by the model
          type: string
          readOnly: true
        durationMs:
          title: Duration of the request in milliseconds
          type: string
          format: int64
          readOnly: true
        statusCode:
          title: Status code of the API call
          type: integer
          format: int32
          readOnly: true
        metadata:
          title: Additional metadata about the log
          type: object
          additionalProperties:
            type: string
          readOnly: true
        updateTime:
          title: The update time for the inference log
          type: string
          format: date-time
          readOnly: true
    # Invoice record: amount via typeMoney, lifecycle via gatewayInvoiceState,
    # kind via gatewayInvoiceType. No property is required or readOnly.
    gatewayInvoice:
      type: object
      properties:
        id:
          title: ID of the invoice
          type: string
        amountDue:
          description: |-
            This is the final amount required to be charged to the customer
            after any credits, discounts and customer balance have been applied.
          $ref: '#/components/schemas/typeMoney'
        invoiceUrl:
          description: A URL for the invoice portal.
          type: string
        state:
          $ref: '#/components/schemas/gatewayInvoiceState'
        targetTime:
          description: The scheduled time of the invoice.
          type: string
          format: date-time
        paidTime:
          description: The timestamp when the invoice was paid.
          type: string
          format: date-time
        type:
          title: The invoice type
          $ref: '#/components/schemas/gatewayInvoiceType'
    # Invoice lifecycle state (string enum, default STATE_UNSPECIFIED); values are undocumented.
    gatewayInvoiceState:
      type: string
      default: STATE_UNSPECIFIED
      enum:
        - STATE_UNSPECIFIED
        - DRAFT
        - ISSUED
        - PAID
        - VOID
        - FAILED
    # Invoice kind (string enum, default TYPE_UNSPECIFIED); values are undocumented.
    gatewayInvoiceType:
      type: string
      default: TYPE_UNSPECIFIED
      enum:
        - TYPE_UNSPECIFIED
        - POSTPAID_BILLING
        - PREPAID_CREDITS
        - CONTRACTED
    # Progress counters shared by several job kinds. Every counter is an int32 and
    # none is required. Fixes the "fasion" typo in the `epoch` description.
    gatewayJobProgress:
      type: object
      properties:
        percent:
          type: integer
          format: int32
          description: Progress percent, within the range from 0 to 100.
        epoch:
          type: integer
          format: int32
          description: >-
            The epoch for which the progress percent is reported, usually
            starting from 0.

            This is optional for jobs that don't run in an epoch fashion, e.g.
            BIJ, EVJ.
        totalInputRequests:
          type: integer
          format: int32
          description: Total number of input requests/rows in the job.
        totalProcessedRequests:
          type: integer
          format: int32
          description: >-
            Total number of requests that have been processed (successfully or
            failed).
        successfullyProcessedRequests:
          type: integer
          format: int32
          description: Number of requests that were processed successfully.
        failedRequests:
          type: integer
          format: int32
          description: Number of requests that failed to process.
        outputRows:
          type: integer
          format: int32
          description: Number of output rows generated.
        inputTokens:
          type: integer
          format: int32
          description: Total number of input tokens processed.
        outputTokens:
          type: integer
          format: int32
          description: Total number of output tokens generated.
        cachedInputTokenCount:
          type: integer
          format: int32
          description: The number of input tokens that hit the prompt cache.
      description: Progress of a job, e.g. RLOR, EVJ, BIJ etc.
    # Shared asynchronous-job state enum (default JOB_STATE_UNSPECIFIED). Only
    # JOB_STATE_PAUSED and JOB_STATE_DELETED carry per-value documentation.
    gatewayJobState:
      type: string
      description: |-
        JobState represents the state an asynchronous job can be in.

         - JOB_STATE_PAUSED: Job is paused, typically due to account suspension or manual intervention.
         - JOB_STATE_DELETED: Job has been deleted.
      default: JOB_STATE_UNSPECIFIED
      enum:
        - JOB_STATE_UNSPECIFIED
        - JOB_STATE_CREATING
        - JOB_STATE_RUNNING
        - JOB_STATE_COMPLETED
        - JOB_STATE_FAILED
        - JOB_STATE_CANCELLED
        - JOB_STATE_DELETING
        - JOB_STATE_WRITING_RESULTS
        - JOB_STATE_VALIDATING
        - JOB_STATE_DELETING_CLEANING_UP
        - JOB_STATE_PENDING
        - JOB_STATE_EXPIRED
        - JOB_STATE_RE_QUEUEING
        - JOB_STATE_CREATING_INPUT_DATASET
        - JOB_STATE_IDLE
        - JOB_STATE_CANCELLING
        - JOB_STATE_EARLY_STOPPED
        - JOB_STATE_PAUSED
        - JOB_STATE_DELETED
    # LLM-based assertion: `providers` and `prompts` are required, the rest optional.
    gatewayLLMAssertion:
      type: object
      required:
        - providers
        - prompts
      properties:
        llmEvaluatorPrompt:
          title: Prompt used to evaluate the output
          type: string
        providers:
          title: One or more providers to use
          type: array
          items:
            $ref: '#/components/schemas/gatewayProvider'
            type: object
        prompts:
          title: One or more prompts to evaluate
          type: array
          items:
            type: string
        evaluateOptions:
          title: Options for how to run the evaluation
          $ref: '#/components/schemas/gatewayEvaluateOptions'
    # One ledger row: a date-time `timestamp` plus a free-form string `value`.
    gatewayLedgerEntry:
      type: object
      properties:
        timestamp:
          description: The timestamp of the entry.
          type: string
          format: date-time
        value:
          description: The contents of the entry.
          type: string
    # Billing line item: category, up to two grouping key/value pairs, a double
    # quantity, and unit/total amounts expressed with typeMoney.
    gatewayLineItem:
      type: object
      title: Individual billing line item
      properties:
        category:
          title: Category name (e.g., "Audio Transcription", "Text Completion")
          type: string
        groupingKey:
          title: Primary grouping key (e.g., "model", "model_bucket")
          type: string
        groupingValue:
          title: >-
            Primary grouping value (e.g., "whisper-v3",
            "llama-v3p3-70b-instruct")
          type: string
        secondaryGroupingKey:
          title: Secondary grouping key (e.g., "is_batch")
          type: string
        secondaryGroupingValue:
          title: Secondary grouping value (e.g., "true", "false")
          type: string
        quantity:
          title: Quantity consumed
          type: number
          format: double
        unitAmount:
          title: Unit amount (price per unit)
          $ref: '#/components/schemas/typeMoney'
        totalCost:
          title: Total cost for this line item
          $ref: '#/components/schemas/typeMoney'
    # Rate-limit response with two views: a snapshot list (`rateLimits`) and a
    # list of time series (`series`). No pagination fields are declared.
    gatewayListAccountServerlessRateLimitsResponse:
      type: object
      properties:
        rateLimits:
          description: >-
            Snapshot of effective limits at the end of the requested range (or
            at the current time if

            `end` is unset), using the same resolution rules as the time series.
          type: array
          items:
            $ref: '#/components/schemas/gatewayAccountRateLimit'
            type: object
        series:
          description: >-
            Time series of peak effective limits per interval, one series per
            deployment and metric.
          type: array
          items:
            $ref: '#/components/schemas/gatewayTimeSeries'
            type: object
    # Paginated list of gatewayAccount: items, nextPageToken, int32 totalSize.
    gatewayListAccountsResponse:
      type: object
      properties:
        accounts:
          type: array
          items:
            $ref: '#/components/schemas/gatewayAccount'
            type: object
        nextPageToken:
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
          type: string
        totalSize:
          description: The total number of accounts.
          type: integer
          format: int32
    # List of gatewayApiKey plus nextPageToken and an int32 totalSize.
    # The nextPageToken text used to be a multi-paragraph `title` ending in an
    # internal "TODO: Implement pagination" note that leaked into the published
    # API docs; it is now a `description` that keeps the user-facing caveat only.
    # NOTE(review): drop the caveat once pagination is actually supported.
    gatewayListApiKeysResponse:
      type: object
      properties:
        apiKeys:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayApiKey'
          description: List of API keys retrieved.
        nextPageToken:
          type: string
          description: >-
            Token for fetching the next page of results. Pagination support to
            be added.
        totalSize:
          type: integer
          format: int32
          description: The total number of API keys.
    # Paginated list of gatewayAuditLogEntry: items, nextPageToken, int32 totalSize.
    # totalSize previously said "request logs", which does not match the
    # `auditLogs` list this response returns.
    gatewayListAuditLogsResponse:
      type: object
      properties:
        auditLogs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayAuditLogEntry'
        nextPageToken:
          type: string
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of audit logs matching the request.
    # Paginated list of gatewayBatchInferenceJob: items, nextPageToken, int32 totalSize.
    gatewayListBatchInferenceJobsResponse:
      type: object
      properties:
        batchInferenceJobs:
          type: array
          items:
            $ref: '#/components/schemas/gatewayBatchInferenceJob'
            type: object
        nextPageToken:
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
          type: string
        totalSize:
          description: The total number of batch inference jobs.
          type: integer
          format: int32
    # Paginated list of gatewayCluster: items, nextPageToken, int32 totalSize.
    gatewayListClustersResponse:
      type: object
      properties:
        clusters:
          type: array
          items:
            $ref: '#/components/schemas/gatewayCluster'
            type: object
        nextPageToken:
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
          type: string
        totalSize:
          description: The total number of clusters.
          type: integer
          format: int32
    # Unpaginated list of ListCostsResponseCostDataItem entries (no token/size fields).
    gatewayListCostsResponse:
      type: object
      properties:
        costDataItems:
          type: array
          items:
            $ref: '#/components/schemas/ListCostsResponseCostDataItem'
            type: object
    # Paginated list of gatewayCreditRedemption: items, nextPageToken, int32 totalSize.
    gatewayListCreditRedemptionsResponse:
      type: object
      properties:
        creditRedemptions:
          description: The list of credit redemptions.
          type: array
          items:
            $ref: '#/components/schemas/gatewayCreditRedemption'
            type: object
        nextPageToken:
          description: |-
            A token that can be sent as page_token to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
          type: string
        totalSize:
          description: The total number of redemptions matching the request.
          type: integer
          format: int32
    # List of gatewayDatasetValidationJob with nextPageToken and int32 totalSize;
    # none of the three properties carries a description in the spec.
    gatewayListDatasetValidationJobsResponse:
      type: object
      properties:
        datasetValidationJobs:
          type: array
          items:
            $ref: '#/components/schemas/gatewayDatasetValidationJob'
            type: object
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # Paginated list of gatewayDataset: items, nextPageToken, int32 totalSize.
    gatewayListDatasetsResponse:
      type: object
      properties:
        datasets:
          type: array
          items:
            $ref: '#/components/schemas/gatewayDataset'
            type: object
        nextPageToken:
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
          type: string
        totalSize:
          title: The total number of datasets
          type: integer
          format: int32
    # Paginated list of gatewayDeployedModel: items, nextPageToken, int32 totalSize.
    gatewayListDeployedModelsResponse:
      type: object
      properties:
        deployedModels:
          type: array
          items:
            $ref: '#/components/schemas/gatewayDeployedModel'
            type: object
        nextPageToken:
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
          type: string
        totalSize:
          title: The total number of deployed models
          type: integer
          format: int32
    # Deployment metrics result: `series` is an array of gatewayTimeSeries
    # entries. No pagination fields are declared on this schema.
    gatewayListDeploymentMetricsResponse:
      type: object
      properties:
        series:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayTimeSeries'
          title: Array of time series, each with its own labels and values
    # Paginated list of deployment shape versions: the page of items, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListDeploymentShapeVersionsResponse:
      type: object
      properties:
        deploymentShapeVersions:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDeploymentShapeVersion'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of deployment shape versions.
    # Paginated list of deployment shapes: the `deploymentShapes` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListDeploymentShapesResponse:
      type: object
      properties:
        deploymentShapes:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDeploymentShape'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of deployment shapes.
    # Paginated list of deployment shards: the `deploymentShards` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListDeploymentShardsResponse:
      type: object
      properties:
        deploymentShards:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDeploymentShard'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of deployment shards.
    # Paginated list of deployment templates: the `deploymentTemplates` page,
    # the `nextPageToken` cursor, and the `totalSize` count.
    gatewayListDeploymentTemplatesResponse:
      type: object
      properties:
        deploymentTemplates:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDeploymentTemplate'
        nextPageToken:
          type: string
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          # The items listed by this response are deployment templates, so the
          # count is labelled accordingly.
          title: The total number of deployment templates
    # Paginated list of deployments: the `deployments` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListDeploymentsResponse:
      type: object
      properties:
        deployments:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDeployment'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of deployments.
    # Paginated list of developer passes: the `developerPasses` page, the
    # `nextPageToken` cursor, and the `totalSize` count of matching passes.
    gatewayListDeveloperPassesResponse:
      type: object
      properties:
        developerPasses:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDeveloperPass'
          description: The list of developer passes.
        nextPageToken:
          type: string
          description: Token to retrieve the next page of results.
        totalSize:
          type: integer
          format: int32
          description: The total number of developer passes matching the query.
    # Paginated list of DPO jobs: the `dpoJobs` page, the `nextPageToken`
    # cursor, and the `totalSize` count.
    gatewayListDpoJobsResponse:
      type: object
      properties:
        dpoJobs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDpoJob'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          title: The total number of dpo jobs
    # Paginated list of EAGLE training jobs: the `eagleTrainingJobs` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListEagleTrainingJobsResponse:
      type: object
      properties:
        eagleTrainingJobs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayEagleTrainingJob'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of EAGLE training jobs.
    # List response carrying `evaluationJobs` plus `nextPageToken` and
    # `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListEvaluationJobsResponse:
      type: object
      properties:
        evaluationJobs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayEvaluationJob'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # List response carrying `evaluations` plus `nextPageToken` and
    # `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListEvaluationsResponse:
      type: object
      properties:
        evaluations:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayEvaluation'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # Paginated list of evaluator versions: the `evaluatorVersions` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListEvaluatorVersionsResponse:
      type: object
      properties:
        evaluatorVersions:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayEvaluatorVersion'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as page_token to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of evaluator versions.
    # List response carrying `evaluators` plus `nextPageToken` and `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListEvaluatorsResponse:
      type: object
      properties:
        evaluators:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayEvaluator'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # List response carrying `featureFlags` plus `nextPageToken` and
    # `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListFeatureFlagsResponse:
      type: object
      properties:
        featureFlags:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayFeatureFlag'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # Paginated list of generic deployment type versions: the page of items,
    # the `nextPageToken` cursor, and the `totalSize` count.
    gatewayListGenericDeploymentTypeVersionsResponse:
      type: object
      properties:
        genericDeploymentTypeVersions:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayGenericDeploymentTypeVersion'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of generic deployment type versions.
    # Paginated list of generic deployment types: the `genericDeploymentTypes`
    # page, the `nextPageToken` cursor, and the `totalSize` count.
    gatewayListGenericDeploymentTypesResponse:
      type: object
      properties:
        genericDeploymentTypes:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayGenericDeploymentType'
        nextPageToken:
          type: string
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          # The items listed by this response are generic deployment types.
          description: The total number of generic deployment types.
    # Paginated list of generic deployments: the `genericDeployments` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListGenericDeploymentsResponse:
      type: object
      properties:
        genericDeployments:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayGenericDeployment'
        nextPageToken:
          type: string
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of generic deployments.
    # List response carrying `identityProviders` plus `nextPageToken` and
    # `totalSize`. Field documentation on this schema is carried in `title`
    # rather than `description`.
    gatewayListIdentityProvidersResponse:
      type: object
      properties:
        identityProviders:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayIdentityProvider'
          title: List of identity providers
        nextPageToken:
          type: string
          title: Next page token
        totalSize:
          type: integer
          format: int32
          title: Total count
    # Paginated list of inference logs: the `inferenceLogs` page, the
    # `nextPageToken` cursor, and the `totalSize` count of matching logs.
    # Field documentation on this schema is carried in `title`.
    gatewayListInferenceLogsResponse:
      type: object
      properties:
        inferenceLogs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayInferenceLog'
          title: The list of inference logs
        nextPageToken:
          type: string
          title: A token to retrieve the next page of results
        totalSize:
          type: integer
          format: int32
          title: The total number of inference logs matching the request
      title: ListInferenceLogsResponse returns the list of inference logs
    # Invoice listing split into two arrays: `invoices` (usage-based) and
    # `contracts` (contract-based), followed by `nextPageToken` and `totalSize`.
    # NOTE(review): the `nextPageToken` title calls the pagination fields
    # "dummy fields" kept for list.ListAndPrint compatibility; presumably
    # neither field is meaningfully populated -- confirm, and consider
    # replacing the internal wording in the published title.
    gatewayListInvoicesResponse:
      type: object
      properties:
        invoices:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayInvoice'
          title: Usage-based invoices from Orb and Stripe
        contracts:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayBillcomInvoice'
          title: Contract-based invoices from Bill.com
        nextPageToken:
          type: string
          title: dummy fields to be compatible with list.ListAndPrint
        totalSize:
          type: integer
          format: int32
    # Paginated list of models: the `models` page, the `nextPageToken` cursor,
    # and the `totalSize` count.
    gatewayListModelsResponse:
      type: object
      properties:
        models:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayModel'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          title: The total number of models
    # Payment-method listing: a `defaultPaymentMethodId` string and the
    # `stripePaymentMethods` array. No pagination fields are declared.
    gatewayListPaymentMethodsResponse:
      type: object
      properties:
        defaultPaymentMethodId:
          type: string
        stripePaymentMethods:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/ListPaymentMethodsResponseStripePaymentMethod'
    # Paginated list of pricing plans: the `pricingPlans` page, the
    # `nextPageToken` cursor, and the `totalSize` count of matching plans.
    # Field documentation on this schema is carried in `title`.
    gatewayListPricingPlansResponse:
      type: object
      properties:
        pricingPlans:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayPricingPlan'
          title: The list of pricing plans
        nextPageToken:
          type: string
          title: Token to retrieve the next page of results
        totalSize:
          type: integer
          format: int32
          title: The total number of pricing plans matching the query
    # Paginated list of quotas: the `quotas` page, the `nextPageToken` cursor,
    # and the `totalSize` count.
    gatewayListQuotasResponse:
      type: object
      properties:
        quotas:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayQuota'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          title: The total number of quotas
    # Paginated list of reinforcement fine-tuning jobs: the page of items, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListReinforcementFineTuningJobsResponse:
      type: object
      properties:
        reinforcementFineTuningJobs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayReinforcementFineTuningJob'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          title: The total number of fine-tuning jobs
    # Paginated list of reservations: the `reservations` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListReservationsResponse:
      type: object
      properties:
        reservations:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayReservation'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of reservations.
    # List response carrying `checkpoints` (gatewayRlorTrainerJobCheckpoint
    # items) plus `nextPageToken` and `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListRlorTrainerJobCheckpointsResponse:
      type: object
      properties:
        checkpoints:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayRlorTrainerJobCheckpoint'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # Paginated list of RLOR trainer jobs: the `rlorTrainerJobs` page, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListRlorTrainerJobsResponse:
      type: object
      properties:
        rlorTrainerJobs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayRlorTrainerJob'
        nextPageToken:
          type: string
          description: >-
            A token, which can be sent as `page_token` to retrieve the next
            page.

            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          # Named after the resource this response lists (`rlorTrainerJobs`).
          title: The total number of RLOR trainer jobs
    # Paginated list of routers: the `routers` page, the `nextPageToken`
    # cursor, and the `totalSize` count.
    gatewayListRoutersResponse:
      type: object
      properties:
        routers:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayRouter'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          title: The total number of routers
    # List response carrying `secrets` plus `nextPageToken` and `totalSize`.
    # NOTE(review): `nextPageToken` carries no description here, unlike most
    # sibling List*Response schemas -- confirm semantics and document.
    gatewayListSecretsResponse:
      type: object
      properties:
        secrets:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewaySecret'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
          description: The total number of secrets.
    # Paginated list of supervised fine-tuning jobs: the page of items, the
    # `nextPageToken` cursor, and the `totalSize` count.
    gatewayListSupervisedFineTuningJobsResponse:
      type: object
      properties:
        supervisedFineTuningJobs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewaySupervisedFineTuningJob'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          title: The total number of fine-tuning jobs
    # Paginated list of trails: the `trails` page, the `nextPageToken` cursor,
    # and the `totalSize` count.
    gatewayListTrailsResponse:
      type: object
      properties:
        trails:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayTrail'
          title: The list of trails
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          title: The total number of trails
      title: Response for listing trails
    # List response carrying `trainingSessionJobs` plus `nextPageToken` and
    # `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListTrainingSessionJobsResponse:
      type: object
      properties:
        trainingSessionJobs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayTrainingSessionJob'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # List response carrying `trainingSessions` plus `nextPageToken` and
    # `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListTrainingSessionsResponse:
      type: object
      properties:
        trainingSessions:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayTrainingSession'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # List response carrying `trainingShapeVersions` plus `nextPageToken` and
    # `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListTrainingShapeVersionsResponse:
      type: object
      properties:
        trainingShapeVersions:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayTrainingShapeVersion'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # List response carrying `trainingShapes` plus `nextPageToken` and
    # `totalSize`.
    # NOTE(review): the pagination fields carry no description here, unlike
    # most sibling List*Response schemas -- confirm semantics and document.
    gatewayListTrainingShapesResponse:
      type: object
      properties:
        trainingShapes:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayTrainingShape'
        nextPageToken:
          type: string
        totalSize:
          type: integer
          format: int32
    # Paginated list of users: the `users` page, the `nextPageToken` cursor,
    # and the `totalSize` count.
    gatewayListUsersResponse:
      type: object
      properties:
        users:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayUser'
        nextPageToken:
          type: string
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
        totalSize:
          type: integer
          format: int32
          description: The total number of users.
    # String enum of metric identifiers; the default is METRICS_UNSPECIFIED.
    # Only LATENCY and SERVERLESS_REQUESTS_TOTAL are annotated in the
    # description. NOTE(review): presumably each annotation labels the group
    # of values that starts at that entry -- confirm against the source enum.
    gatewayMetrics:
      type: string
      enum:
        - METRICS_UNSPECIFIED
        - REPLICA_COUNT
        - LOAD
        - CONCURRENT_REQUESTS
        - PROMPT_CACHE_HIT_RATE
        - REQUESTS_TOTAL
        - REQUESTS_ERROR_RATE
        - TOKENS_PROMPT_PER_REQUEST
        - TOKENS_GENERATED_PER_REQUEST
        - SPECULATIVE_HIT_TOTAL
        - SPECULATIVE_HIT_USER
        - REQUESTS_PER_SECOND
        - TOKENS_PER_SECOND
        - TOKENS_GENERATED_PER_SECOND
        - LATENCY
        - GENERATION_QUEUE_LATENCY
        - PREFILL_QUEUE_LATENCY
        - FIRST_TOKEN_LATENCY
        - GENERATION_PER_TOKEN_LATENCY
        - SERVERLESS_REQUESTS_TOTAL
        - SERVERLESS_REQUESTS_RATE_MIRROR_PER_ACCOUNT
        - SERVERLESS_REQUESTS_LIMIT_PER_ACCOUNT
        - SERVERLESS_TOKENS_PROMPT_LIMIT_PER_ACCOUNT
        - SERVERLESS_TOKENS_PROMPT_RATE_MIRROR_PER_ACCOUNT
        - SERVERLESS_TOKENS_GENERATED_LIMIT_PER_ACCOUNT
        - SERVERLESS_TOKENS_GENERATED_RATE_MIRROR_PER_ACCOUNT
        - SERVERLESS_CACHED_PROMPT_TOKENS
        - SERVERLESS_TOTAL_PROMPT_TOKENS
      default: METRICS_UNSPECIFIED
      description: |-
        - LATENCY: Percentile metrics
         - SERVERLESS_REQUESTS_TOTAL: Serverless account-specific metrics
    # Model resource. Fields marked `readOnly: true` are output-only; the
    # remaining fields are writable. Kind-specific detail objects
    # (`baseModelDetails`, `peftDetails`, `teftDetails`) are constrained by
    # `kind` as stated in their descriptions; the schema itself does not
    # enforce those constraints.
    gatewayModel:
      type: object
      properties:
        name:
          type: string
          title: >-
            The resource name of the model. e.g.
            accounts/my-account/models/my-model
          readOnly: true
        displayName:
          type: string
          description: |-
            Human-readable display name of the model. e.g. "My Model"
            Must be fewer than 64 characters long.
        description:
          type: string
          description: >-
            The description of the model. Must be fewer than 1000 characters
            long.
        createTime:
          type: string
          format: date-time
          description: The creation time of the model.
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayModelState'
          description: The state of the model.
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          description: Contains detailed message when the last model operation fails.
          readOnly: true
        kind:
          $ref: '#/components/schemas/ModelKind'
          description: |-
            The kind of model.
            If not specified, the default is HF_PEFT_ADDON.
        githubUrl:
          type: string
          description: The URL to GitHub repository of the model.
        huggingFaceUrl:
          type: string
          description: The URL to the Hugging Face model.
        baseModelDetails:
          $ref: '#/components/schemas/gatewayBaseModelDetails'
          description: |-
            Base model details.
            Required if kind is HF_BASE_MODEL. Must not be set otherwise.
        peftDetails:
          $ref: '#/components/schemas/gatewayPEFTDetails'
          description: |-
            PEFT addon details.
            Required if kind is HF_PEFT_ADDON or HF_TEFT_ADDON.
        teftDetails:
          $ref: '#/components/schemas/gatewayTEFTDetails'
          description: |-
            TEFT addon details.
            Required if kind is HF_TEFT_ADDON. Must not be set otherwise.
        public:
          type: boolean
          description: If true, the model will be publicly readable.
        conversationConfig:
          $ref: '#/components/schemas/gatewayConversationConfig'
          description: If set, the Chat Completions API will be enabled for this model.
        contextLength:
          type: integer
          format: int32
          description: The maximum context length supported by the model.
        supportsImageInput:
          type: boolean
          description: If set, images can be provided as input to the model.
        supportsTools:
          type: boolean
          description: >-
            If set, tools (i.e. functions) can be provided as input to the
            model,

            and the model may respond with one or more tool calls.
        importedFrom:
          type: string
          description: >-
            The name of the model from which this was imported. This field
            is empty

            if the model was not imported.
          readOnly: true
        fineTuningJob:
          type: string
          description: >-
            If the model was created from a fine-tuning job, this is the
            fine-tuning

            job name.
          readOnly: true
        defaultDraftModel:
          type: string
          description: |-
            The default draft model to use when creating a deployment. If empty,
            speculative decoding is disabled by default.
        defaultDraftTokenCount:
          type: integer
          format: int32
          description: |-
            The default draft token count to use when creating a deployment.
            Must be specified if default_draft_model is specified.
        deployedModelRefs:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayDeployedModelRef'
          description: Populated from GetModel API call only.
          readOnly: true
        cluster:
          type: string
          description: |-
            The resource name of the BYOC cluster to which this model belongs.
            e.g. accounts/my-account/clusters/my-cluster. Empty if it belongs to
            a Fireworks cluster.
          readOnly: true
        deprecationDate:
          $ref: '#/components/schemas/typeDate'
          description: >-
            If specified, this is the date when the serverless deployment of the
            model will be taken down.
        calibrated:
          type: boolean
          description: >-
            If true, the model is calibrated and can be deployed to non-FP16
            precisions.
          readOnly: true
        tunable:
          type: boolean
          description: >-
            Deprecated: V1 training stack only — LoRA only, limited architecture
            support.

            If the model has use_training_v2=true and your account has
            AllowTrainingV2,

            use supervised_lora_tunable and supervised_full_parameter_tunable
            instead.
          readOnly: true
        supportsLora:
          type: boolean
          description: Whether this model supports LoRA.
        useHfApplyChatTemplate:
          type: boolean
          description: >-
            If true, the model will use the Hugging Face apply_chat_template API
            to apply the chat template.
        updateTime:
          type: string
          format: date-time
          description: The update time for the model.
          readOnly: true
        defaultSamplingParams:
          type: object
          additionalProperties:
            type: number
            format: float
          description: >-
            A json object that contains the default sampling parameters for the
            model.
          readOnly: true
        rlTunable:
          type: boolean
          description: >-
            Deprecated: V1 training stack only — LoRA only, limited architecture
            support.

            If the model has use_training_v2=true and your account has
            AllowTrainingV2,

            use rl_lora_tunable and rl_full_parameter_tunable instead.
          readOnly: true
        # NOTE(review): this description is word-for-word the same as
        # `contextLength`; presumably it should describe the training-time
        # limit -- confirm against the source definition before rewording.
        trainingContextLength:
          type: integer
          format: int32
          description: The maximum context length supported by the model.
        snapshotType:
          $ref: '#/components/schemas/ModelSnapshotType'
        supportsServerless:
          type: boolean
          description: If true, the model has a serverless deployment.
          readOnly: true
        supervisedLoraTunable:
          type: boolean
          description: >-
            V2 only. Whether the model supports LoRA supervised fine-tuning and
            DPO (lora_rank > 0).

            True when a validated LORA_TRAINER training shape exists.
          readOnly: true
        supervisedFullParameterTunable:
          type: boolean
          description: >-
            V2 only. Whether the model supports full-parameter supervised
            fine-tuning and DPO (lora_rank = 0).

            True when a validated POLICY_TRAINER training shape exists.
          readOnly: true
        rlLoraTunable:
          type: boolean
          description: >-
            V2 only. Whether the model supports LoRA reinforcement learning
            (lora_rank > 0).

            True when a validated LORA_TRAINER training shape exists plus a
            deployment shape.
          readOnly: true
        rlFullParameterTunable:
          type: boolean
          description: >-
            V2 only. Whether the model supports full-parameter reinforcement
            learning (lora_rank = 0).

            True when validated POLICY_TRAINER + FORWARD_ONLY training shapes
            exist plus a deployment shape.
          readOnly: true
    # String enum for a model's lifecycle state; the default is
    # STATE_UNSPECIFIED. UPLOADING and READY are explained in the description.
    gatewayModelState:
      type: string
      enum:
        - STATE_UNSPECIFIED
        - UPLOADING
        - READY
      default: STATE_UNSPECIFIED
      description: |-
        - UPLOADING: The model is still being uploaded (upload is asynchronous).
         - READY: The model is ready to be used.
    # Model version record: a resource `name`, a `createTime`, and a `snapshot`
    # that embeds a full gatewayModel. All three fields are read-only.
    # NOTE(review): the `name` description says "deployment snapshot" although
    # this schema is a model version -- looks copy-pasted; confirm the wording.
    gatewayModelVersion:
      type: object
      properties:
        name:
          type: string
          description: The resource name of the deployment snapshot.
          readOnly: true
        createTime:
          type: string
          format: date-time
          readOnly: true
        snapshot:
          $ref: '#/components/schemas/gatewayModel'
          readOnly: true
    # String enum of multi-region identifiers; the default is
    # MULTI_REGION_UNSPECIFIED. Referenced by gatewayPlacement.multiRegion.
    gatewayMultiRegion:
      type: string
      enum:
        - MULTI_REGION_UNSPECIFIED
        - GLOBAL
        - US
        - EUROPE
        - APAC
      default: MULTI_REGION_UNSPECIFIED
    # Account notification preferences; currently a single property,
    # `monthlySpendThresholds` (an array of typeMoney amounts).
    # NOTE(review): the upstream schema description continued with a dangling
    # fragment -- "(accounts/{account}/notificationSettings) when more
    # notification categories are added. Requires adding singleton support to
    # the resource generator." -- which reads like an internal TODO whose
    # opening line was dropped during generation. It is preserved here as a
    # comment instead of being published; confirm the intent at the source.
    gatewayNotificationSettings:
      type: object
      properties:
        monthlySpendThresholds:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/typeMoney'
          description: >-
            Spend thresholds at which to send monthly usage warning emails.

            These should be below the account's monthly spend quota.

            Example: [{currency_code: "USD", units: 500}, {currency_code: "USD",
            units: 800}]
              would alert at $500 and $800.
            Note: An alert at 80% of the monthly spend limit is always added

            in addition to the thresholds specified in this list.
      description: >-
        NotificationSettings configures notification preferences for an account.
    # OIDC configuration: issuer URL, client ID, and client secret. All three
    # properties are listed under `required`.
    # NOTE(review): `clientSecret` is a plain string with no `format: password`
    # or `writeOnly` hint -- confirm whether it is ever returned in responses.
    gatewayOidcConfig:
      type: object
      properties:
        issuerUrl:
          type: string
          title: OIDC issuer URL
        clientId:
          type: string
          title: Client ID
        clientSecret:
          type: string
          title: Client secret
      title: OIDC Configuration
      required:
        - issuerUrl
        - clientId
        - clientSecret
    # ORPO training hyperparameters; the only declared property is `lambda`,
    # a float weight for the odds-ratio loss term.
    gatewayOrpoConfig:
      type: object
      properties:
        lambda:
          type: number
          format: float
          description: Weight for the ORPO odds-ratio loss term.
      description: Hyperparameters for Odds Ratio Preference Optimization (ORPO) training.
    # PEFT addon details. `baseModel`, `r`, and `targetModules` are required;
    # `baseModelType` is read-only. The 4..64 range stated for `r` is
    # documented only, not enforced via minimum/maximum.
    gatewayPEFTDetails:
      type: object
      properties:
        baseModel:
          type: string
          title: The base model name. e.g. accounts/fireworks/models/falcon-7b
        r:
          type: integer
          format: int32
          description: |-
            The rank of the update matrices.
            Must be between 4 and 64, inclusive.
        targetModules:
          type: array
          items:
            type: string
          # NOTE(review): the upstream text read "This is the target modules
          # for an adapter that we extract from" and stopped mid-sentence; the
          # missing object of "extract from" is unknown -- confirm at the source.
          title: >-
            The target modules for the adapter.

            For more information on what a target module means, see

            https://huggingface.co/docs/peft/conceptual_guides/lora#common-lora-parameters-in-peft
        baseModelType:
          type: string
          description: The type of the model.
          readOnly: true
        mergeAddonModelName:
          type: string
          title: >-
            The resource name of the model to merge with base model, e.g.
            accounts/fireworks/models/falcon-7b-lora
      title: PEFT addon details.
      required:
        - baseModel
        - r
        - targetModules
    # Deployment placement: a single `region`, a `multiRegion`, or a list of
    # `regions`. The description states that exactly one field is specified;
    # the schema declares no `oneOf`, so that rule is not enforced here.
    gatewayPlacement:
      type: object
      properties:
        region:
          $ref: '#/components/schemas/gatewayRegion'
          description: The region where the deployment must be placed.
        multiRegion:
          $ref: '#/components/schemas/gatewayMultiRegion'
          description: The multi-region where the deployment must be placed.
        regions:
          type: array
          items:
            $ref: '#/components/schemas/gatewayRegion'
          title: The list of regions where the deployment must be placed
      description: >-
        The desired geographic region where the deployment must be placed.
        Exactly one field will be

        specified.
    gatewayPolicySettings:
      # Per-account policy resource. name and updateTime are readOnly; rules
      # carries the model access rules.
      title: |-
        Account-level policy settings (singleton per account). Holds model access and may grow with
        other policy sections (e.g. regional residency) without separate top-level API resources.
      type: object
      properties:
        name:
          title: Resource name, e.g. accounts/my-account/policySettings
          readOnly: true
          type: string
        rules:
          description: Full model allowlist (governance doc §1); empty means default-deny for all models.
          type: array
          items:
            type: object
            $ref: '#/components/schemas/PolicySettingsModelAccessRule'
        updateTime:
          description: The update time for the policy settings resource.
          readOnly: true
          type: string
          format: date-time
    gatewayPreviewDatasetResponse:
      # One page of dataset examples together with pagination metadata.
      type: object
      properties:
        examples:
          description: The list of examples in the dataset for the requested page.
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayExample'
        nextPageToken:
          description: |-
            A token, which can be sent as `page_token` to retrieve the next page.
            If this field is omitted, there are no subsequent pages.
          type: string
        totalCount:
          description: The total number of examples in the dataset.
          type: integer
          format: int32
    gatewayPreviewEvaluationResponse:
      # Response payload for a preview evaluation: per-sample results plus
      # two aggregate counters.
      type: object
      properties:
        results:
          type: array
          items:
            type: object
            $ref: '#/components/schemas/gatewayPreviewEvaluationResult'
          title: Results for each sample
        totalSamples:
          type: integer
          format: int32
          # NOTE(review): this title reads like a section heading rather than
          # a description of the field itself — confirm against the upstream
          # definition.
          title: Summary statistics
        totalRuntimeMs:
          # int64 value carried as a string.
          type: string
          format: int64
          # Typo fixed: previously "overalll runtime".
          title: overall runtime
      title: Response for preview evaluation
    gatewayPreviewEvaluationResult:
      # One evaluation result produced by a preview run.
      type: object
      properties:
        success:
          type: boolean
          # NOTE(review): the title does not match a boolean field; it looks
          # shifted from a neighbouring field upstream — confirm and correct
          # at the source.
          title: The input sample
        reason:
          type: string
          # NOTE(review): same suspicion as for `success` — verify this title.
          title: Output from the evaluation run
        score:
          type: number
          format: double
          description: |-
            Score (if applicable)
            Deprecated: Use metrics field instead.
          # Machine-readable counterpart of the deprecation stated in the
          # description, so tooling can surface it.
          deprecated: true
        messages:
          type: array
          items:
            type: object
          title: messages, which can be any kind of object
        metrics:
          # Map of metric name to a double value.
          type: object
          additionalProperties:
            type: number
            format: double
          title: Metrics from the evaluation run
      title: A single evaluation result from a preview
    gatewayPreviewEvaluatorResponse:
      # Preview-evaluator output: per-sample results, aggregate counters and
      # stdout/stderr as string arrays.
      type: object
      properties:
        results:
          items:
            $ref: '#/components/schemas/gatewayPreviewEvaluatorSampleResult'
            type: object
          type: array
        totalSamples:
          format: int32
          type: integer
        totalRuntimeMs:
          format: int64
          type: string
        stdout:
          items:
            type: string
          type: array
        stderr:
          items:
            type: string
          type: array
    gatewayPreviewEvaluatorSampleResult:
      # Evaluator outcome for one sample. Note that `success` is typed as a
      # string here, not a boolean (see its title).
      type: object
      properties:
        success:
          title: Overall eval success (true) or failure (false) on the sample, empty string means success not defined
          type: string
        score:
          title: Score (rollup score if applicable)
          type: number
          format: double
        perMetricEvals:
          title: Per metric eval results
          type: object
          additionalProperties:
            type: object
        reason:
          title: reason for the eval result
          type: string
    gatewayPricingPlan:
      # Custom billing plan for an account. billingType and startTime are
      # required; name, createTime, updateTime and state are readOnly.
      description: |-
        PricingPlan defines a custom billing plan for an account
        Each PricingPlan is associated with a contract, and describes the pricing terms for a specific service.
      type: object
      required:
        - billingType
        - startTime
      properties:
        name:
          title: |-
            The resource name of the pricing plan
            Format: accounts/{account}/pricingPlans/{pricing_plan_id}
          readOnly: true
          type: string
        billingType:
          title: Billing type determines how usage is tracked and billed
          $ref: '#/components/schemas/PricingPlanBillingType'
        tokenConfig:
          title: Configuration for token-based billing
          type: array
          items:
            type: object
            $ref: '#/components/schemas/PricingPlanTokenBillingConfig'
        acceleratorHourConfig:
          title: Configuration for accelerator-hour billing
          type: array
          items:
            type: object
            $ref: '#/components/schemas/PricingPlanAcceleratorHourBillingConfig'
        startTime:
          title: When this pricing plan becomes active
          type: string
          format: date-time
        endTime:
          title: When this pricing plan expires
          type: string
          format: date-time
        createTime:
          title: Creation timestamp
          readOnly: true
          type: string
          format: date-time
        updateTime:
          title: Last update timestamp
          readOnly: true
          type: string
          format: date-time
        state:
          title: The state of the pricing plan
          readOnly: true
          $ref: '#/components/schemas/gatewayPricingPlanState'
    gatewayPricingPlanState:
      # State enum for a pricing plan; defaults to STATE_UNSPECIFIED.
      default: STATE_UNSPECIFIED
      type: string
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
    gatewayPromoteCheckpointResponse:
      # Response wrapper holding the model created by the promotion.
      type: object
      properties:
        model:
          description: The created model.
          $ref: '#/components/schemas/gatewayModel'
    gatewayProvider:
      # An LLM provider entry: an ID, an optional label and an optional
      # string-to-string configuration map.
      title: Represents an LLM provider
      type: object
      properties:
        id:
          title: Provider ID (e.g., "openai:gpt-4")
          type: string
        config:
          title: Optional provider-specific configuration
          type: object
          additionalProperties:
            type: string
        label:
          title: Optional label for the provider
          type: string
    gatewayPurpose:
      # Two-valued enum; PURPOSE_UNSPECIFIED is the default.
      description: Scheduling purpose for training jobs and deployments.
      default: PURPOSE_UNSPECIFIED
      type: string
      enum:
        - PURPOSE_UNSPECIFIED
        - PURPOSE_PILOT
    gatewayQuota:
      # Quota resource. value and maxValue are int64 values carried as
      # strings; name, usage and updateTime are readOnly.
      type: object
      properties:
        name:
          title: The resource name of the quota, e.g. accounts/my-account/quotas/h100-us-iowa-1
          readOnly: true
          type: string
        value:
          description: |-
            The value of the quota being enforced. This may be lower than the max_value
            if the user manually lowers it.
          type: string
          format: int64
        maxValue:
          description: The maximum approved value.
          type: string
          format: int64
        usage:
          description: The usage of the quota.
          readOnly: true
          type: number
          format: double
        updateTime:
          description: The update time for the quota.
          readOnly: true
          type: string
          format: date-time
    gatewayRedeemCreditCodeRequest:
      # Both the credit code and the redeeming account's resource name are
      # required.
      description: RedeemCreditCodeRequest is the request to redeem a credit code for an account.
      type: object
      required:
        - code
        - name
      properties:
        code:
          description: The user-facing credit code string (e.g., "HACKATHON_2025").
          type: string
        name:
          description: The resource name of the account redeeming the credit code.
          type: string
    gatewayRedeemCreditCodeResponse:
      # Carries a single money amount.
      description: RedeemCreditCodeResponse returns the amount of credits redeemed.
      type: object
      properties:
        amount:
          description: The amount of credits redeemed.
          $ref: '#/components/schemas/typeMoney'
    gatewayRefreshSessionTokenRequest:
      # Single-field request body carrying the refresh token.
      description: Request to refresh an existing session JWT before it expires.
      type: object
      properties:
        refreshToken:
          description: The refresh token used to mint a fresh access token.
          type: string
    gatewayRefreshSessionTokenResponse:
      # Returns the new JWT and a refresh token, each paired with its own
      # expiry timestamp.
      description: Response containing the refreshed session JWT.
      type: object
      properties:
        token:
          description: New signed JWT with fresh TTL and the same session claims.
          type: string
        expireTime:
          description: When the new token expires.
          type: string
          format: date-time
        refreshToken:
          description: |-
            Refresh token for subsequent refreshes. In V1 this may be unchanged;
            later versions may rotate it.
          type: string
        refreshExpireTime:
          description: When the refresh token expires.
          type: string
          format: date-time
    gatewayRegion:
      # String enum of region identifiers; REGION_UNSPECIFIED is the default.
      # The values are kept in their original order.
      default: REGION_UNSPECIFIED
      type: string
      enum:
        - REGION_UNSPECIFIED
        - US_IOWA_1
        - US_VIRGINIA_1
        - US_VIRGINIA_2
        - US_ILLINOIS_1
        - AP_TOKYO_1
        - US_ARIZONA_1
        - US_TEXAS_1
        - US_ILLINOIS_2
        - EU_FRANKFURT_1
        - US_TEXAS_2
        - EU_ICELAND_1
        - EU_ICELAND_2
        - US_WASHINGTON_1
        - US_WASHINGTON_2
        - US_WASHINGTON_3
        - AP_TOKYO_2
        - US_CALIFORNIA_1
        - US_UTAH_1
        - US_GEORGIA_1
        - US_GEORGIA_2
        - US_WASHINGTON_4
        - US_GEORGIA_3
        - NA_BRITISHCOLUMBIA_1
        - US_GEORGIA_4
        - US_OHIO_1
        - US_NEWYORK_1
        - EU_NETHERLANDS_1
        - US_WASHINGTON_5
        - US_MINNESOTA_1
        - US_CALIFORNIA_2
        - AP_MALAYSIA_1
        - US_OHIO_2
    gatewayReinforcementFineTuningJob:
      # Reinforcement fine-tuning (RFT) job resource. Only `dataset` and
      # `evaluator` are required (see the `required` list at the end); many
      # bookkeeping fields are marked readOnly.
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        completedTime:
          type: string
          format: date-time
          description: The completed time for the reinforcement fine-tuning job.
          readOnly: true
        # --- Dataset selection ---
        dataset:
          type: string
          description: The name of the dataset used for training.
        evaluationDataset:
          type: string
          description: The name of a separate dataset to use for evaluation.
        evalAutoCarveout:
          type: boolean
          description: Whether to auto-carve the dataset for eval.
        # --- Job state (readOnly) ---
        state:
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        createdBy:
          type: string
          description: The email address of the user who initiated this fine-tuning job.
          readOnly: true
        trainingConfig:
          $ref: '#/components/schemas/gatewayBaseTrainingConfig'
          description: Common training configurations.
        evaluator:
          type: string
          description: The evaluator resource name to use for RLOR fine-tuning job.
        # --- External integrations ---
        wandbConfig:
          $ref: '#/components/schemas/gatewayWandbConfig'
          description: >-
            The Weights & Biases team/user account for logging training
            progress.
        awsS3Config:
          $ref: '#/components/schemas/gatewayAwsS3Config'
          description: The AWS configuration for S3 dataset access.
        azureBlobStorageConfig:
          $ref: '#/components/schemas/gatewayAzureBlobStorageConfig'
          description: The Azure configuration for Blob Storage dataset access.
        # NOTE(review): the description below mentions "the evaluation job",
        # which looks carried over from another resource — confirm wording.
        outputStats:
          type: string
          description: The output dataset's aggregated stats for the evaluation job.
          readOnly: true
        jobProgress:
          $ref: '#/components/schemas/gatewayJobProgress'
          description: Job progress.
          readOnly: true
        # The $ref below is a folded scalar purely because of its length; it
        # resolves to a single-line reference string.
        inferenceParameters:
          $ref: >-
            #/components/schemas/gatewayReinforcementFineTuningJobInferenceParameters
          description: RFT inference parameters.
        chunkSize:
          type: integer
          format: int32
          description: >-
            Data chunking for rollout, default size 200, enabled when dataset >
            300. Valid range is 1-10,000.
        outputMetrics:
          type: string
          readOnly: true
        maxInferenceReplicaCount:
          type: integer
          format: int32
          title: |-
            Maximum number of replicas to use for the deployment.
            Default is 1
        nodeCount:
          type: integer
          format: int32
          description: |-
            The number of nodes to use for the fine-tuning job.
            If not specified, the default is 1.
        lossConfig:
          $ref: '#/components/schemas/gatewayReinforcementLearningLossConfig'
          description: >-
            Reinforcement learning loss method + hyperparameters for the
            underlying trainers.
        trainerLogsSignedUrl:
          type: string
          description: |-
            The signed URL for the trainer logs file (stdout/stderr).
            Only populated if the account has trainer log reading enabled.
          readOnly: true
        # Map keyed by accelerator type; each value is an int64 carried as a
        # string.
        acceleratorSeconds:
          type: object
          additionalProperties:
            type: string
            format: int64
          description: >-
            Accelerator seconds used by the job, keyed by accelerator type
            (e.g., "NVIDIA_H100_80GB"). Updated when job completes or is
            cancelled.
          readOnly: true
        # --- Concurrency limits ---
        maxConcurrentRollouts:
          type: integer
          format: int32
          description: Maximum number of concurrent rollouts during the RFT job.
        maxConcurrentEvaluations:
          type: integer
          format: int32
          description: Maximum number of concurrent evaluations during the RFT job.
        purpose:
          $ref: '#/components/schemas/gatewayPurpose'
          description: Scheduling purpose for this job.
      required:
        - dataset
        - evaluator
    gatewayReinforcementFineTuningJobInferenceParameters:
      # Sampling and generation settings for RFT inference; no field is
      # marked required.
      title: RFT inference parameters
      type: object
      properties:
        maxOutputTokens:
          description: Maximum number of tokens to generate per response.
          type: integer
          format: int32
        temperature:
          description: Sampling temperature, typically between 0 and 2.
          type: number
          format: float
        topP:
          description: Top-p sampling parameter, typically between 0 and 1.
          type: number
          format: float
        responseCandidatesCount:
          title: Number of response candidates to generate per input. RFT requires at least 2 candidates
          type: integer
          format: int32
        extraBody:
          description: |-
            Additional parameters for the inference request as a JSON string.
            For example: "{\"stop\": [\"\\n\"]}".
          type: string
        topK:
          description: Top-k sampling parameter, limits the token selection to the top k tokens.
          type: integer
          format: int32
    gatewayReinforcementLearningLossConfig:
      # Selects the loss method and carries optional per-method settings
      # (klBeta, dpo, orpo).
      description: |-
        Loss method + hyperparameters for reinforcement-learning-style fine-tuning (e.g. RFT / RL trainers).
        For preference jobs (DPO API), the default loss method is GRPO when METHOD_UNSPECIFIED.
      type: object
      properties:
        method:
          $ref: '#/components/schemas/ReinforcementLearningLossConfigMethod'
        klBeta:
          description: |-
            KL coefficient (beta) override for GRPO-like methods.
            If unset, the trainer default is used.
          type: number
          format: float
        dpo:
          description: DPO-specific configuration. Intended for METHOD=DPO.
          $ref: '#/components/schemas/gatewayDpoConfig'
        orpo:
          description: ORPO-specific configuration. Intended for METHOD=ORPO.
          $ref: '#/components/schemas/gatewayOrpoConfig'
    gatewayReplicaStats:
      # Replica counters; every property here is readOnly. All are int32
      # except partialReplicaCount, which is a float.
      type: object
      properties:
        pendingSchedulingReplicaCount:
          description: Number of replicas waiting to be scheduled to a node.
          readOnly: true
          type: integer
          format: int32
        downloadingModelReplicaCount:
          description: Number of replicas downloading model weights.
          readOnly: true
          type: integer
          format: int32
        initializingReplicaCount:
          description: Number of replicas initializing the model server.
          readOnly: true
          type: integer
          format: int32
        readyReplicaCount:
          description: Number of replicas that are ready and serving traffic.
          readOnly: true
          type: integer
          format: int32
        revocableReplicaCount:
          title: The number of replicas that can be revoked
          readOnly: true
          type: integer
          format: int32
        partialReplicaCount:
          title: Number of partial replicas before a full replica is ready
          readOnly: true
          type: number
          format: float
    gatewayReservation:
      # Reservation resource. `acceleratorType` and `reservedCount` are
      # required; name, createTime, createdBy and updateTime are readOnly.
      type: object
      properties:
        name:
          type: string
          title: >-
            The resource name of the reservation. e.g.
            accounts/my-account/reservations/abcdef
          readOnly: true
        displayName:
          type: string
          description: >-
            Human-readable display name of the reservation. e.g. "My
            Reservation"

            Must be fewer than 64 characters long.
        description:
          type: string
          description: Description of the reservation.
        createTime:
          type: string
          format: date-time
          description: The creation time of the reservation.
          readOnly: true
        createdBy:
          type: string
          description: The email address of the user who created this reservation.
          readOnly: true
        # region / multiRegion: per their descriptions exactly one of the two
        # must be set; the schema itself does not encode that constraint.
        region:
          $ref: '#/components/schemas/gatewayRegion'
          description: >-
            The region for this reservation. Exactly one of region or
            multi_region must

            be specified.
        multiRegion:
          $ref: '#/components/schemas/gatewayMultiRegion'
          description: |-
            The multi-region for this reservation. Exactly one of region or
            multi_region must be specified.
        acceleratorType:
          $ref: '#/components/schemas/gatewayAcceleratorType'
          description: The type of accelerator for this reservation.
        reservedCount:
          type: integer
          format: int32
          description: The number of accelerators reserved.
        # Reservation window: startTime is inclusive, endTime is exclusive
        # (as stated in the descriptions below).
        startTime:
          type: string
          format: date-time
          description: >-
            The timestamp at which the reservation starts (inclusive).

            If unspecified, this is midnight of the following calendar day
            (UTC).
        endTime:
          type: string
          format: date-time
          description: |-
            The timestamp at which the reservation ends (exclusive).
            If unspecified, this will be 1 calendar year after the start time.
        updateTime:
          type: string
          format: date-time
          description: The update time for the reservation.
          readOnly: true
      required:
        - acceleratorType
        - reservedCount
    gatewayRlorTrainerJob:
      # RLOR trainer job resource. No `required` list is declared; several
      # bookkeeping fields are readOnly.
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        completedTime:
          type: string
          format: date-time
          readOnly: true
        dataset:
          type: string
          description: The name of the dataset used for training.
        evaluationDataset:
          type: string
          description: The name of a separate dataset to use for evaluation.
        evalAutoCarveout:
          type: boolean
          description: Whether to auto-carve the dataset for eval.
        state:
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        createdBy:
          type: string
          description: The email address of the user who initiated this fine-tuning job.
          readOnly: true
        trainingConfig:
          $ref: '#/components/schemas/gatewayBaseTrainingConfig'
          description: Common training configurations.
        rewardWeights:
          type: array
          items:
            type: string
          description: >-
            A list of reward metrics to use for training in format of
            "<reward_name>=<weight>".
        wandbConfig:
          $ref: '#/components/schemas/gatewayWandbConfig'
          description: >-
            The Weights & Biases team/user account for logging training
            progress.
        awsS3Config:
          $ref: '#/components/schemas/gatewayAwsS3Config'
          description: The AWS configuration for S3 dataset access.
        azureBlobStorageConfig:
          $ref: '#/components/schemas/gatewayAzureBlobStorageConfig'
          description: The Azure configuration for Azure Blob Storage dataset access.
        jobProgress:
          $ref: '#/components/schemas/gatewayJobProgress'
          description: Job progress.
          readOnly: true
        keepAlive:
          type: boolean
          title: indicates this RLOR trainer job should run in keep-alive mode
        rolloutDeploymentName:
          type: string
          description: >-
            Rollout deployment name associated with this RLOR trainer job.

            This is optional. If not set, trainer will not trigger weight sync
            to rollout engine.
        lossConfig:
          $ref: '#/components/schemas/gatewayReinforcementLearningLossConfig'
          description: >-
            Reinforcement learning loss method + hyperparameters for the
            underlying trainer.
        nodeCount:
          type: integer
          format: int32
          description: |-
            The number of nodes to use for the fine-tuning job.
            If not specified, the default is 1.
        # Map keyed by accelerator type; each value is an int64 carried as a
        # string.
        acceleratorSeconds:
          type: object
          additionalProperties:
            type: string
            format: int64
          description: >-
            Accelerator seconds used by the job, keyed by accelerator type
            (e.g., "NVIDIA_H100_80GB").

            Updated periodically.
          readOnly: true
        serviceMode:
          type: boolean
          title: >-
            Whether to deploy as a service with tinker-style api endpoints
            exposure
        directRouteHandle:
          type: string
          title: |-
            Only valid when service_mode enabled
            The direct route handle for the trainer in service mode (tinker api)
          readOnly: true
        hotLoadDeploymentId:
          type: string
          description: >-
            The deployment ID used for hot loading. When set, checkpoints are
            saved

            to this deployment's hot load bucket, enabling weight swaps on
            inference.

            Only valid for service-mode or keep-alive jobs.
        purpose:
          $ref: '#/components/schemas/gatewayPurpose'
          description: Scheduling purpose for this job.
        forwardOnly:
          type: boolean
          description: >-
            When true, run the trainer in forward-only mode (no
            backward/optimizer).

            Used for reference models in GRPO that only need forward passes.
        managedBy:
          type: string
          description: For managed service use only. Users do not need to set this field.
      # The previous title was internal field-numbering bookkeeping that had
      # leaked into the public schema:
      #   'Next ID: 36 (field 34 reserved for removed public_logs_signed_url)'
      # It is kept here as a comment and replaced with a reader-facing title.
      title: RLOR trainer job
    gatewayRlorTrainerJobCheckpoint:
      # Every property of this schema is readOnly.
      description: |-
        A checkpoint saved during an RLOR trainer job. Checkpoints are stored in GCS
        and discovered on-demand (not persisted in the database).
      type: object
      properties:
        name:
          title: |-
            The resource name of the checkpoint.
            Format: accounts/{account}/rlorTrainerJobs/{job}/checkpoints/{checkpoint}
          readOnly: true
          type: string
        createTime:
          description: The creation time of the checkpoint (from GCS object metadata).
          readOnly: true
          type: string
          format: date-time
        updateTime:
          description: |-
            The update time of the checkpoint. Equal to create_time since
            checkpoints are immutable GCS objects.
          readOnly: true
          type: string
          format: date-time
        checkpointType:
          description: The type of checkpoint.
          readOnly: true
          $ref: '#/components/schemas/gatewayCheckpointType'
        promotable:
          description: |-
            Whether this checkpoint can be promoted to a model.
            Only INFERENCE_BASE and INFERENCE_LORA checkpoints are promotable.
            INFERENCE_ARC_V2 and TRAINING_* checkpoints cannot be promoted directly.
          readOnly: true
          type: boolean
    gatewayRolloutStrategy:
      # String enum; ROLLOUT_STRATEGY_UNSPECIFIED is the default. The
      # description is a literal block and is kept exactly as written.
      description: |-
        The rollout strategy to use when deploying the model version.

         - ROLLOUT_STRATEGY_STANDARD: Standard rollout strategy updates the deployment using a normal k8s rolling restart
         - ROLLOUT_STRATEGY_HOT_RELOAD: Hot reload rollout strategy updates the deployment by hot reloading the model version on the existing replicas of the deployment
      default: ROLLOUT_STRATEGY_UNSPECIFIED
      type: string
      enum:
        - ROLLOUT_STRATEGY_UNSPECIFIED
        - ROLLOUT_STRATEGY_STANDARD
        - ROLLOUT_STRATEGY_HOT_RELOAD
    gatewayRouter:
      # Router resource that covers one or more deployments. No `required`
      # list is declared on this schema.
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        createdBy:
          type: string
          readOnly: true
        state:
          $ref: '#/components/schemas/gatewayRouterState'
          description: The state of the router.
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        deployments:
          type: array
          items:
            type: string
          description: The deployment names to be covered by the router.
        model:
          type: string
          description: |-
            The model name to route requests to.
            model is only applicable to single-region deployments.
            For multi-region deployments, model must be empty.
        # NOTE(review): weightedRandom and evenLoad are presumably alternative
        # routing strategies (at most one set) — the schema does not say so;
        # confirm against the service definition.
        weightedRandom:
          $ref: '#/components/schemas/gatewayWeightedRandomStrategy'
        evenLoad:
          $ref: '#/components/schemas/gatewayEvenLoadStrategy'
        # The title below is a folded scalar whose more-indented example
        # lines keep their own line breaks; do not re-wrap it.
        aliases:
          type: array
          items:
            type: string
          title: >-
            Aliases for the router. These are the alias names that can be used
            to

            route requests through the router.

            By default, these aliases are generated by the system.

            Examples:
              "accounts/foo/deployments/my-deployment" or "foo/my-deployment"
              "accounts/foo/routers/my-router"
              "accounts/foo/models/my-base-model" - only if this is the first deployment using this model
          readOnly: true
        autoGenerated:
          type: boolean
          description: >-
            auto_generated indicates this router was generated by the system for
            a deployment.

            Auto-generated routers cannot be updated or deleted by the user.
          readOnly: true
        public:
          type: boolean
          description: >-
            True if the router is public (any account can query the underlying
            workload), false if the router is private

            (only the account that owns the router can query the underlying
            workload).
    gatewayRouterState:
      # State enum for a router; STATE_UNSPECIFIED is the default. The
      # description is a literal block and is kept exactly as written.
      description: |-
        - CREATING: The router is being created.
         - READY: The router is ready for access.
         - UPDATING: There are in-progress updates happening with the router.
         - DELETING: The router is being deleted.
      default: STATE_UNSPECIFIED
      type: string
      enum:
        - STATE_UNSPECIFIED
        - CREATING
        - READY
        - UPDATING
        - DELETING
    gatewaySamlConfig:
      # Two alternative ways to supply SAML metadata: by URL or inline XML.
      # The "exactly one" rule is stated in prose only, not in the schema.
      description: |-
        SAML Configuration
        Exactly one of metadata_url or metadata_xml must be provided.
      type: object
      properties:
        metadataUrl:
          title: SAML metadata URL (must be publicly accessible)
          type: string
        metadataXml:
          title: SAML metadata XML content (use when metadata URL is not publicly accessible)
          type: string
    gatewayScalingSchedule:
      # Time-based scaling schedule: a cron start (`schedule` + `timezone`)
      # plus a `duration`, during which `minReplicaCount` applies. Those four
      # fields are required; `description` and `disabled` are optional.
      type: object
      properties:
        minReplicaCount:
          type: integer
          format: int32
          description: >-
            Minimum number of replicas guaranteed when this schedule is active.

            When multiple schedules overlap, the effective minimum is the
            highest

            min_replica_count across all active schedules ("max wins").

            Must be >= 0 and <= the deployment's max_replica_count.
        schedule:
          type: string
          description: >-
            Cron expression defining when this schedule's window starts.

            Standard 5-field cron format: minute hour day-of-month month
            day-of-week.

            Examples: "0 8 * * Mon-Fri" (8am weekdays), "0 0 1 * *" (midnight on
            1st of month).
        # duration is a string; the example in the description uses a
        # seconds-suffixed form ("36000s").
        duration:
          type: string
          description: >-
            Duration that the schedule remains active after the cron trigger.

            Must be between 300 seconds (5 minutes) and 604,800 seconds (7
            days),

            and use whole-second precision. Schedules needing longer windows
            should

            raise the deployment's base min_replica_count instead.

            Example: "36000s" = 10 hours (e.g., 8am to 6pm).
        timezone:
          type: string
          description: >-
            IANA timezone for the cron expression. e.g., "America/New_York",
            "Europe/London", "UTC".

            Required because cron expressions without a timezone are ambiguous.

            DST transitions are handled automatically.
        description:
          type: string
          description: |-
            Human-readable description of the schedule.
            e.g., "Weekday business hours", "Wednesday peak load".
        disabled:
          type: boolean
          description: >-
            If true, this schedule is temporarily disabled without being
            deleted.

            Useful for holidays or temporary schedule changes.
      title: >-
        A time-based scaling schedule that sets a minimum replica floor during a
        recurring window.

        Follows the GCP MIG scaling schedule pattern: cron start time +
        duration.
      required:
        - minReplicaCount
        - schedule
        - duration
        - timezone
    # A named secret. `name` and `keyName` are required; `value` is accepted on
    # create/update only and is never echoed back.
    gatewaySecret:
      type: object
      properties:
        name:
          type: string
          title: |-
            name follows the convention
            accounts/account-id/secrets/unkey-key-id
        keyName:
          type: string
          title: name of the key. In this case, it can be WOLFRAM_ALPHA_API_KEY
        value:
          type: string
          # The description declares this field INPUT_ONLY; writeOnly is the
          # machine-readable form of that contract, mirroring how output-only
          # fields elsewhere in this file carry readOnly.
          writeOnly: true
          example: sk-1234567890abcdef
          description: >-
            The secret value. This field is INPUT_ONLY and will not be returned
            in GET or LIST responses

            for security reasons. The value is only accepted when creating or
            updating secrets.
      required:
        - name
        - keyName
    # Result of a dataset split: created chunk names plus two counters.
    gatewaySplitDatasetResponse:
      title: Response message for dataset splitting
      type: object
      properties:
        chunkDatasetNames:
          title: The resource names of the created chunk datasets
          type: array
          items:
            type: string
        chunksCreated:
          title: The number of chunks created
          type: integer
          format: int32
        # int64 counter carried as a string (type: string + format: int64).
        totalExamples:
          title: The total number of examples processed
          type: string
          format: int64
    # Object with one required string field, sourceDatasetId.
    gatewaySplitted:
      type: object
      required: [sourceDatasetId]
      properties:
        sourceDatasetId:
          type: string
    # Status object: a code (see gatewayCode) and a developer-facing message.
    gatewayStatus:
      title: 'Mimics [https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto]'
      type: object
      properties:
        code:
          description: The status code.
          $ref: '#/components/schemas/gatewayCode'
        message:
          description: A developer-facing error message in English.
          type: string
    # Supervised fine-tuning job resource. Only `dataset` is required.
    # Fields marked readOnly are populated by the server. Fields whose
    # description starts with "Deprecated:" also carry `deprecated: true` so
    # that tooling can surface the deprecation.
    gatewaySupervisedFineTuningJob:
      type: object
      properties:
        name:
          type: string
          readOnly: true
        displayName:
          type: string
        createTime:
          type: string
          format: date-time
          readOnly: true
        completedTime:
          type: string
          format: date-time
          readOnly: true
        dataset:
          type: string
          description: The name of the dataset used for training.
        awsS3Config:
          $ref: '#/components/schemas/gatewayAwsS3Config'
          description: The AWS configuration for S3 dataset access.
        azureBlobStorageConfig:
          $ref: '#/components/schemas/gatewayAzureBlobStorageConfig'
          description: The Azure configuration for Azure Blob Storage dataset access.
        state:
          $ref: '#/components/schemas/gatewayJobState'
          readOnly: true
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
        createdBy:
          type: string
          description: The email address of the user who initiated this fine-tuning job.
          readOnly: true
        outputModel:
          type: string
          description: >-
            The model ID to be assigned to the resulting fine-tuned model. If
            not specified, the job ID will be used.
        baseModel:
          type: string
          description: |-
            The name of the base model to be fine-tuned
            Only one of 'base_model' or 'warm_start_from' should be specified.
        warmStartFrom:
          type: string
          description: |-
            The PEFT addon model in Fireworks format to be fine-tuned from
            Only one of 'base_model' or 'warm_start_from' should be specified.
        jinjaTemplate:
          type: string
          title: >-
            The Jinja template for conversation formatting. If not specified,
            defaults to the base model's conversation template configuration
        earlyStop:
          type: boolean
          description: >-
            Whether to stop training early if the validation loss does not
            improve.
        epochs:
          type: integer
          format: int32
          description: The number of epochs to train for.
        learningRate:
          type: number
          format: float
          description: The learning rate used for training.
        maxContextLength:
          type: integer
          format: int32
          description: The maximum context length to use with the model.
        loraRank:
          type: integer
          format: int32
          description: The rank of the LoRA layers.
        wandbConfig:
          $ref: '#/components/schemas/gatewayWandbConfig'
          description: >-
            The Weights & Biases team/user account for logging training
            progress.
        evaluationDataset:
          type: string
          description: The name of a separate dataset to use for evaluation.
        isTurbo:
          type: boolean
          description: Whether to run the fine-tuning job in turbo mode.
        evalAutoCarveout:
          type: boolean
          description: Whether to auto-carve the dataset for eval.
        updateTime:
          type: string
          format: date-time
          description: The update time for the supervised fine-tuning job.
          readOnly: true
        nodes:
          type: integer
          format: int32
          deprecated: true
          description: |-
            Deprecated: multi-node scheduling is now handled by the cookbook
            orchestrator in V2 workflows. This field is ignored for V2 jobs and
            will be removed in a future release.
        batchSize:
          type: integer
          format: int32
          title: The batch size for sequence packing in training
        mtpEnabled:
          type: boolean
          deprecated: true
          description: |-
            Deprecated: MTP is not supported in V2 training. These fields are
            retained for V1 Helm-based SFT backward compatibility only.
        mtpNumDraftTokens:
          type: integer
          format: int32
          deprecated: true
          description: 'Deprecated: see mtp_enabled.'
        mtpFreezeBaseModel:
          type: boolean
          deprecated: true
          description: 'Deprecated: see mtp_enabled.'
        jobProgress:
          $ref: '#/components/schemas/gatewayJobProgress'
          description: Job progress.
          readOnly: true
        metricsFileSignedUrl:
          type: string
          title: The signed URL for the metrics file
        trainerLogsSignedUrl:
          type: string
          description: |-
            The signed URL for the trainer logs file (stdout/stderr).
            Only populated if the account has trainer log reading enabled.
          readOnly: true
        gradientAccumulationSteps:
          type: integer
          format: int32
          title: Number of gradient accumulation steps
        learningRateWarmupSteps:
          type: integer
          format: int32
          title: Number of steps for learning rate warm up
        batchSizeSamples:
          type: integer
          format: int32
          description: The number of samples per gradient batch.
        estimatedCost:
          $ref: '#/components/schemas/typeMoney'
          description: The estimated cost of the job.
          readOnly: true
        optimizerWeightDecay:
          type: number
          format: float
          description: Weight decay (L2 regularization) for optimizer.
        purpose:
          $ref: '#/components/schemas/gatewayPurpose'
          description: Scheduling purpose for this job.
      required:
        - dataset
    # Object schema that declares no properties.
    gatewayTEFTDetails: { type: object }
    # One labelled series: a string-to-string label map plus its data points.
    gatewayTimeSeries:
      type: object
      properties:
        labels:
          title: |-
            Labels for this time series (e.g. {"deployment": "deploy1", "instance": "pod1"})
          type: object
          additionalProperties:
            type: string
        # Each element is a gatewayTimeSeriesPoint object.
        values:
          title: Array of [timestamp, value] pairs for this series
          type: array
          items:
            $ref: '#/components/schemas/gatewayTimeSeriesPoint'
            type: object
    # A single sample; both fields are carried as strings.
    gatewayTimeSeriesPoint:
      type: object
      properties:
        timestamp:
          title: Unix timestamp in seconds
          type: string
          format: int64
        value:
          title: The value at this timestamp
          type: string
    # Trail resource. name, createTime, updateTime, createdBy and
    # langfuseConfig are readOnly; the remaining fields are writable.
    gatewayTrail:
      type: object
      properties:
        name:
          type: string
          readOnly: true
          title: 'Resource name: accounts/{AccountId}/trails/{TrailId}'
        displayName:
          type: string
          title: Human-readable display name for the trail
        createTime:
          type: string
          format: date-time
          readOnly: true
          title: Creation timestamp
        updateTime:
          type: string
          format: date-time
          readOnly: true
          title: Last update timestamp
        description:
          type: string
          title: Optional description of what this trail is tracking
        createdBy:
          type: string
          readOnly: true
          title: The email address of the user who created this trail
        # Serialized JSON held in a plain string field.
        langfuseConfig:
          type: string
          readOnly: true
          description: |-
            Langfuse configuration for this trail, inherited from the account at creation time.
            Contains API credentials for a Project within the customer's Langfuse Organization.
            Serialized JSON format: {"public_key":"pk-lf-xxx","secret_key":"sk-lf-xxx","host":"https://langfuse-prod.fireworks.ai"}
            If empty, the default Langfuse Organization/Project will be used.
        defaultModel:
          type: string
          description: |-
            Default model for requests through this trail.
            Format: "{provider}/{model_id}" (e.g., "anthropic/claude-3-5-sonnet-20240620").
            Can be overridden per request.
        # Meaning differs between create time and afterwards; see description.
        providerKey:
          type: string
          description: |-
            Provider API key for this trail.
            When creating a trail: provide the raw API key (e.g., "sk-ant-api03-xxxx...")
            After creation: this field contains a secret reference (e.g., "accounts/{account_id}/secrets/trail-xxx-provider-key")
            The LiteLLM gateway retrieves the actual key from Secret Manager using this reference.
            Can be overridden by specifying api_key in the request body.
    # Trace and token counters for a trail. Every field is readOnly; the
    # counters are int64 values carried as strings.
    gatewayTrailMetrics:
      description: Metrics for a trail, fetched from the tracing service.
      type: object
      properties:
        totalTraceCount:
          type: string
          format: int64
          readOnly: true
          description: Total number of traces collected for this trail.
        firstTraceTime:
          type: string
          format: date-time
          readOnly: true
          description: Timestamp of the first trace in this trail.
        lastTraceTime:
          type: string
          format: date-time
          readOnly: true
          description: Timestamp of the most recent trace in this trail.
        promptTokenCount:
          type: string
          format: int64
          readOnly: true
          description: Number of prompt tokens across all traces in this trail.
        completionTokenCount:
          type: string
          format: int64
          readOnly: true
          description: Number of completion tokens across all traces in this trail.
        totalTokenCount:
          type: string
          format: int64
          readOnly: true
          description: Number of total tokens across all traces in this trail.
    # Four int32 parallelism degrees plus a sequence-parallelism switch.
    gatewayTrainerShardingScheme:
      description: Structured parallelism/sharding profile used by trainer launches.
      type: object
      properties:
        tensorParallelism:
          description: Tensor-parallel degree. 0 means unspecified (server defaults to 1).
          type: integer
          format: int32
        pipelineParallelism:
          description: Pipeline-parallel degree. 0 means unspecified (server defaults to 1).
          type: integer
          format: int32
        contextParallelism:
          description: Context-parallel degree. 0 means unspecified (server defaults to 1).
          type: integer
          format: int32
        expertParallelism:
          description: Expert-parallel degree. 0 means unspecified (server defaults to 1).
          type: integer
          format: int32
        sequenceParallelism:
          description: Whether sequence parallelism should be enabled.
          type: boolean
    # Training session resource. displayName is the only field not marked
    # readOnly.
    gatewayTrainingSession:
      description: |-
        TrainingSession represents a single training context on a shared trainer.
        It starts with a base model reference and can have LoRA adapters loaded or
        promoted via ExecuteTrainingSessionLoadState. Each session gets its own
        inference route in the API gateway.
      type: object
      properties:
        name:
          readOnly: true
          type: string
        displayName:
          type: string
          description: |-
            Human-readable display name of the training session. e.g. "Training session"
            Must be fewer than 64 characters long.
        createTime:
          readOnly: true
          type: string
          format: date-time
        updateTime:
          readOnly: true
          type: string
          format: date-time
        state:
          readOnly: true
          $ref: '#/components/schemas/gatewayTrainingSessionState'
        status:
          readOnly: true
          $ref: '#/components/schemas/gatewayStatus'
        createdBy:
          readOnly: true
          type: string
        referenceState:
          readOnly: true
          $ref: '#/components/schemas/TrainingSessionReferenceState'
    # Training session job resource. baseModel is required; displayName and
    # baseModel are the only fields not marked readOnly.
    gatewayTrainingSessionJob:
      description: |-
        TrainingSessionJob groups all TrainingSessions for a specific job run on a
        given base model. Each job is scoped to an account and bound to a shared
        trainer.
      type: object
      required: [baseModel]
      properties:
        name:
          readOnly: true
          type: string
        displayName:
          type: string
          description: |-
            Human-readable display name of the training session job. e.g. "Reference sessions"
            Must be fewer than 64 characters long.
        createTime:
          readOnly: true
          type: string
          format: date-time
        updateTime:
          readOnly: true
          type: string
          format: date-time
        state:
          readOnly: true
          $ref: '#/components/schemas/gatewayJobState'
        status:
          readOnly: true
          $ref: '#/components/schemas/gatewayStatus'
        createdBy:
          readOnly: true
          type: string
        baseModel:
          type: string
          description: Base model used for sessions created under this training session job.
    # String enum of training session states; the default is the
    # UNSPECIFIED member.
    gatewayTrainingSessionState:
      type: string
      default: TRAINING_SESSION_STATE_UNSPECIFIED
      enum: [TRAINING_SESSION_STATE_UNSPECIFIED, READY, FAILED]
    # Training shape resource. baseModel and trainerImageTag are required.
    gatewayTrainingShape:
      title: |-
        A training shape standardizes hardware/precision/parallelism defaults for training jobs
        of a given base model family and parameter bucket.
      type: object
      required: [baseModel, trainerImageTag]
      properties:
        name:
          type: string
          readOnly: true
          title: |-
            The resource name of the training shape. e.g. accounts/my-account/trainingShapes/my-training-shape
        displayName:
          type: string
          description: |-
            Human-readable display name of the training shape. e.g. "Llama3 70B H200 BF16"
            Must be fewer than 64 characters long.
        description:
          type: string
          description: |-
            The description of the training shape. Must be fewer than 1000 characters long.
        createTime:
          type: string
          format: date-time
          readOnly: true
          description: The creation time of the training shape.
        updateTime:
          type: string
          format: date-time
          readOnly: true
          description: The update time for the training shape.
        baseModel:
          type: string
          title: The base model name. e.g. accounts/fireworks/models/llama-3-70b
        deploymentShapeVersion:
          type: string
          title: |-
            The validated deployment shape version used for numerics verification.
            e.g. accounts/fireworks/deploymentShapes/rft-qwen3-4b/versions/abcd1234
        trainerImageTag:
          type: string
          description: |-
            The validated trainer runtime image tag used for numerics verification.
        trainerMode:
          $ref: '#/components/schemas/TrainingShapeTrainerMode'
          description: Trainer mode used for the validated launch profile.
        nodeCount:
          type: integer
          format: int32
          description: Node count validated for the launch profile.
        trainerShardingScheme:
          $ref: '#/components/schemas/gatewayTrainerShardingScheme'
          description: |-
            Structured sharding/parallelism profile validated for the trainer launch.
        modelType:
          type: string
          readOnly: true
          description: The model type of the base model (e.g. "llama", "qwen").
        # int64 carried as a string (type: string + format: int64).
        parameterCount:
          type: string
          format: int64
          readOnly: true
          description: |-
            The parameter count of the base model, used for billion-parameter bucket matching.
        acceleratorType:
          $ref: '#/components/schemas/gatewayAcceleratorType'
          description: The type of accelerator to use.
        acceleratorCount:
          type: integer
          format: int32
          description: Total number of accelerators used by the job.
        baseModelWeightPrecision:
          $ref: '#/components/schemas/gatewayWeightPrecision'
          description: |-
            Default precision for base weights during training (prefer BF16; QLoRA/QAT omitted).
        maxSupportedContextLength:
          type: integer
          format: int32
          description: Capacity limits validated for this shape.
    # Version record wrapping a gatewayTrainingShape snapshot. `validated` and
    # `public` are the only fields not marked readOnly.
    gatewayTrainingShapeVersion:
      title: |-
        A training shape version is a specific immutable snapshot of a training shape.
        Versions are immutable, created on updates, and deleted when the parent training shape is deleted.
      type: object
      properties:
        name:
          type: string
          readOnly: true
          title: |-
            The resource name, e.g. accounts/.../trainingShapes/.../versions/{version_id}
        createTime:
          type: string
          format: date-time
          readOnly: true
          title: Creation time (used for ordering desc by default)
        snapshot:
          $ref: '#/components/schemas/gatewayTrainingShape'
          readOnly: true
          description: Full snapshot of the TrainingShape at this version.
        validated:
          type: boolean
          description: |-
            Whether this version has been validated through capacity tests.
            Only superusers can set this flag.
        public:
          type: boolean
          description: If true, this version will be publicly readable.
        latestValidated:
          type: boolean
          readOnly: true
          description: |-
            If true, this version is the latest validated version (at most one per shape).
        updateTime:
          type: string
          format: date-time
          readOnly: true
          description: Last update time of mutable fields such as validated/public.
    # Source dataset id (required), an optional filter string, and the
    # original dataset format.
    gatewayTransformed:
      type: object
      required: [sourceDatasetId]
      properties:
        sourceDatasetId:
          type: string
        filter:
          type: string
        originalFormat:
          $ref: '#/components/schemas/DatasetFormat'
    # Response carrying a string-to-string map named filenameToSignedUrls.
    gatewayUploadDatasetResponse:
      type: object
      properties:
        filenameToSignedUrls:
          title: Signed URLs for uploading dataset files
          type: object
          additionalProperties:
            type: string
    # User resource. `role` is required. name, createTime, state, status and
    # updateTime are readOnly.
    gatewayUser:
      type: object
      required: [role]
      properties:
        name:
          type: string
          readOnly: true
          title: The resource name of the user. e.g. accounts/my-account/users/my-user
        displayName:
          type: string
          description: |-
            Human-readable display name of the user. e.g. "Alice"
            Must be fewer than 64 characters long.
        serviceAccount:
          type: boolean
          title: Whether this user is a service account (can only be set by admins)
        createTime:
          type: string
          format: date-time
          readOnly: true
          description: The creation time of the user.
        # Free-form string in the schema; accepted values are listed in prose.
        role:
          type: string
          description: |-
            The user's role: admin, user, contributor, inference-user, or custom.
            When set to "custom", the user's permissions are governed by permission_preset.
        email:
          type: string
          description: The user's email address.
        state:
          $ref: '#/components/schemas/gatewayUserState'
          readOnly: true
          description: The state of the user.
        status:
          $ref: '#/components/schemas/gatewayStatus'
          readOnly: true
          description: Contains information about the user status.
        updateTime:
          type: string
          format: date-time
          readOnly: true
          description: The update time for the user.
        permissionPreset:
          type: string
          description: |-
            The permission preset for this user. Only valid when role is "custom".
    # String enum of user states; the default is STATE_UNSPECIFIED.
    gatewayUserState:
      type: string
      default: STATE_UNSPECIFIED
      enum: [STATE_UNSPECIFIED, CREATING, READY, UPDATING, DELETING]
    # Object schema that declares no properties.
    gatewayUserUploaded: { type: object }
    # Validation result: a status string plus a map whose values are
    # ValidateAssertionsResponseValidateAssertionError objects.
    gatewayValidateAssertionsResponse:
      title: |-
        Response for validating assertions. The metric_to_errors contains all metrics from request,
        and the error_messages contains all errors for the metric code. (could be empty which means no errors)
      type: object
      properties:
        status:
          type: string
        metricToErrors:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ValidateAssertionsResponseValidateAssertionError'
    # Request with two JSON documents passed as strings; configJson is
    # required.
    gatewayValidateModelConfigRequest:
      type: object
      required: [configJson]
      properties:
        configJson:
          description: The config JSON of the model.
          type: string
        tokenizerConfigJson:
          description: The tokenizer config JSON of the model.
          type: string
    # Response holding a list of warning strings.
    gatewayValidateModelUploadResponse:
      type: object
      properties:
        warnings:
          title: Warnings generated during validation (e.g., unknown config fields)
          type: array
          items:
            type: string
    # wandb logging settings; `url` is the only readOnly field.
    gatewayWandbConfig:
      description: |-
        WandbConfig is the configuration for the Weights & Biases (wandb) logging which
        will be used by a training job.
      type: object
      properties:
        enabled:
          description: Whether to enable wandb logging.
          type: boolean
        apiKey:
          description: The API key for the wandb service.
          type: string
        project:
          description: The project name for the wandb service.
          type: string
        entity:
          description: The entity name for the wandb service.
          type: string
        runId:
          description: The run ID for the wandb service.
          type: string
        url:
          description: The URL for the wandb service.
          type: string
          readOnly: true
    # String enum of weight precisions; the default is the UNSPECIFIED member.
    # The per-value notes live in the description text below.
    gatewayWeightPrecision:
      description: |-
        The weight precision for model training/inference.

         - BFLOAT16: no quantization applied
         - INT8: enable 8-bit quantization with LLM.int8()
         - NF4: enable 4-bit quantization with LLM.nf4()
         - FP8: base model quantization in FP8
         - FP4_FP8: base model linear module quantization in FP4, mixed with experts and some special keys/layers in FP8.
      type: string
      default: WEIGHT_PRECISION_UNSPECIFIED
      enum: [WEIGHT_PRECISION_UNSPECIFIED, BFLOAT16, INT8, NF4, FP8, FP4_FP8]
    # Object schema with a description and no declared properties.
    gatewayWeightedRandomStrategy:
      description: Use replica count as weight.
      type: object
    # Long-running operation envelope: name, metadata, done flag, and either
    # an error (googlerpcStatus) or a response (protobufAny).
    googlelongrunningOperation:
      description: |-
        This resource represents a long-running operation that is the result of a
        network API call.
      type: object
      properties:
        name:
          type: string
          description: |-
            The server-assigned name, which is only unique within the same service that
            originally returns it. If you use the default HTTP mapping, the
            `name` should be a resource name ending with `operations/{unique_id}`.
        metadata:
          $ref: '#/components/schemas/protobufAny'
          description: |-
            Service-specific metadata associated with the operation. It typically
            contains progress information and common metadata such as create time.
            Some services might not provide such metadata. Any method that returns a
            long-running operation should document the metadata type, if any.
        done:
          type: boolean
          description: |-
            If the value is `false`, it means the operation is still in progress.
            If `true`, the operation is completed, and either `error` or `response` is
            available.
        error:
          $ref: '#/components/schemas/googlerpcStatus'
          description: The error result of the operation in case of failure or cancellation.
        response:
          $ref: '#/components/schemas/protobufAny'
          description: |-
            The normal, successful response of the operation. If the original
            method returns no data on success, such as `Delete`, the response is
            `google.protobuf.Empty`. If the original method is standard
            `Get`/`Create`/`Update`, the response should be the resource. For other
            methods, the response should have the type `XxxResponse`, where `Xxx`
            is the original method name. For example, if the original method name
            is `TakeSnapshot()`, the inferred response type is
            `TakeSnapshotResponse`.
    # Error model with an int32 code, a message, and a list of protobufAny
    # detail objects.
    googlerpcStatus:
      description: |-
        The `Status` type defines a logical error model that is suitable for
        different programming environments, including REST APIs and RPC APIs. It is
        used by [gRPC](https://github.com/grpc). Each `Status` message contains
        three pieces of data: error code, error message, and error details.

        You can find out more about this error model and how to work with it in the
        [API Design Guide](https://cloud.google.com/apis/design/errors).
      type: object
      properties:
        code:
          type: integer
          format: int32
          description: |-
            The status code, which should be an enum value of
            [google.rpc.Code][google.rpc.Code].
        message:
          type: string
          description: |-
            A developer-facing error message, which should be in English. Any
            user-facing error message should be localized and sent in the
            [google.rpc.Status.details][google.rpc.Status.details] field, or localized
            by the client.
        details:
          type: array
          items:
            $ref: '#/components/schemas/protobufAny'
            type: object
          description: |-
            A list of messages that carry the error details.  There is a common set of
            message types for APIs to use.
    protobufAny:
      type: object
      properties:
        '@type':
          type: string
          description: >-
            A URL/resource name that uniquely identifies the type of the
            serialized

            protocol buffer message. This string must contain at least

            one "/" character. The last segment of the URL's path must represent

            the fully qualified name of the type (as in

            `path/google.protobuf.Duration`). The name should be in a canonical
            form

            (e.g., leading "." is not accepted).


            In practice, teams usually precompile into the binary all types that
            they

            expect it to use in the context of Any. However, for URLs which use
            the

            scheme `http`, `https`, or no scheme, one can optionally set up a
            type

            server that maps type URLs to message definitions as follows:


            * If no scheme is provided, `https` is assumed.

            * An HTTP GET on the URL must yield a [google.protobuf.Type][]
              value in binary format, or produce an error.
            * Applications are allowed to cache lookup results based on the
              URL, or have them precompiled into a binary to avoid any
              lookup. Therefore, binary compatibility needs to be preserved
              on changes to types. (Use versioned type names to manage
              breaking changes.)

            Note: this functionality is not currently available in the official

            protobuf release, and it is not used for type URLs beginning with

            type.googleapis.com.


            Schemes other than `http`, `https` (or the empty scheme) might be

            used with implementation specific semantics.
      additionalProperties: {}
      description: >-
        `Any` contains an arbitrary serialized protocol buffer message along
        with a

        URL that describes the type of the serialized message.


        Protobuf library provides support to pack/unpack Any values in the form

        of utility functions or additional generated methods of the Any type.


        Example 1: Pack and unpack a message in C++.

            Foo foo = ...;
            Any any;
            any.PackFrom(foo);
            ...
            if (any.UnpackTo(&foo)) {
              ...
            }

        Example 2: Pack and unpack a message in Java.

            Foo foo = ...;
            Any any = Any.pack(foo);
            ...
            if (any.is(Foo.class)) {
              foo = any.unpack(Foo.class);
            }

        Example 3: Pack and unpack a message in Python.

            foo = Foo(...)
            any = Any()
            any.Pack(foo)
            ...
            if any.Is(Foo.DESCRIPTOR):
              any.Unpack(foo)
              ...

        Example 4: Pack and unpack a message in Go

             foo := &pb.Foo{...}
             any, err := anypb.New(foo)
             if err != nil {
               ...
             }
             ...
             foo := &pb.Foo{}
             if err := any.UnmarshalTo(foo); err != nil {
               ...
             }

        The pack methods provided by protobuf library will by default use

        'type.googleapis.com/full.type.name' as the type URL and the unpack

        methods only use the fully qualified type name after the last '/'

        in the type URL, for example "foo.bar.com/x/y.z" will yield type

        name "y.z".


        JSON


        The JSON representation of an `Any` value uses the regular

        representation of the deserialized, embedded message, with an

        additional field `@type` which contains the type URL. Example:

            package google.profile;
            message Person {
              string first_name = 1;
              string last_name = 2;
            }

            {
              "@type": "type.googleapis.com/google.profile.Person",
              "firstName": <string>,
              "lastName": <string>
            }

        If the embedded message type is well-known and has a custom JSON

        representation, that representation will be embedded adding a field

        `value` which holds the custom JSON in addition to the `@type`

        field. Example (for message [google.protobuf.Duration][]):

            {
              "@type": "type.googleapis.com/google.protobuf.Duration",
              "value": "1.212s"
            }
    protobufNullValue:
      # Single-member string enum: NULL_VALUE is the only allowed value and
      # also the declared default.
      description: |-
        `NullValue` is a singleton enumeration to represent the null value for the
        `Value` type union.

         The JSON representation for `NullValue` is JSON `null`.

         - NULL_VALUE: Null value.
      type: string
      default: NULL_VALUE
      enum:
        - NULL_VALUE
    typeDate:
      # Whole or partial calendar date made of int32 year/month/day fields.
      # The complete explanation is kept in `description` so that the intro
      # sentence and the bullet list it introduces read in order; no `title`
      # is set, in line with the sibling typeMoney schema.
      type: object
      description: |-
        Represents a whole or partial calendar date, such as a birthday. The time of
        day and time zone are either specified elsewhere or are insignificant. The
        date is relative to the Gregorian Calendar. This can represent one of the
        following:

        * A full date, with non-zero year, month, and day values
        * A month and day value, with a zero year, such as an anniversary
        * A year on its own, with zero month and day values
        * A year and month value, with a zero day, such as a credit card expiration date

        Related types are `google.type.TimeOfDay` and `google.protobuf.Timestamp`.
      properties:
        year:
          type: integer
          format: int32
          description: |-
            Year of the date. Must be from 1 to 9999, or 0 to specify a date without
            a year.
        month:
          type: integer
          format: int32
          description: |-
            Month of a year. Must be from 1 to 12, or 0 to specify a year without a
            month and day.
        day:
          type: integer
          format: int32
          description: |-
            Day of a month. Must be from 1 to 31 and valid for the year and month, or 0
            to specify a year by itself or a year and month where the day isn't
            significant.
    typeMoney:
      # Monetary amount expressed as a currency code plus whole units and nanos.
      description: Represents an amount of money with its currency type.
      type: object
      properties:
        currencyCode:
          description: The three-letter currency code defined in ISO 4217.
          type: string
        units:
          # Declared as a string with int64 format.
          description: |-
            The whole units of the amount.
            For example if `currencyCode` is `"USD"`, then 1 unit is one US dollar.
          type: string
          format: int64
        nanos:
          description: |-
            Number of nano (10^-9) units of the amount.
            The value must be between -999,999,999 and +999,999,999 inclusive.
            If `units` is positive, `nanos` must be positive or zero.
            If `units` is zero, `nanos` can be positive, zero, or negative.
            If `units` is negative, `nanos` must be negative or zero.
            For example $-1.75 is represented as `units`=-1 and `nanos`=-750,000,000.
          type: integer
          format: int32
    FileUploadResponse:
      # Metadata describing an uploaded file; no field is marked required.
      type: object
      properties:
        id:
          description: The dataset id.
          type: string
        object:
          description: The object type, which is always file.
          type: string
        bytes:
          description: The size of the file, in bytes.
          type: integer
          format: int64
        created_at:
          description: The Unix timestamp (in seconds) for when the file was created.
          type: integer
          format: int64
        filename:
          description: The name of the file.
          type: string
        purpose:
          description: The intended purpose of the file.
          type: string
    CreateResponse:
      # Request body for creating a response. Only `model` and `input` are
      # required; every other property is optional and nullable.
      properties:
        model:
          type: string
          title: Model
          description: >-
            The model to use for generating the response. Example:
            `accounts/<ACCOUNT_ID>/models/<MODEL_ID>`.
        input:
          # Either a plain string or an array of free-form message objects.
          anyOf:
            - type: string
            - items:
                additionalProperties: true
                type: object
              type: array
          title: Input
          description: >-
            The input to the model. Can be a simple text string or a list of
            message objects for complex inputs with multiple content types.
        previous_response_id:
          anyOf:
            - type: string
            - type: 'null'
          title: Previous Response Id
          description: >-
            The ID of a previous response to continue the conversation from.
            When provided, the conversation history from that response will be
            automatically loaded.
        instructions:
          anyOf:
            - type: string
            - type: 'null'
          title: Instructions
          description: >-
            System instructions that guide the model's behavior throughout the
            conversation. Similar to a system message.
        max_output_tokens:
          anyOf:
            # The lower bound encodes the documented "Must be at least 1" rule,
            # the same way max_tool_calls below expresses its own limit.
            - type: integer
              minimum: 1
            - type: 'null'
          title: Max Output Tokens
          description: >-
            The maximum number of tokens that can be generated in the response.
            Must be at least 1. If not specified, the model will generate up to
            its maximum context length.
        max_tool_calls:
          anyOf:
            - type: integer
              minimum: 1
            - type: 'null'
          title: Max Tool Calls
          description: >-
            The maximum number of tool calls allowed in a single response.
            Useful for controlling costs and limiting tool execution. Must be at
            least 1.
        metadata:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Metadata
          description: >-
            Set of up to 16 key-value pairs that can be attached to the
            response. Useful for storing additional information in a structured
            format.
        parallel_tool_calls:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Parallel Tool Calls
          description: >-
            Whether to enable parallel function calling during tool use. When
            true, the model can call multiple tools simultaneously. Default is
            True.
          default: true
        reasoning:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Reasoning
          description: >-
            Configuration for reasoning output. When enabled, the model will
            return its reasoning process along with the response.
        store:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Store
          description: >-
            Whether to store the response. When set to false, the response will
            not be stored and will not be retrievable via the API. This is
            useful for ephemeral or sensitive data. See an example in our
            [Controlling Response Storage
            cookbook](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/mcp_server_with_store_false_argument.ipynb).
            Default is True.
          default: true
        stream:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Stream
          description: >-
            Whether to stream the response back as Server-Sent Events (SSE).
            When true, tokens are sent incrementally as they are generated.
            Default is False.
          default: false
        temperature:
          anyOf:
            - type: number
              maximum: 2
              minimum: 0
            - type: 'null'
          title: Temperature
          description: >-
            The sampling temperature to use, between 0 and 2. Higher values like
            0.8 make output more random, while lower values like 0.2 make it
            more focused and deterministic. Default is 1.0.
          default: 1
        text:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Text
          description: >-
            Text generation configuration parameters. Used for advanced text
            generation settings.
        tool_choice:
          anyOf:
            - type: string
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Tool Choice
          description: >-
            Controls which (if any) tool the model should use. Can be 'none'
            (never call tools), 'auto' (model decides), 'required' (must call at
            least one tool), or an object specifying a particular tool to call.
            Default is 'auto'.
          default: auto
        tools:
          anyOf:
            - items:
                additionalProperties: true
                type: object
              type: array
            - type: 'null'
          title: Tools
          description: >-
            A list of MCP tools the model may call. See our cookbooks for
            examples on [basic MCP
            usage](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/fireworks_mcp_examples.ipynb)
            and [streaming with
            MCP](https://github.com/fw-ai/cookbook/blob/main/learn/response-api/fireworks_mcp_with_streaming.ipynb).
        top_p:
          anyOf:
            - type: number
              maximum: 1
              minimum: 0
            - type: 'null'
          title: Top P
          description: >-
            An alternative to temperature sampling, called nucleus sampling,
            where the model considers the results of tokens with top_p
            probability mass. So 0.1 means only tokens comprising the top 10%
            probability mass are considered. Default is 1.0. We generally
            recommend altering this or temperature but not both.
          default: 1
        truncation:
          anyOf:
            - type: string
            - type: 'null'
          title: Truncation
          description: >-
            The truncation strategy to use for the context when it exceeds the
            model's maximum length. Can be 'auto' (automatically truncate) or
            'disabled' (return error if context too long). Default is
            'disabled'.
          default: disabled
        user:
          anyOf:
            - type: string
            - type: 'null'
          title: User
          description: >-
            A unique identifier representing your end-user, which can help
            Fireworks to monitor and detect abuse. This can be a username,
            email, or any other unique identifier.
      type: object
      required:
        - model
        - input
      title: CreateResponse
      description: >-
        Request model for creating a new response.


        This model defines all the parameters needed to create a new model
        response,

        including model configuration, input data, tool definitions, and
        conversation continuation.
    DeleteResponse:
      # Acknowledgement body carrying a single required `message` string.
      title: DeleteResponse
      description: Response model for deleting a response.
      type: object
      required:
        - message
      properties:
        message:
          title: Message
          description: Confirmation message
          type: string
          example: Response deleted successfully
    HTTPValidationError:
      # Envelope whose `detail` array holds ValidationError entries.
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    Message:
      # One conversation message; `content` is an array of MessageContent parts.
      # `type` is the only property that is not required.
      title: Message
      description: Represents a message in a conversation.
      type: object
      required:
        - id
        - role
        - content
        - status
      properties:
        id:
          title: Id
          description: The unique identifier of the message.
          type: string
        type:
          title: Type
          description: The object type, always 'message'.
          type: string
          default: message
        role:
          title: Role
          description: >-
            The role of the message sender.
            Can be 'user', 'assistant', or 'system'.
          type: string
        content:
          title: Content
          description: >-
            An array of content parts that make up the message.
            Each part has a type and associated data.
          type: array
          items:
            $ref: '#/components/schemas/MessageContent'
        status:
          title: Status
          description: The status of the message. Can be 'in_progress' or 'completed'.
          type: string
    MessageContent:
      # A single typed content part; only `type` is required and `text` may be null.
      title: MessageContent
      description: Represents a piece of content within a message.
      type: object
      required:
        - type
      properties:
        type:
          title: Type
          description: >-
            The type of the content part.
            Can be 'input_text', 'output_text', 'image', etc.
          type: string
        text:
          title: Text
          description: The text content, if applicable.
          anyOf:
            - type: string
            - type: 'null'
    Response:
      # Response object returned by the API. `created_at`, `status`, `model`,
      # and `output` are required; `output` items are Message, ToolCall, or
      # ToolOutput objects.
      properties:
        id:
          anyOf:
            - type: string
            - type: 'null'
          title: Id
          description: The unique identifier of the response. Will be None if store=False.
        object:
          type: string
          title: Object
          description: The object type, which is always 'response'.
          default: response
        created_at:
          type: integer
          title: Created At
          description: The Unix timestamp (in seconds) when the response was created.
        status:
          type: string
          title: Status
          description: >-
            The status of the response. Can be 'completed', 'in_progress',
            'incomplete', 'failed', or 'cancelled'.
        model:
          type: string
          title: Model
          description: >-
            The model used to generate the response (e.g.,
            `accounts/<ACCOUNT_ID>/models/<MODEL_ID>`).
        output:
          items:
            anyOf:
              - $ref: '#/components/schemas/Message'
              - $ref: '#/components/schemas/ToolCall'
              - $ref: '#/components/schemas/ToolOutput'
          type: array
          title: Output
          description: >-
            An array of output items produced by the model. Can contain
            messages, tool calls, and tool outputs.
        previous_response_id:
          anyOf:
            - type: string
            - type: 'null'
          title: Previous Response Id
          description: >-
            The ID of the previous response in the conversation, if this
            response continues a conversation.
        usage:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Usage
          description: >-
            Token usage information for the request. Contains 'prompt_tokens',
            'completion_tokens', and 'total_tokens'.
        error:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Error
          description: >-
            Error information if the response failed. Contains 'type', 'code',
            and 'message' fields.
        incomplete_details:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Incomplete Details
          description: >-
            Details about why the response is incomplete, if status is
            'incomplete'. Contains 'reason' field which can be
            'max_output_tokens', 'max_tool_calls', or 'content_filter'.
        instructions:
          anyOf:
            - type: string
            - type: 'null'
          title: Instructions
          description: >-
            System instructions that guide the model's behavior. Similar to a
            system message.
        max_output_tokens:
          anyOf:
            # The lower bound encodes the documented "Must be at least 1" rule,
            # the same way max_tool_calls below expresses its own limit.
            - type: integer
              minimum: 1
            - type: 'null'
          title: Max Output Tokens
          description: >-
            The maximum number of tokens that can be generated in the response.
            Must be at least 1.
        max_tool_calls:
          anyOf:
            - type: integer
              minimum: 1
            - type: 'null'
          title: Max Tool Calls
          description: >-
            The maximum number of tool calls allowed in a single response. Must
            be at least 1.
        parallel_tool_calls:
          type: boolean
          title: Parallel Tool Calls
          description: >-
            Whether to enable parallel function calling during tool use. Default
            is True.
          default: true
        reasoning:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Reasoning
          description: >-
            Reasoning output from the model, if reasoning is enabled. Contains
            'content' and 'type' fields.
        store:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Store
          description: >-
            Whether to store this response for future retrieval. If False, the
            response will not be persisted and previous_response_id cannot
            reference it. Default is True.
          default: true
        temperature:
          type: number
          maximum: 2
          minimum: 0
          title: Temperature
          description: >-
            The sampling temperature to use, between 0 and 2. Higher values like
            0.8 make output more random, while lower values like 0.2 make it
            more focused and deterministic. Default is 1.0.
          default: 1
        text:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Text
          description: Text generation configuration parameters, if applicable.
        tool_choice:
          anyOf:
            - type: string
            - additionalProperties: true
              type: object
          title: Tool Choice
          description: >-
            Controls which (if any) tool the model should use. Can be 'none',
            'auto', 'required', or an object specifying a particular tool.
            Default is 'auto'.
          default: auto
        tools:
          items:
            additionalProperties: true
            type: object
          type: array
          title: Tools
          description: >-
            A list of tools the model may call. Each tool is defined with a type
            and function specification following the OpenAI tool format.
            Supports 'function', 'mcp', 'sse', and 'python' tool types.
        top_p:
          type: number
          maximum: 1
          minimum: 0
          title: Top P
          description: >-
            An alternative to temperature sampling, called nucleus sampling,
            where the model considers the results of tokens with top_p
            probability mass. So 0.1 means only tokens comprising the top 10%
            probability mass are considered. Default is 1.0.
          default: 1
        truncation:
          type: string
          title: Truncation
          description: >-
            The truncation strategy to use for the context. Can be 'auto' or
            'disabled'. Default is 'disabled'.
          default: disabled
        user:
          anyOf:
            - type: string
            - type: 'null'
          title: User
          description: >-
            A unique identifier representing your end-user, which can help
            Fireworks to monitor and detect abuse.
        metadata:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Metadata
          description: >-
            Set of up to 16 key-value pairs that can be attached to the
            response. Useful for storing additional information about the
            response in a structured format.
      type: object
      required:
        - created_at
        - status
        - model
        - output
      title: Response
      description: >-
        Represents a response object returned from the API.


        A response includes the model output, token usage, configuration
        parameters,

        and metadata about the conversation state.
    ResponseList:
      # Cursor-paginated page of Response objects; `data` and `has_more` are
      # required, the id cursors may be null.
      title: ResponseList
      description: |-
        Response model for listing responses.

        Returned from the GET /v1/responses endpoint. Provides a paginated list
        of response objects with cursor-based pagination support.
      type: object
      required:
        - data
        - has_more
      properties:
        object:
          title: Object
          description: The object type, which is always 'list'.
          type: string
          default: list
        data:
          title: Data
          description: >-
            An array of response objects,
            sorted by creation time in descending order (most recent first).
          type: array
          items:
            $ref: '#/components/schemas/Response'
        has_more:
          title: Has More
          description: >-
            Indicates whether there are more responses available beyond this page.
            If true, use the 'last_id' value as the 'after' cursor
            to fetch the next page.
          type: boolean
        first_id:
          title: First Id
          description: >-
            The ID of the first response in the current page.
            Used for pagination.
          anyOf:
            - type: string
            - type: 'null'
        last_id:
          title: Last Id
          description: >-
            The ID of the last response in the current page.
            Use this as the 'after' cursor to fetch the next page
            if has_more is true.
          anyOf:
            - type: string
            - type: 'null'
    ToolCall:
      # Tool invocation emitted by the model. `id` and `type` are required;
      # every other property accepts null.
      title: ToolCall
      description: Represents a tool call made by the model.
      type: object
      required:
        - id
        - type
      properties:
        id:
          title: Id
          description: The unique identifier of the tool call.
          type: string
        type:
          title: Type
          description: The type of tool call. Can be 'function_call' or 'mcp_call'.
          type: string
        call_id:
          title: Call Id
          description: >-
            The call ID for function calls,
            used to match with function_call_output.
          anyOf:
            - type: string
            - type: 'null'
        name:
          title: Name
          description: The name of the function to call (for function_call type).
          anyOf:
            - type: string
            - type: 'null'
        arguments:
          title: Arguments
          description: >-
            The arguments for the function call as a JSON string
            (for function_call type).
          anyOf:
            - type: string
            - type: 'null'
        status:
          title: Status
          description: >-
            The status of the tool call.
            Can be 'in_progress', 'completed', or 'incomplete'.
          anyOf:
            - type: string
            - type: 'null'
        function:
          title: Function
          description: >-
            The function definition for function tool calls.
            Contains 'name' and 'arguments' keys.
            Deprecated for function_call type.
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
        mcp:
          title: Mcp
          description: >-
            The MCP (Model Context Protocol) tool call definition
            for MCP tool calls.
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
    ToolOutput:
      # Result of a tool call, tied back to it through `tool_call_id`.
      title: ToolOutput
      description: Represents the output/result of a tool call.
      type: object
      required:
        - tool_call_id
        - output
      properties:
        type:
          title: Type
          description: The object type, always 'tool_output'.
          type: string
          default: tool_output
        tool_call_id:
          title: Tool Call Id
          description: The ID of the tool call that this output corresponds to.
          type: string
        output:
          title: Output
          description: The output content from the tool execution.
          type: string
    ValidationError:
      # A single validation failure; `loc`, `msg`, and `type` are required.
      title: ValidationError
      type: object
      required:
        - loc
        - msg
        - type
      properties:
        loc:
          # Array whose items are strings or integers.
          title: Location
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
        input:
          # Only a title is declared, so no type constraint applies.
          title: Input
        ctx:
          title: Context
          type: object
    ChatCompletionFunction:
      # Function definition object: `name` is the only required property and
      # unknown keys are rejected (additionalProperties: false).
      properties:
        name:
          # NOTE(review): the character set and 64-character limit stated in
          # the description are not expressed as pattern/maxLength here —
          # confirm whether they are enforced elsewhere.
          type: string
          title: Name
          description: >-
            The name of the function to be called. Must be a-z, A-Z, 0-9, or
            contain underscores and dashes, with a maximum length of 64.
        description:
          anyOf:
            - type: string
            - type: 'null'
          title: Description
          description: >-
            A description of what the function does, used by the model to choose
            when and how to call the function.
        parameters:
          # Free-form object (additionalProperties: true). The description is a
          # folded scalar containing fenced JSON examples, so its blank lines
          # and indentation are significant — edit with care.
          additionalProperties: true
          type: object
          title: Parameters
          description: >-
            The parameters the function accepts, described as a JSON Schema
            object.


            The JSON Schema object should have the following structure:


            ```json

            {
              "type": "object",
              "required": ["param1", "param2"],
              "properties": {
                "param1": {
                  "type": "string",
                  "description": "..."
                },
                "param2": {
                  "type": "number",
                  "description": "..."
                }
              }
            }

            ```


            - The `type` field must be `"object"`.

            - The `required` field is an array of strings indicating which
            parameters are required.

            - The `properties` field is a map of property names to their
            definitions, where each property is an object with `type` (string)
            and `description` (string) fields.


            To describe a function that accepts no parameters, provide the
            value:


            ```json

            {"type": "object", "properties": {}}

            ```
        strict:
          # Nullable boolean; this schema gives it no description.
          anyOf:
            - type: boolean
            - type: 'null'
          title: Strict
      additionalProperties: false
      type: object
      required:
        - name
      title: ChatCompletionFunction
    ChatCompletionMessageToolCall:
      # Tool call attached to a chat message. Only `function` is required and
      # unknown keys are rejected (additionalProperties: false).
      title: ChatCompletionMessageToolCall
      type: object
      additionalProperties: false
      required:
        - function
      properties:
        id:
          title: Id
          description: The ID of the tool call.
          anyOf:
            - type: string
            - type: 'null'
        type:
          title: Type
          description: The type of the tool. Currently, only `function` is supported.
          type: string
          default: function
        function:
          # Either a ChatCompletionMessageToolCallFunction object or a plain string.
          title: Function
          description: The function that the model called.
          anyOf:
            - $ref: '#/components/schemas/ChatCompletionMessageToolCallFunction'
            - type: string
    ChatCompletionMessageToolCallFunction:
      # Name/arguments pair for a called function. Nothing is required, both
      # properties accept null, and unknown keys are rejected.
      title: ChatCompletionMessageToolCallFunction
      type: object
      additionalProperties: false
      properties:
        name:
          title: Name
          description: The name of the function to call.
          anyOf:
            - type: string
            - type: 'null'
        arguments:
          # Accepts a string, a free-form object, or null.
          title: Arguments
          description: >-
            The arguments to call the function with,
            as generated by the model in JSON format.
            Note that the model does not always generate valid JSON,
            and may hallucinate parameters not defined by your function schema.
            Validate the arguments in your code before calling your function.
          anyOf:
            - type: string
            - additionalProperties: true
              type: object
            - type: 'null'
    ChatCompletionRequest:
      properties:
        model:
          type: string
          title: Model
          description: |-
            The name of the model to use.

            Example: `"accounts/fireworks/models/kimi-k2-instruct-0905"`
        messages:
          items:
            $ref: '#/components/schemas/ChatMessage'
          type: array
          title: Messages
          description: A list of messages comprising the conversation so far.
        # Tools the model may call, as an array of ChatCompletionTool objects.
        # Optional: not listed in this schema's `required` array.
        tools:
          items:
            $ref: '#/components/schemas/ChatCompletionTool'
          type: array
          title: Tools
          description: >-
            A list of tools the model may call. Currently, only functions are
            supported as a tool.


            Use this to provide a list of functions the model may generate JSON
            inputs for.


            See our [model
            library](https://app.fireworks.ai/models/?filter=LLM&functionCalling=true)
            for the list of supported models.
        # Either one of the string modes below or a FunctionSelection object
        # that pins a specific function. Defaults to `auto`.
        tool_choice:
          anyOf:
            - type: string
              enum:
                - auto
                - none
                - any
                - required
            - $ref: '#/components/schemas/FunctionSelection'
          title: Tool Choice
          # The closing sentence is kept as its own paragraph. A more-indented
          # line inside a folded scalar keeps its line break and indentation,
          # which glued that sentence onto the `required` bullet when rendered.
          description: >-
            Controls which (if any) tool is called by the model.


            - `none`: the model will not call any tool and instead generates a
            message.

            - `auto`: the model can pick between generating a message or calling
            one or more tools.

            - `required` (alias: `any`): the model must call one or more tools.


            To force a specific function, pass an object of the form `{ "type":
            "function", "name": "my_function" }` or `{ "type": "function",
            "function": { "name": "my_function" } }` for OpenAI compatibility.
          default: auto
        # Nullable boolean; defaults to false (non-streaming).
        stream:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Stream
          description: >-
            Whether to stream back partial progress. If set, tokens will be sent
            as data-only [server-sent
            events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
            as they become available, with the stream terminated by a `data:
            [DONE]` message.
          default: false
        # Nullable reference to ResponseFormat. No `title` or `default` is
        # declared on this property.
        response_format:
          anyOf:
            - $ref: '#/components/schemas/ResponseFormat'
            - type: 'null'
          description: >-
            Forces the model to produce a specific output format.


            Setting to `{ "type": "json_object" }` enables JSON mode, which
            guarantees the message the model generates is valid JSON.


            If `"type"` is `"json_schema"`, a JSON schema must be provided.
            E.g., `response_format = {"type": "json_schema", "json_schema":
            <json_schema>}`.


            Important: when using JSON mode, it's crucial to also instruct the
            model to produce JSON via a system or user message. Without this,
            the model may generate an unending stream of whitespace until the
            generation reaches the token limit, resulting in a long-running and
            seemingly "stuck" request.


            Also note that the message content may be partially cut off if
            `finish_reason="length"`, which indicates the generation exceeded
            `max_tokens` or the conversation exceeded the max context length. In
            this case the return value might not be valid JSON.
        # Nullable number. The numeric branch encodes the documented
        # `0 <= x <= 2` range so validators and generated clients enforce it.
        temperature:
          anyOf:
            - type: number
              minimum: 0
              maximum: 2
            - type: 'null'
          title: Temperature
          description: >-
            What sampling temperature to use, between 0 and 2. Higher values
            like 0.8 will make the output more random, while lower values like
            0.2 will make it more focused and deterministic.


            We generally recommend altering this or top_p but not both.


            Required range: `0 <= x <= 2`


            Example: `1`
        # Nullable integer. The integer branch encodes the documented
        # `0 <= x <= 100` range so validators and generated clients enforce it.
        top_k:
          anyOf:
            - type: integer
              minimum: 0
              maximum: 100
            - type: 'null'
          title: Top K
          description: >-
            Top-k sampling is another sampling method where the k most probable
            next tokens are filtered and the probability mass is redistributed
            among only those k next tokens. The value of k controls the number
            of candidates for the next token at each step during text
            generation. Must be between 0 and 100.


            Required range: `0 <= x <= 100`


            Example: `50`
        # Nullable free-form string; no format or length limit is declared.
        user:
          anyOf:
            - type: string
            - type: 'null'
          title: User
          description: >-
            A unique identifier representing your end-user, which can help
            monitor and detect abuse.
        # Nullable free-form string. Per its description it takes priority
        # over `user` for session affinity.
        prompt_cache_key:
          anyOf:
            - type: string
            - type: 'null'
          title: Prompt Cache Key
          description: >-
            A key used for prompt caching session affinity. Requests with the
            same prompt_cache_key are routed to the same backend to maximize KV
            cache hit rates. This is the preferred field for session affinity
            (takes priority over the 'user' field).
        # Nullable free-form string.
        # NOTE(review): how this interacts with `prompt_cache_key` is not
        # stated here — confirm and document.
        prompt_cache_isolation_key:
          anyOf:
            - type: string
            - type: 'null'
          title: Prompt Cache Isolation Key
          description: Isolation key for prompt caching to separate cache entries.
        # Nullable boolean; defaults to false.
        raw_output:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Raw Output
          description: Return raw output from the model.
          default: false
        # Nullable boolean; defaults to false. The description enumerates the
        # metric names reported under the response body's `perf_metrics` field.
        perf_metrics_in_response:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Perf Metrics In Response
          description: >-
            Whether to include performance metrics in the response body.


            **Non-streaming requests:** Performance metrics are always included
            in response headers (e.g., `fireworks-prompt-tokens`,
            `fireworks-server-time-to-first-token`). Setting this to `true`
            additionally includes the same metrics in the response body under
            the `perf_metrics` field.


            **Streaming requests:** Performance metrics are only included in the
            response body under the `perf_metrics` field in the final chunk
            (when `finish_reason` is set). This is because headers may not be
            accessible during streaming.


            The response body `perf_metrics` field contains the following
            metrics:


            **Basic Metrics (all deployments):**


            - `prompt-tokens`: Number of tokens in the prompt

            - `cached-prompt-tokens`: Number of cached prompt tokens

            - `server-time-to-first-token`: Time from request start to first
            token (in seconds)

            - `server-processing-time`: Total processing time (in seconds, only
            for completed requests)


            **Predicted Outputs Metrics:**


            - `speculation-prompt-tokens`: Number of speculative prompt tokens

            - `speculation-prompt-matched-tokens`: Number of matched speculative
            prompt tokens (for completed requests)


            **Dedicated Deployment Only Metrics:**


            - `speculation-generated-tokens`: Number of speculative generated
            tokens (for completed requests)

            - `speculation-acceptance`: Speculation acceptance rates by position

            - `backend-host`: Hostname of the backend server

            - `num-concurrent-requests`: Number of concurrent requests

            - `deployment`: Deployment name

            - `tokenizer-queue-duration`: Time spent in tokenizer queue

            - `tokenizer-duration`: Time spent in tokenizer

            - `prefill-queue-duration`: Time spent in prefill queue

            - `prefill-duration`: Time spent in prefill

            - `generation-queue-duration`: Time spent in generation queue

            - `generation-duration`: Time spent in generation
          default: false
        # Number of completions per prompt; non-nullable integer defaulting
        # to 1. The key stays quoted because a bare `n` is read as a boolean by
        # YAML 1.1 parsers. The documented `1 <= x <= 128` range is encoded so
        # validators and generated clients enforce it.
        'n':
          type: integer
          minimum: 1
          maximum: 128
          title: 'N'
          description: >-
            How many completions to generate for each prompt.


            **Note:** Because this parameter generates many completions, it can
            quickly consume your token quota. Use carefully and ensure that you
            have reasonable settings for `max_tokens` and `stop`.


            Required range: `1 <= x <= 128`


            Example: `1`
          default: 1
        # Non-nullable string enum; defaults to `default`. Per the description,
        # every value other than `priority` is served as the default tier.
        service_tier:
          type: string
          enum:
            - auto
            - default
            - flex
            - priority
          title: Service Tier
          description: >-
            The service tier to use for the request. Specifies the processing
            type used for serving the request. Only "priority" is supported,
            while all other values will be treated as "default" tier.
          default: default
        # A single stop string, an array of stop strings, or null. The array
        # form is capped at the documented limit of 4 sequences.
        stop:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
              maxItems: 4
            - type: 'null'
          title: Stop
          description: >-
            Up to 4 sequences where the API will stop generating further tokens.
            The returned text will NOT contain the stop sequence.
        # Nullable integer; no bounds are declared in the schema. Overflow
        # handling is governed by `context_length_exceeded_behavior`.
        max_tokens:
          anyOf:
            - type: integer
            - type: 'null'
          title: Max Tokens
          description: >-
            The maximum number of tokens to generate in the completion. If the
            token count of your prompt plus max_tokens exceeds the model's
            context length, the behavior depends on
            context_length_exceeded_behavior. By default, max_tokens will be
            lowered to fit in the context window instead of returning an error.
        # Nullable integer alias of `max_tokens`. The mutual exclusion stated
        # in the description is not expressed in the schema itself.
        max_completion_tokens:
          anyOf:
            - type: integer
            - type: 'null'
          title: Max Completion Tokens
          description: Alias for max_tokens. Cannot be specified together with max_tokens.
        # Nullable number. The numeric branch encodes the documented
        # `0 <= x <= 1` range so validators and generated clients enforce it.
        top_p:
          anyOf:
            - type: number
              minimum: 0
              maximum: 1
            - type: 'null'
          title: Top P
          description: >-
            An alternative to sampling with temperature, called nucleus
            sampling, where the model considers the results of the tokens with
            top_p probability mass. So 0.1 means only the tokens comprising the
            top 10% probability mass are considered.


            We generally recommend altering this or temperature but not both.


            Required range: `0 <= x <= 1`


            Example: `1`
        # Nullable number. The numeric branch encodes the documented
        # `0 <= x <= 1` range so validators and generated clients enforce it.
        min_p:
          anyOf:
            - type: number
              minimum: 0
              maximum: 1
            - type: 'null'
          title: Min P
          description: >-
            Minimum probability threshold for token selection. Only tokens with
            probability >= min_p are considered for selection. This is an
            alternative to `top_p` and `top_k` sampling.


            Required range: `0 <= x <= 1`
        # Nullable number. The numeric branch encodes the documented
        # `0 <= x <= 1` range so validators and generated clients enforce it.
        typical_p:
          anyOf:
            - type: number
              minimum: 0
              maximum: 1
            - type: 'null'
          title: Typical P
          description: >-
            Typical-p sampling is an alternative to nucleus sampling. It
            considers the most typical tokens whose cumulative probability is at
            most typical_p.


            Required range: `0 <= x <= 1`
        # Nullable number. The numeric branch encodes the documented
        # `-2 <= x <= 2` range so validators and generated clients enforce it.
        frequency_penalty:
          anyOf:
            - type: number
              minimum: -2
              maximum: 2
            - type: 'null'
          title: Frequency Penalty
          description: >-
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on their existing frequency in the text so far, decreasing the
            model's likelihood to repeat the same line verbatim.


            Reasonable value is around 0.1 to 1 if the aim is to just reduce
            repetitive samples somewhat. If the aim is to strongly suppress
            repetition, then one can increase the coefficients up to 2, but this
            can noticeably degrade the quality of samples. Negative values can
            be used to increase the likelihood of repetition.


            See also `presence_penalty` for penalizing tokens that have at least
            one appearance at a fixed rate.


            OpenAI compatible (follows OpenAI's conventions for handling token
            frequency and repetition penalties).


            Required range: `-2 <= x <= 2`
        # Nullable number. The numeric branch encodes the documented
        # `-2 <= x <= 2` range so validators and generated clients enforce it.
        presence_penalty:
          anyOf:
            - type: number
              minimum: -2
              maximum: 2
            - type: 'null'
          title: Presence Penalty
          description: >-
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on whether they appear in the text so far, increasing the
            model's likelihood to talk about new topics.


            Reasonable value is around 0.1 to 1 if the aim is to just reduce
            repetitive samples somewhat. If the aim is to strongly suppress
            repetition, then one can increase the coefficients up to 2, but this
            can noticeably degrade the quality of samples. Negative values can
            be used to increase the likelihood of repetition.


            See also `frequency_penalty` for penalizing tokens at an increasing
            rate depending on how often they appear.


            OpenAI compatible (follows OpenAI's conventions for handling token
            frequency and repetition penalties).


            Required range: `-2 <= x <= 2`
        # Nullable number. The numeric branch encodes the documented
        # `0 <= x <= 2` range so validators and generated clients enforce it.
        repetition_penalty:
          anyOf:
            - type: number
              minimum: 0
              maximum: 2
            - type: 'null'
          title: Repetition Penalty
          description: >-
            Applies a penalty to repeated tokens to discourage or encourage
            repetition. A value of `1.0` means no penalty, allowing free
            repetition. Values above `1.0` penalize repetition, reducing the
            likelihood of repeating tokens. Values between `0.0` and `1.0`
            reward repetition, increasing the chance of repeated tokens. For a
            good balance, a value of `1.2` is often recommended. Note that the
            penalty is applied to both the generated output and the prompt in
            decoder-only models.


            Required range: `0 <= x <= 2`
        # Nullable number with no declared bounds. Per the description,
        # leaving it unset disables Mirostat sampling.
        mirostat_target:
          anyOf:
            - type: number
            - type: 'null'
          title: Mirostat Target
          description: >-
            Defines the target perplexity for the Mirostat algorithm. Perplexity
            measures the unpredictability of the generated text, with higher
            values encouraging more diverse and creative outputs, while lower
            values prioritize predictability and coherence. The algorithm
            dynamically adjusts the token selection to maintain this target
            during text generation.


            If not specified, Mirostat sampling is disabled.
        # Nullable number with no declared bounds or default.
        mirostat_lr:
          anyOf:
            - type: number
            - type: 'null'
          title: Mirostat Lr
          description: >-
            Specifies the learning rate for the Mirostat sampling algorithm,
            which controls how quickly the model adjusts its token distribution
            to maintain the target perplexity. A smaller value slows down the
            adjustments, leading to more stable but gradual shifts, while higher
            values speed up corrections at the cost of potential instability.
        # Nullable integer with no declared bounds or default.
        seed:
          anyOf:
            - type: integer
            - type: 'null'
          title: Seed
          description: Random seed for deterministic sampling.
        # Integer (legacy format), boolean (OpenAI-compatible), or null. The
        # integer branch encodes the documented 0-5 range; the boolean branch
        # is unaffected.
        logprobs:
          anyOf:
            - type: integer
              minimum: 0
              maximum: 5
            - type: boolean
            - type: 'null'
          title: Logprobs
          description: >-
            Include log probabilities in the response. This accepts either a
            boolean or an integer:


            If set to `true`, log probabilities are included and the number of
            alternatives can be controlled via `top_logprobs` (OpenAI-compatible
            behavior).


            If set to an integer N (0-5), include log probabilities for up to N
            most likely tokens per position in the legacy format.


            The API will always return the logprob of the sampled token, so
            there may be up to `logprobs+1` elements in the response when an
            integer is used. The maximum value for the integer form is 5.
        # Nullable integer. The integer branch encodes the documented
        # `0 <= x <= 5` range so validators and generated clients enforce it.
        top_logprobs:
          anyOf:
            - type: integer
              minimum: 0
              maximum: 5
            - type: 'null'
          title: Top Logprobs
          description: >-
            An integer between 0 and 5 specifying the number of most likely
            tokens to return at each token position, each with an associated log
            probability. The minimum value is 0 and the maximum value is 5.


            When `logprobs` is set, `top_logprobs` can be used to modify how
            many top log probabilities are returned. If `top_logprobs` is not
            set, the API will return up to `logprobs` tokens per position.


            Required range: `0 <= x <= 5`
        # Nullable boolean; defaults to false.
        echo:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Echo
          description: Echo back the prompt in addition to the completion.
          default: false
        # Nullable integer token count; no bounds or default are declared.
        echo_last:
          anyOf:
            - type: integer
            - type: 'null'
          title: Echo Last
          description: >-
            Echo back the last N tokens of the prompt in addition to the
            completion. This is useful for obtaining logprobs of the prompt
            suffix but without transferring too much data. Passing
            `echo_last=len(prompt)` is the same as `echo=True`
        # Plain boolean defaulting to false. Unlike most flags in this schema
        # it is not nullable.
        ignore_eos:
          type: boolean
          title: Ignore Eos
          description: >-
            This setting controls whether the model should ignore the End of
            Sequence (EOS) token. When set to `True`, the model will continue
            generating tokens even after the EOS token is produced. By default,
            it stops when the EOS token is reached.
          default: false
        # Non-nullable string enum (`error` | `truncate`); defaults to
        # `truncate`.
        context_length_exceeded_behavior:
          type: string
          enum:
            - error
            - truncate
          title: Context Length Exceeded Behavior
          description: >-
            What to do if the token count of prompt plus `max_tokens` exceeds
            the model's context window.


            Passing `truncate` limits the `max_tokens` to at most
            `context_window_length - prompt_length`. This is the default.


            Passing `error` would trigger a request error.


            The default of `'truncate'` is selected as it allows requesting a
            high `max_tokens` value while respecting the context window length
            without having to do client-side prompt tokenization.


            Note that this differs from OpenAI's behavior, which matches that of
            `error`.
          default: truncate
        # Nullable map from token ID (as the object key) to a numeric bias.
        # Each value is bounded to the documented -100..100 range.
        logit_bias:
          anyOf:
            - additionalProperties:
                type: number
                minimum: -100
                maximum: 100
              type: object
            - type: 'null'
          title: Logit Bias
          description: >-
            Modify the likelihood of specified tokens appearing in the
            completion. Accepts a json object that maps tokens (specified by
            their token ID in the tokenizer) to an associated bias value from
            -100 to 100. Mathematically, the bias is added to the logits
            generated by the model prior to sampling.
        # Either a prompt string, an array of integer token IDs, or null.
        speculation:
          anyOf:
            - type: string
            - items:
                type: integer
              type: array
            - type: 'null'
          title: Speculation
          description: Speculative decoding prompt or token IDs to speed up generation.
        # A PredictedOutput object, a bare string, or null. Per the
        # description the value is converted into `speculation`.
        prediction:
          anyOf:
            - $ref: '#/components/schemas/PredictedOutput'
            - type: string
            - type: 'null'
          title: Prediction
          description: >-
            OpenAI-compatible predicted output for speculative decoding. Can be
            a PredictedOutput object or a simple string. Automatically
            transformed to speculation.
        # Nullable string-to-string map; non-string values fail validation.
        metadata:
          anyOf:
            - additionalProperties:
                type: string
              type: object
            - type: 'null'
          title: Metadata
          description: >-
            Additional metadata to store with the request for
            tracing/distillation.
        # One of: an effort-level string enum, an integer, a boolean, or null.
        # The schema puts no bounds on the integer form; the per-model rules in
        # the description are not expressed as schema constraints.
        reasoning_effort:
          anyOf:
            - type: string
              enum:
                - low
                - medium
                - high
                - max
                - none
            - type: integer
            - type: boolean
            - type: 'null'
          title: Reasoning Effort
          description: >-
            Controls reasoning behavior for supported models. When enabled, the
            model's reasoning appears in the `reasoning_content` field of the
            response, separate from the final answer in `content`.


            **Accepted values:**


            - **String** (OpenAI-compatible): `'low'`, `'medium'`, `'high'`, or
            `'max'` to enable reasoning with varying effort levels; `'none'` to
            disable reasoning.

            - **Boolean** (Fireworks extension): `true` to enable reasoning,
            `false` to disable it.

            - **Integer** (Fireworks extension): A positive integer to set a
            hard token limit on reasoning output (only effective for
            grammar-based reasoning models).


            **Important:** Boolean values are normalized internally: `true`
            becomes `'medium'`, and `false` becomes `'none'`. This normalization
            happens before model-specific validation, so if a model doesn't
            support `'none'`, passing `false` will produce an error referencing
            `'none'`.


            **Model-specific behavior:**


            - **Qwen3 (e.g., Qwen3-8B)**: Grammar-based reasoning. Default
            reasoning on. Use `'none'` or `false` to disable. Supports integer
            token limits to cap reasoning output. `'low'` maps to a default
            token limit (~3000 tokens).

            - **MiniMax M2**: Reasoning is required (always on). Defaults to
            `'medium'` when omitted. Accepts only string `reasoning_effort`:
            `'low'`, `'medium'`, or `'high'`. `'none'` and boolean values are
            rejected.

            - **DeepSeek V3.1**: Binary on/off reasoning. Default reasoning off
            (matches chat template). Use `true`, `'low'`, `'medium'`, or
            `'high'` to enable; `'none'` or `false` to disable.

            - **DeepSeek V3.2**: Binary on/off reasoning. Default reasoning on.
            Use `'none'` or `false` to disable; effort levels and integers have
            no additional effect.

            - **DeepSeek V4**: Accepts `'none'`, `'low'`, `'medium'`, `'high'`,
            and `'max'`. Default reasoning on (`'high'`). `'max'` prepends a
            thorough-reasoning preamble; `'high'` enables thinking. `'low'` and
            `'medium'` are silently promoted to `'high'`. `'none'` or `false`
            disables thinking.

            - **GLM 4.5, GLM 4.5 Air, GLM 4.6, GLM 4.7**: Binary on/off
            reasoning. Default reasoning on. Use `'none'` or `false` to disable;
            effort levels and integers have no additional effect.

            - **Harmony (OpenAI GPT-OSS 120B, GPT-OSS 20B)**: Accepts only
            `'low'`, `'medium'`, or `'high'`. Does not support `'none'`,
            `false`, or integer values — using these will return an error (e.g.,
            "Invalid reasoning effort: none"). When omitted, defaults to
            `'medium'`. Lower effort produces faster responses with shorter
            reasoning.
        # Nullable string enum (`disabled` | `interleaved` | `preserved`).
        # In the description, the blank lines between Markdown table rows are
        # deliberate: inside a folded scalar they are what keeps each row on
        # its own line.
        reasoning_history:
          anyOf:
            - type: string
              enum:
                - disabled
                - interleaved
                - preserved
            - type: 'null'
          title: Reasoning History
          description: >-
            Controls how historical assistant reasoning content is included in
            the prompt for multi-turn conversations.


            **Accepted values:**


            - `null`: Use model/template default behavior (for **GLM-4.7**, the
            model/template default is `'interleaved'`, i.e. historical reasoning
            is cleared by default)

            - `'disabled'`: Strip `reasoning_content` from all messages before
            prompt construction

            - `'interleaved'`: Strip `reasoning_content` from messages up to
            (and including) the last user message

            - `'preserved'`: Preserve historical `reasoning_content` across the
            conversation


            **Model support:**


            | Model | Default | Supported values |

            | --- | --- | --- |

            | Kimi K2 Instruct | `'preserved'` | `'disabled'`, `'interleaved'`,
            `'preserved'` |

            | MiniMax M2 | `'interleaved'` | `'disabled'`, `'interleaved'` |

            | GLM-4.7 | `'interleaved'` | `'disabled'`, `'interleaved'`,
            `'preserved'` |

            | GLM-4.6 | `'interleaved'` | `'disabled'`, `'interleaved'` |

            | Qwen 3.6 | `'preserved'` | `'disabled'`, `'preserved'` |

            | DeepSeek V4 | `'interleaved'` | `'interleaved'` |


            For other models, refer to the model provider's documentation.


            **Note:** This parameter controls prompt formatting only. To disable
            reasoning computation entirely, use `reasoning_effort='none'`.
        # ThinkingConfigEnabled, ThinkingConfigDisabled, or null. The mutual
        # exclusion with `reasoning_effort` stated in the description is not
        # expressed in the schema itself.
        thinking:
          anyOf:
            - $ref: '#/components/schemas/ThinkingConfigEnabled'
            - $ref: '#/components/schemas/ThinkingConfigDisabled'
            - type: 'null'
          title: Thinking
          description: >-
            Configuration for enabling extended thinking (Anthropic-compatible
            format). This is an alternative to `reasoning_effort` for
            controlling reasoning behavior.


            **Format:**


            - `{"type": "enabled"}` - Enable thinking (equivalent to
            `reasoning_effort: true`)

            - `{"type": "enabled", "budget_tokens": <int>}` - Enable thinking
            with a token budget (equivalent to `reasoning_effort: <int>`). Must
            be >= 1024.

            - `{"type": "disabled"}` - Disable thinking (equivalent to
            `reasoning_effort: "none"`)


            **Note:** Cannot be specified together with `reasoning_effort`. If
            both are provided, a validation error will be raised.
        # Nullable boolean; defaults to false.
        return_token_ids:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Return Token Ids
          description: Return token IDs alongside text to avoid retokenization drift.
          default: false
        # Deprecated array of ChatCompletionFunction objects; per the
        # description it is converted to `tools`.
        functions:
          items:
            $ref: '#/components/schemas/ChatCompletionFunction'
          type: array
          title: Functions
          description: >-
            Deprecated in OpenAI. Use 'tools' instead. This will be
            automatically transformed to tools.
          deprecated: true
        # Nullable integer token budget for the chat prompt; no bounds or
        # default are declared.
        prompt_truncate_len:
          anyOf:
            - type: integer
            - type: 'null'
          title: Prompt Truncate Len
          description: >-
            The size (in tokens) to which to truncate chat prompts. This
            includes the system prompt (if any), previous user/assistant
            messages, and the current user message. Earlier user/assistant
            messages will be evicted first to fit the prompt into this length.
            The system prompt is preserved whenever possible and only truncated
            as a last resort.


            This should usually be set to a number much smaller than the
            model's maximum context size, to allow enough remaining tokens for
            generating a response.


            If omitted, you may receive "prompt too long" errors in your
            responses as conversations grow. Note that even with this set, you
            may still receive "prompt too long" errors if individual messages
            (such as a very long system prompt or user message) exceed the
            model's context window on their own.
        # Nullable boolean. No `default` is declared, so the effective
        # behavior when omitted is not stated by the schema.
        parallel_tool_calls:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Parallel Tool Calls
          description: Enable parallel function calling.
        # Nullable boolean with no declared default. Per the description,
        # `prompt_truncate_len` is not applied while this is enabled.
        safe_tokenization:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Safe Tokenization
          description: >-
            When true, special tokens in user-provided content are never
            interpreted as actual special tokens during tokenization. This
            prevents prompt injection via special token strings (e.g.
            <|im_start|>, <｜User｜>). Supported for models using Jinja or
            HuggingFace chat templates with HuggingFace tokenizers. Returns an
            error if the model does not support it, or if combined with
            custom_chat_template on HuggingFace-backed models. Note:
            prompt_truncate_len is not applied when safe_tokenization is
            enabled.
        # Deprecated: `auto`/`none`, a FunctionNameSpec object, or null. Per
        # the description it is converted to `tool_choice`.
        function_call:
          anyOf:
            - type: string
              enum:
                - auto
                - none
            - $ref: '#/components/schemas/FunctionNameSpec'
            - type: 'null'
          title: Function Call
          description: >-
            Deprecated in OpenAI. Use 'tool_choice' instead. This will be
            automatically transformed to tool_choice.
          deprecated: true
      additionalProperties: false
      type: object
      required:
        - messages
        - model
      title: ChatCompletionRequest
    # A tool offered to the model. `type` is the only required property and
    # its enum admits only `function`; unknown keys are rejected.
    ChatCompletionTool:
      properties:
        type:
          type: string
          enum:
            - function
          title: Type
          description: The type of the tool. Currently, only `function` is supported.
        # NOTE(review): the description says this is required for function
        # tools, yet the property is nullable and absent from `required`, so
        # the schema accepts `{"type": "function"}` alone. Confirm whether the
        # server enforces presence.
        function:
          anyOf:
            - $ref: '#/components/schemas/ChatCompletionFunction'
            - type: 'null'
          description: Required for function tools.
      additionalProperties: false
      type: object
      required:
        - type
      title: ChatCompletionTool
    # One message in a chat conversation. Only `role` is required; unknown
    # keys are rejected by `additionalProperties: false`.
    ChatMessage:
      properties:
        # Free-form string: the schema declares no enum for the role.
        # NOTE(review): the description lists three roles, but `tool_call_id`
        # below suggests tool-result messages exist too — confirm the full set.
        role:
          type: string
          title: Role
          description: >-
            The role of the message's author. One of `system`, `user`, or
            `assistant`.
        # Plain string, array of ChatMessageContent parts, or null.
        # NOTE(review): the description calls `content` required, but it is
        # not listed under `required` — confirm which is intended.
        content:
          anyOf:
            - type: string
            - items:
                $ref: '#/components/schemas/ChatMessageContent'
              type: array
            - type: 'null'
          title: Content
          description: >-
            The contents of the message. `content` is required for all messages,
            and may be null for assistant messages with function calls.
        # Nullable string.
        reasoning_content:
          anyOf:
            - type: string
            - type: 'null'
          title: Reasoning Content
          description: >-
            The reasoning or thinking process generated by the model. This field
            is only available for certain reasoning models (GLM 4.5, GLM 4.5
            Air, GPT OSS 120B, GPT OSS 20B) and contains the model's internal
            reasoning that would otherwise appear in `<think></think>` tags
            within the content field.
        # Nullable array of ChatCompletionMessageToolCall objects.
        tool_calls:
          anyOf:
            - items:
                $ref: '#/components/schemas/ChatCompletionMessageToolCall'
              type: array
            - type: 'null'
          title: Tool Calls
          description: The tool calls generated by the model, such as function calls.
        # Nullable string; no description is declared.
        # NOTE(review): presumably the ID of the tool call this message
        # answers — confirm and add a `description`.
        tool_call_id:
          anyOf:
            - type: string
            - type: 'null'
          title: Tool Call Id
      additionalProperties: false
      type: object
      required:
        - role
      title: ChatMessage
      description: A chat completion message.
    # One part of a multi-part message body. `type` is the only required
    # property; `text`, `image_url` and `video_url` are all optional and
    # nullable. Unknown keys are rejected.
    ChatMessageContent:
      properties:
        # Free-form string: no enum is declared.
        # NOTE(review): presumably selects which of the payload fields below
        # applies — confirm the accepted values and document them.
        type:
          type: string
          title: Type
        text:
          anyOf:
            - type: string
            - type: 'null'
          title: Text
        image_url:
          anyOf:
            - $ref: '#/components/schemas/ChatMessageContentImageURL'
            - type: 'null'
        video_url:
          anyOf:
            - $ref: '#/components/schemas/ChatMessageContentVideoURL'
            - type: 'null'
      additionalProperties: false
      type: object
      required:
        - type
      title: ChatMessageContent
    # Image descriptor referenced by `ChatMessageContent.image_url`.
    ChatMessageContentImageURL:
      title: ChatMessageContentImageURL
      type: object
      # `url` must be present; `detail` is optional and may be null.
      required:
        - url
      additionalProperties: false
      properties:
        url:
          title: Url
          type: string
        detail:
          title: Detail
          anyOf:
            - type: string
            - type: 'null'
    # Video descriptor referenced by `ChatMessageContent.video_url`.
    ChatMessageContentVideoURL:
      title: ChatMessageContentVideoURL
      type: object
      # `url` must be present; every other property is optional and nullable.
      required:
        - url
      additionalProperties: false
      properties:
        url:
          title: Url
          type: string
        detail:
          title: Detail
          anyOf:
            - type: string
            - type: 'null'
        # Integer-valued knobs.
        max_frames:
          title: Max Frames
          anyOf:
            - type: integer
            - type: 'null'
        spatial_limit:
          title: Spatial Limit
          anyOf:
            - type: integer
            - type: 'null'
        # `number`, so fractional values are accepted.
        sample_fps:
          title: Sample Fps
          anyOf:
            - type: number
            - type: 'null'
    CompletionRequest:
      properties:
        model:
          type: string
          title: Model
          description: |-
            The name of the model to use.

            Example: `"accounts/fireworks/models/kimi-k2-instruct-0905"`
        user:
          anyOf:
            - type: string
            - type: 'null'
          title: User
          description: >-
            A unique identifier representing your end-user, which can help
            monitor and detect abuse.
        prompt_cache_key:
          anyOf:
            - type: string
            - type: 'null'
          title: Prompt Cache Key
          description: >-
            A key used for prompt caching session affinity. Requests with the
            same prompt_cache_key are routed to the same backend to maximize KV
            cache hit rates. This is the preferred field for session affinity
            (takes priority over the 'user' field).
        prompt_cache_isolation_key:
          anyOf:
            - type: string
            - type: 'null'
          title: Prompt Cache Isolation Key
          description: Isolation key for prompt caching to separate cache entries.
        raw_output:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Raw Output
          description: Return raw output from the model.
          default: false
        perf_metrics_in_response:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Perf Metrics In Response
          description: >-
            Whether to include performance metrics in the response body.


            **Non-streaming requests:** Performance metrics are always included
            in response headers (e.g., `fireworks-prompt-tokens`,
            `fireworks-server-time-to-first-token`). Setting this to `true`
            additionally includes the same metrics in the response body under
            the `perf_metrics` field.


            **Streaming requests:** Performance metrics are only included in the
            response body under the `perf_metrics` field in the final chunk
            (when `finish_reason` is set). This is because headers may not be
            accessible during streaming.


            The response body `perf_metrics` field contains the following
            metrics:


            **Basic Metrics (all deployments):**


            - `prompt-tokens`: Number of tokens in the prompt

            - `cached-prompt-tokens`: Number of cached prompt tokens

            - `server-time-to-first-token`: Time from request start to first
            token (in seconds)

            - `server-processing-time`: Total processing time (in seconds, only
            for completed requests)


            **Predicted Outputs Metrics:**


            - `speculation-prompt-tokens`: Number of speculative prompt tokens

            - `speculation-prompt-matched-tokens`: Number of matched speculative
            prompt tokens (for completed requests)


            **Dedicated Deployment Only Metrics:**


            - `speculation-generated-tokens`: Number of speculative generated
            tokens (for completed requests)

            - `speculation-acceptance`: Speculation acceptance rates by position

            - `backend-host`: Hostname of the backend server

            - `num-concurrent-requests`: Number of concurrent requests

            - `deployment`: Deployment name

            - `tokenizer-queue-duration`: Time spent in tokenizer queue

            - `tokenizer-duration`: Time spent in tokenizer

            - `prefill-queue-duration`: Time spent in prefill queue

            - `prefill-duration`: Time spent in prefill

            - `generation-queue-duration`: Time spent in generation queue

            - `generation-duration`: Time spent in generation
          default: false
        stream:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Stream
          description: >-
            Whether to stream back partial progress. If set, tokens will be sent
            as data-only [server-sent
            events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
            as they become available, with the stream terminated by a `data:
            [DONE]` message.
          default: false
        'n':
          type: integer
          # Bounds mirror the "Required range" stated in the description so
          # that validators and generated clients can enforce it.
          minimum: 1
          maximum: 128
          title: 'N'
          description: >-
            How many completions to generate for each prompt.


            **Note:** Because this parameter generates many completions, it can
            quickly consume your token quota. Use carefully and ensure that you
            have reasonable settings for `max_tokens` and `stop`.


            Required range: `1 <= x <= 128`


            Example: `1`
          default: 1
        service_tier:
          type: string
          enum:
            - auto
            - default
            - flex
            - priority
          title: Service Tier
          description: >-
            The service tier to use for the request. Specifies the processing
            type used for serving the request. Only "priority" is supported,
            while all other values will be treated as "default" tier.
          default: default
        stop:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - type: 'null'
          title: Stop
          description: >-
            Up to 4 sequences where the API will stop generating further tokens.
            The returned text will NOT contain the stop sequence.
        max_tokens:
          anyOf:
            - type: integer
            - type: 'null'
          title: Max Tokens
          description: >-
            The maximum number of tokens to generate in the completion. If the
            token count of your prompt plus max_tokens exceeds the model's
            context length, the behavior depends on
            context_length_exceeded_behavior. By default, max_tokens will be
            lowered to fit in the context window instead of returning an error.
        max_completion_tokens:
          anyOf:
            - type: integer
            - type: 'null'
          title: Max Completion Tokens
          description: Alias for max_tokens. Cannot be specified together with max_tokens.
        temperature:
          anyOf:
            # The numeric branch carries the documented 0..2 range; `null`
            # remains accepted as before.
            - type: number
              minimum: 0
              maximum: 2
            - type: 'null'
          title: Temperature
          description: >-
            What sampling temperature to use, between 0 and 2. Higher values
            like 0.8 will make the output more random, while lower values like
            0.2 will make it more focused and deterministic.


            We generally recommend altering this or top_p but not both.


            Required range: `0 <= x <= 2`


            Example: `1`
        top_k:
          anyOf:
            # The integer branch carries the documented 0..100 range; `null`
            # remains accepted as before.
            - type: integer
              minimum: 0
              maximum: 100
            - type: 'null'
          title: Top K
          description: >-
            Top-k sampling is another sampling method where the k most probable
            next tokens are filtered and the probability mass is redistributed
            among only those k next tokens. The value of k controls the number
            of candidates for the next token at each step during text
            generation. Must be between 0 and 100.


            Required range: `0 <= x <= 100`


            Example: `50`
        top_p:
          anyOf:
            # The numeric branch carries the documented 0..1 range; `null`
            # remains accepted as before.
            - type: number
              minimum: 0
              maximum: 1
            - type: 'null'
          title: Top P
          description: >-
            An alternative to sampling with temperature, called nucleus
            sampling, where the model considers the results of the tokens with
            top_p probability mass. So 0.1 means only the tokens comprising the
            top 10% probability mass are considered.


            We generally recommend altering this or temperature but not both.


            Required range: `0 <= x <= 1`


            Example: `1`
        min_p:
          anyOf:
            # The numeric branch carries the documented 0..1 range; `null`
            # remains accepted as before.
            - type: number
              minimum: 0
              maximum: 1
            - type: 'null'
          title: Min P
          description: >-
            Minimum probability threshold for token selection. Only tokens with
            probability >= min_p are considered for selection. This is an
            alternative to `top_p` and `top_k` sampling.


            Required range: `0 <= x <= 1`
        typical_p:
          anyOf:
            # The numeric branch carries the documented 0..1 range; `null`
            # remains accepted as before.
            - type: number
              minimum: 0
              maximum: 1
            - type: 'null'
          title: Typical P
          description: >-
            Typical-p sampling is an alternative to nucleus sampling. It
            considers the most typical tokens whose cumulative probability is at
            most typical_p.


            Required range: `0 <= x <= 1`
        frequency_penalty:
          anyOf:
            # The numeric branch carries the documented -2..2 range; `null`
            # remains accepted as before.
            - type: number
              minimum: -2
              maximum: 2
            - type: 'null'
          title: Frequency Penalty
          description: >-
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on their existing frequency in the text so far, decreasing the
            model's likelihood to repeat the same line verbatim.


            Reasonable value is around 0.1 to 1 if the aim is to just reduce
            repetitive samples somewhat. If the aim is to strongly suppress
            repetition, then one can increase the coefficients up to 2, but this
            can noticeably degrade the quality of samples. Negative values can
            be used to increase the likelihood of repetition.


            See also `presence_penalty` for penalizing tokens that have at least
            one appearance at a fixed rate.


            OpenAI compatible (follows OpenAI's conventions for handling token
            frequency and repetition penalties).


            Required range: `-2 <= x <= 2`
        presence_penalty:
          anyOf:
            # The numeric branch carries the documented -2..2 range; `null`
            # remains accepted as before.
            - type: number
              minimum: -2
              maximum: 2
            - type: 'null'
          title: Presence Penalty
          description: >-
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on whether they appear in the text so far, increasing the
            model's likelihood to talk about new topics.


            Reasonable value is around 0.1 to 1 if the aim is to just reduce
            repetitive samples somewhat. If the aim is to strongly suppress
            repetition, then one can increase the coefficients up to 2, but this
            can noticeably degrade the quality of samples. Negative values can
            be used to increase the likelihood of repetition.


            See also `frequency_penalty` for penalizing tokens at an increasing
            rate depending on how often they appear.


            OpenAI compatible (follows OpenAI's conventions for handling token
            frequency and repetition penalties).


            Required range: `-2 <= x <= 2`
        repetition_penalty:
          anyOf:
            # The numeric branch carries the documented 0..2 range; `null`
            # remains accepted as before.
            - type: number
              minimum: 0
              maximum: 2
            - type: 'null'
          title: Repetition Penalty
          description: >-
            Applies a penalty to repeated tokens to discourage or encourage
            repetition. A value of `1.0` means no penalty, allowing free
            repetition. Values above `1.0` penalize repetition, reducing the
            likelihood of repeating tokens. Values between `0.0` and `1.0`
            reward repetition, increasing the chance of repeated tokens. For a
            good balance, a value of `1.2` is often recommended. Note that the
            penalty is applied to both the generated output and the prompt in
            decoder-only models.


            Required range: `0 <= x <= 2`
        mirostat_target:
          anyOf:
            - type: number
            - type: 'null'
          title: Mirostat Target
          description: >-
            Defines the target perplexity for the Mirostat algorithm. Perplexity
            measures the unpredictability of the generated text, with higher
            values encouraging more diverse and creative outputs, while lower
            values prioritize predictability and coherence. The algorithm
            dynamically adjusts the token selection to maintain this target
            during text generation.


            If not specified, Mirostat sampling is disabled.
        mirostat_lr:
          anyOf:
            - type: number
            - type: 'null'
          title: Mirostat Lr
          description: >-
            Specifies the learning rate for the Mirostat sampling algorithm,
            which controls how quickly the model adjusts its token distribution
            to maintain the target perplexity. A smaller value slows down the
            adjustments, leading to more stable but gradual shifts, while higher
            values speed up corrections at the cost of potential instability.
        seed:
          anyOf:
            - type: integer
            - type: 'null'
          title: Seed
          description: Random seed for deterministic sampling.
        logprobs:
          anyOf:
            # Integer (legacy) form: the description limits N to 0-5, so the
            # bounds live on this branch only. The boolean and `null` branches
            # are unchanged.
            - type: integer
              minimum: 0
              maximum: 5
            - type: boolean
            - type: 'null'
          title: Logprobs
          description: >-
            Include log probabilities in the response. This accepts either a
            boolean or an integer:


            If set to `true`, log probabilities are included and the number of
            alternatives can be controlled via `top_logprobs` (OpenAI-compatible
            behavior).


            If set to an integer N (0-5), include log probabilities for up to N
            most likely tokens per position in the legacy format.


            The API will always return the logprob of the sampled token, so
            there may be up to `logprobs+1` elements in the response when an
            integer is used. The maximum value for the integer form is 5.
        top_logprobs:
          anyOf:
            # The integer branch carries the documented 0..5 range; `null`
            # remains accepted as before.
            - type: integer
              minimum: 0
              maximum: 5
            - type: 'null'
          title: Top Logprobs
          description: >-
            An integer between 0 and 5 specifying the number of most likely
            tokens to return at each token position, each with an associated log
            probability. The minimum value is 0 and the maximum value is 5.


            When `logprobs` is set, `top_logprobs` can be used to modify how
            many top log probabilities are returned. If `top_logprobs` is not
            set, the API will return up to `logprobs` tokens per position.


            Required range: `0 <= x <= 5`
        echo:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Echo
          description: Echo back the prompt in addition to the completion.
          default: false
        echo_last:
          anyOf:
            - type: integer
            - type: 'null'
          title: Echo Last
          description: >-
            Echo back the last N tokens of the prompt in addition to the
            completion. This is useful for obtaining logprobs of the prompt
            suffix but without transferring too much data. Passing
            `echo_last=len(prompt)` is the same as `echo=True`
        ignore_eos:
          type: boolean
          title: Ignore Eos
          description: >-
            This setting controls whether the model should ignore the End of
            Sequence (EOS) token. When set to `True`, the model will continue
            generating tokens even after the EOS token is produced. By default,
            it stops when the EOS token is reached.
          default: false
        context_length_exceeded_behavior:
          type: string
          enum:
            - error
            - truncate
          title: Context Length Exceeded Behavior
          description: >-
            What to do if the token count of prompt plus `max_tokens` exceeds
            the model's context window.


            Passing `truncate` limits the `max_tokens` to at most
            `context_window_length - prompt_length`. This is the default.


            Passing `error` would trigger a request error.


            The default of `'truncate'` is selected because it allows requesting
            a high `max_tokens` value while respecting the context window length
            without having to do client-side prompt tokenization.


            Note that this differs from OpenAI's behavior, which matches that of
            `error`.
          default: truncate
        response_format:
          anyOf:
            - $ref: '#/components/schemas/ResponseFormat'
            - type: 'null'
          description: >-
            Forces the model to produce a specific output format.


            Setting to `{ "type": "json_object" }` enables JSON mode, which
            guarantees the message the model generates is valid JSON.


            If `"type"` is `"json_schema"`, a JSON schema must be provided.
            E.g., `response_format = {"type": "json_schema", "json_schema":
            <json_schema>}`.


            Important: when using JSON mode, it's crucial to also instruct the
            model to produce JSON via a system or user message. Without this,
            the model may generate an unending stream of whitespace until the
            generation reaches the token limit, resulting in a long-running and
            seemingly "stuck" request.


            Also note that the message content may be partially cut off if
            `finish_reason="length"`, which indicates the generation exceeded
            `max_tokens` or the conversation exceeded the max context length. In
            this case the return value might not be valid JSON.
        logit_bias:
          anyOf:
            - additionalProperties:
                type: number
              type: object
            - type: 'null'
          title: Logit Bias
          description: >-
            Modify the likelihood of specified tokens appearing in the
            completion. Accepts a json object that maps tokens (specified by
            their token ID in the tokenizer) to an associated bias value from
            -100 to 100. Mathematically, the bias is added to the logits
            generated by the model prior to sampling.
        speculation:
          anyOf:
            - type: string
            - items:
                type: integer
              type: array
            - type: 'null'
          title: Speculation
          description: Speculative decoding prompt or token IDs to speed up generation.
        prediction:
          anyOf:
            - $ref: '#/components/schemas/PredictedOutput'
            - type: string
            - type: 'null'
          title: Prediction
          description: >-
            OpenAI-compatible predicted output for speculative decoding. Can be
            a PredictedOutput object or a simple string. Automatically
            transformed to speculation.
        metadata:
          anyOf:
            - additionalProperties:
                type: string
              type: object
            - type: 'null'
          title: Metadata
          description: >-
            Additional metadata to store with the request for
            tracing/distillation.
        reasoning_effort:
          anyOf:
            - type: string
              enum:
                - low
                - medium
                - high
                - max
                - none
            - type: integer
            - type: boolean
            - type: 'null'
          title: Reasoning Effort
          description: >-
            Controls reasoning behavior for supported models. When enabled, the
            model's reasoning appears in the `reasoning_content` field of the
            response, separate from the final answer in `content`.


            **Accepted values:**


            - **String** (OpenAI-compatible): `'low'`, `'medium'`, `'high'`, or
            `'max'` to enable reasoning with varying effort levels; `'none'` to
            disable reasoning.

            - **Boolean** (Fireworks extension): `true` to enable reasoning,
            `false` to disable it.

            - **Integer** (Fireworks extension): A positive integer to set a
            hard token limit on reasoning output (only effective for
            grammar-based reasoning models).


            **Important:** Boolean values are normalized internally: `true`
            becomes `'medium'`, and `false` becomes `'none'`. This normalization
            happens before model-specific validation, so if a model doesn't
            support `'none'`, passing `false` will produce an error referencing
            `'none'`.


            **Model-specific behavior:**


            - **Qwen3 (e.g., Qwen3-8B)**: Grammar-based reasoning. Default
            reasoning on. Use `'none'` or `false` to disable. Supports integer
            token limits to cap reasoning output. `'low'` maps to a default
            token limit (~3000 tokens).

            - **MiniMax M2**: Reasoning is required (always on). Defaults to
            `'medium'` when omitted. Accepts only string `reasoning_effort`:
            `'low'`, `'medium'`, or `'high'`. `'none'` and boolean values are
            rejected.

            - **DeepSeek V3.1**: Binary on/off reasoning. Default reasoning off
            (matches chat template). Use `true`, `'low'`, `'medium'`, or
            `'high'` to enable; `'none'` or `false` to disable.

            - **DeepSeek V3.2**: Binary on/off reasoning. Default reasoning on.
            Use `'none'` or `false` to disable; effort levels and integers have
            no additional effect.

            - **DeepSeek V4**: Accepts `'none'`, `'low'`, `'medium'`, `'high'`,
            and `'max'`. Default reasoning on (`'high'`). `'max'` prepends a
            thorough-reasoning preamble; `'high'` enables thinking. `'low'` and
            `'medium'` are silently promoted to `'high'`. `'none'` or `false`
            disables thinking.

            - **GLM 4.5, GLM 4.5 Air, GLM 4.6, GLM 4.7**: Binary on/off
            reasoning. Default reasoning on. Use `'none'` or `false` to disable;
            effort levels and integers have no additional effect.

            - **Harmony (OpenAI GPT-OSS 120B, GPT-OSS 20B)**: Accepts only
            `'low'`, `'medium'`, or `'high'`. Does not support `'none'`,
            `false`, or integer values — using these will return an error (e.g.,
            "Invalid reasoning effort: none"). When omitted, defaults to
            `'medium'`. Lower effort produces faster responses with shorter
            reasoning.
        reasoning_history:
          anyOf:
            - type: string
              enum:
                - disabled
                - interleaved
                - preserved
            - type: 'null'
          title: Reasoning History
          description: >-
            Controls how historical assistant reasoning content is included in
            the prompt for multi-turn conversations.


            **Accepted values:**


            - `null`: Use model/template default behavior (for **GLM-4.7**, the
            model/template default is `'interleaved'`, i.e. historical reasoning
            is cleared by default)

            - `'disabled'`: Strip `reasoning_content` from all messages before
            prompt construction

            - `'interleaved'`: Strip `reasoning_content` from messages up to
            (and including) the last user message

            - `'preserved'`: Preserve historical `reasoning_content` across the
            conversation


            **Model support:**


            | Model | Default | Supported values |

            | --- | --- | --- |

            | Kimi K2 Instruct | `'preserved'` | `'disabled'`, `'interleaved'`,
            `'preserved'` |

            | MiniMax M2 | `'interleaved'` | `'disabled'`, `'interleaved'` |

            | GLM-4.7 | `'interleaved'` | `'disabled'`, `'interleaved'`,
            `'preserved'` |

            | GLM-4.6 | `'interleaved'` | `'disabled'`, `'interleaved'` |

            | Qwen 3.6 | `'preserved'` | `'disabled'`, `'preserved'` |

            | DeepSeek V4 | `'interleaved'` | `'interleaved'` |


            For other models, refer to the model provider's documentation.


            **Note:** This parameter controls prompt formatting only. To disable
            reasoning computation entirely, use `reasoning_effort='none'`.
        thinking:
          anyOf:
            - $ref: '#/components/schemas/ThinkingConfigEnabled'
            - $ref: '#/components/schemas/ThinkingConfigDisabled'
            - type: 'null'
          title: Thinking
          description: >-
            Configuration for enabling extended thinking (Anthropic-compatible
            format). This is an alternative to `reasoning_effort` for
            controlling reasoning behavior.


            **Format:**


            - `{"type": "enabled"}` - Enable thinking (equivalent to
            `reasoning_effort: true`)

            - `{"type": "enabled", "budget_tokens": <int>}` - Enable thinking
            with a token budget (equivalent to `reasoning_effort: <int>`). Must
            be >= 1024.

            - `{"type": "disabled"}` - Disable thinking (equivalent to
            `reasoning_effort: "none"`)


            **Note:** Cannot be specified together with `reasoning_effort`. If
            both are provided, a validation error will be raised.
        return_token_ids:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Return Token Ids
          description: Return token IDs alongside text to avoid retokenization drift.
          default: false
        prompt:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - items:
                type: integer
              type: array
            - items:
                items:
                  type: integer
                type: array
              type: array
          title: Prompt
          description: >-
            The prompt to generate completions for.


            It can be a single string or an array of strings.


            It can also be an array of integers or an array of integer arrays,
            which allows passing an already tokenized prompt.


            If multiple prompts are specified, several choices with
            corresponding `index` will be returned in the output.
        images:
          anyOf:
            - items:
                type: string
              type: array
            - items:
                items:
                  type: string
                type: array
              type: array
            - type: 'null'
          title: Images
          description: >-
            The list of base64-encoded images for visual language completion
            generation.


            They should be formatted as MIME_TYPE,<base64 encoded str>


            e.g. data:image/jpeg;base64,<base64 encoded str>


            Additionally, the number of images provided should match the number
            of '<image>' special tokens in the prompt.
      additionalProperties: false
      type: object
      required:
        - prompt
        - model
      title: CompletionRequest
    EmbeddingRequest:
      properties:
        model:
          type: string
          title: Model
          description: The name of the model to use.
        user:
          anyOf:
            - type: string
            - type: 'null'
          title: User
          description: >-
            A unique identifier representing your end-user, which can help
            monitor and detect abuse.
        prompt_cache_key:
          anyOf:
            - type: string
            - type: 'null'
          title: Prompt Cache Key
          description: >-
            A key used for prompt caching session affinity. Requests with the
            same prompt_cache_key are routed to the same backend to maximize KV
            cache hit rates. This is the preferred field for session affinity
            (takes priority over the 'user' field).
        prompt_cache_isolation_key:
          anyOf:
            - type: string
            - type: 'null'
          title: Prompt Cache Isolation Key
          description: Isolation key for prompt caching to separate cache entries.
        raw_output:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Raw Output
          description: Return raw output from the model.
          default: false
        perf_metrics_in_response:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Perf Metrics In Response
          description: >-
            Whether to include performance metrics in the response body.


            **Non-streaming requests:** Performance metrics are always included
            in response headers (e.g., `fireworks-prompt-tokens`,
            `fireworks-server-time-to-first-token`). Setting this to `true`
            additionally includes the same metrics in the response body under
            the `perf_metrics` field.


            **Streaming requests:** Performance metrics are only included in the
            response body under the `perf_metrics` field in the final chunk
            (when `finish_reason` is set). This is because headers may not be
            accessible during streaming.


            The response body `perf_metrics` field contains the following
            metrics:


            **Basic Metrics (all deployments):**


            - `prompt-tokens`: Number of tokens in the prompt

            - `cached-prompt-tokens`: Number of cached prompt tokens

            - `server-time-to-first-token`: Time from request start to first
            token (in seconds)

            - `server-processing-time`: Total processing time (in seconds, only
            for completed requests)


            **Predicted Outputs Metrics:**


            - `speculation-prompt-tokens`: Number of speculative prompt tokens

            - `speculation-prompt-matched-tokens`: Number of matched speculative
            prompt tokens (for completed requests)


            **Dedicated Deployment Only Metrics:**


            - `speculation-generated-tokens`: Number of speculative generated
            tokens (for completed requests)

            - `speculation-acceptance`: Speculation acceptance rates by position

            - `backend-host`: Hostname of the backend server

            - `num-concurrent-requests`: Number of concurrent requests

            - `deployment`: Deployment name

            - `tokenizer-queue-duration`: Time spent in tokenizer queue

            - `tokenizer-duration`: Time spent in tokenizer

            - `prefill-queue-duration`: Time spent in prefill queue

            - `prefill-duration`: Time spent in prefill

            - `generation-queue-duration`: Time spent in generation queue

            - `generation-duration`: Time spent in generation
          default: false
        input:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - items:
                items:
                  type: integer
                type: array
              type: array
            - items:
                type: integer
              type: array
            - additionalProperties: true
              type: object
            - items:
                additionalProperties: true
                type: object
              type: array
          title: Input
        prompt_template:
          anyOf:
            - type: string
            - type: 'null'
          title: Prompt Template
        encoding_format:
          type: string
          enum:
            - float
            - base64
          title: Encoding Format
          default: float
        dimensions:
          anyOf:
            - type: integer
            - type: 'null'
          title: Dimensions
        return_logits:
          anyOf:
            - items:
                type: integer
              type: array
            - type: 'null'
          title: Return Logits
        normalize:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Normalize
          default: false
        fanout_size:
          anyOf:
            - type: integer
            - type: 'null'
          title: Fanout Size
      additionalProperties: false
      type: object
      required:
        - input
        - model
      title: EmbeddingRequest
    # Object with a single required string `name`; unknown keys are rejected.
    FunctionNameSpec:
      type: object
      title: FunctionNameSpec
      required:
        - name
      properties:
        name:
          title: Name
          type: string
      additionalProperties: false
    # `type` is required and fixed to "function"; `function` is an optional
    # FunctionNameSpec reference that may also be null.
    FunctionSelection:
      type: object
      title: FunctionSelection
      required:
        - type
      properties:
        type:
          title: Type
          type: string
          const: function
        function:
          anyOf:
            - $ref: '#/components/schemas/FunctionNameSpec'
            - type: 'null'
      additionalProperties: false
    # `content` is required and is either a string or a list of
    # ChatMessageContent items; `type` is fixed to "content" and defaults to it.
    PredictedOutput:
      type: object
      title: PredictedOutput
      description: OpenAI-compatible struct for the "speculation" field.
      required:
        - content
      properties:
        content:
          title: Content
          anyOf:
            - type: string
            - type: array
              items:
                $ref: '#/components/schemas/ChatMessageContent'
        type:
          title: Type
          type: string
          const: content
          default: content
      additionalProperties: false
    # `query` and `documents` are required. Note that this schema does not
    # declare `additionalProperties`, so extra keys are not rejected here.
    RerankRequestBody:
      type: object
      title: RerankRequestBody
      required:
        - query
        - documents
      properties:
        model:
          title: Model
          anyOf:
            - type: string
            - type: 'null'
        query:
          title: Query
          type: string
        documents:
          title: Documents
          type: array
          items:
            type: string
        top_n:
          title: Top N
          anyOf:
            - type: integer
            - type: 'null'
        return_documents:
          title: Return Documents
          type: boolean
          default: true
        task:
          title: Task
          anyOf:
            - type: string
            - type: 'null'
    # `type` is the only required key and selects one of four format names.
    # `schema` and `json_schema` accept a string, a free-form object, or null;
    # `grammar` accepts a string or null.
    ResponseFormat:
      type: object
      title: ResponseFormat
      required:
        - type
      properties:
        type:
          title: Type
          type: string
          enum:
            - json_object
            - json_schema
            - grammar
            - text
        schema:
          title: Schema
          anyOf:
            - type: string
            - type: object
              additionalProperties: true
            - type: 'null'
        grammar:
          title: Grammar
          anyOf:
            - type: string
            - type: 'null'
        json_schema:
          title: Json Schema
          anyOf:
            - type: string
            - type: object
              additionalProperties: true
            - type: 'null'
      additionalProperties: false
    # Single optional flag `include_usage` (boolean or null, default false).
    StreamOptions:
      type: object
      title: StreamOptions
      properties:
        include_usage:
          title: Include Usage
          anyOf:
            - type: boolean
            - type: 'null'
          default: false
      additionalProperties: false
    # Generation parameters referenced by TgiGenerateRequest.parameters.
    # Every field is optional; unknown keys are rejected.
    TgiGenerateParameters:
      type: object
      title: TgiGenerateParameters
      properties:
        details:
          title: Details
          type: boolean
          default: false
        max_new_tokens:
          title: Max New Tokens
          anyOf:
            - type: integer
            - type: 'null'
        repetition_penalty:
          title: Repetition Penalty
          anyOf:
            - type: number
            - type: 'null'
        return_full_text:
          title: Return Full Text
          type: boolean
          default: false
        stop:
          title: Stop
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
        temperature:
          title: Temperature
          anyOf:
            - type: number
            - type: 'null'
        top_k:
          title: Top K
          anyOf:
            - type: integer
            - type: 'null'
        top_p:
          title: Top P
          anyOf:
            - type: number
            - type: 'null'
        truncate:
          title: Truncate
          anyOf:
            - type: integer
            - type: 'null'
        typical_p:
          title: Typical P
          anyOf:
            - type: number
            - type: 'null'
      additionalProperties: false
    # Request body with required `inputs` (string) and `model` (string), plus
    # optional caching, metrics, streaming and generation-parameter fields.
    TgiGenerateRequest:
      type: object
      title: TgiGenerateRequest
      required:
        - inputs
        - model
      properties:
        model:
          title: Model
          description: The name of the model to use.
          type: string
        user:
          title: User
          description: >-
            A unique identifier representing your end-user, which can help
            monitor and detect abuse.
          anyOf:
            - type: string
            - type: 'null'
        prompt_cache_key:
          title: Prompt Cache Key
          description: >-
            A key used for prompt caching session affinity. Requests with the
            same prompt_cache_key are routed to the same backend to maximize KV
            cache hit rates. This is the preferred field for session affinity
            (takes priority over the 'user' field).
          anyOf:
            - type: string
            - type: 'null'
        prompt_cache_isolation_key:
          title: Prompt Cache Isolation Key
          description: Isolation key for prompt caching to separate cache entries.
          anyOf:
            - type: string
            - type: 'null'
        raw_output:
          title: Raw Output
          description: Return raw output from the model.
          anyOf:
            - type: boolean
            - type: 'null'
          default: false
        perf_metrics_in_response:
          title: Perf Metrics In Response
          description: >-
            Whether to include performance metrics in the response body.


            **Non-streaming requests:** Performance metrics are always included
            in response headers (e.g., `fireworks-prompt-tokens`,
            `fireworks-server-time-to-first-token`). Setting this to `true`
            additionally includes the same metrics in the response body under
            the `perf_metrics` field.


            **Streaming requests:** Performance metrics are only included in the
            response body under the `perf_metrics` field in the final chunk
            (when `finish_reason` is set). This is because headers may not be
            accessible during streaming.


            The response body `perf_metrics` field contains the following
            metrics:


            **Basic Metrics (all deployments):**


            - `prompt-tokens`: Number of tokens in the prompt

            - `cached-prompt-tokens`: Number of cached prompt tokens

            - `server-time-to-first-token`: Time from request start to first
            token (in seconds)

            - `server-processing-time`: Total processing time (in seconds, only
            for completed requests)


            **Predicted Outputs Metrics:**


            - `speculation-prompt-tokens`: Number of speculative prompt tokens

            - `speculation-prompt-matched-tokens`: Number of matched speculative
            prompt tokens (for completed requests)


            **Dedicated Deployment Only Metrics:**


            - `speculation-generated-tokens`: Number of speculative generated
            tokens (for completed requests)

            - `speculation-acceptance`: Speculation acceptance rates by position

            - `backend-host`: Hostname of the backend server

            - `num-concurrent-requests`: Number of concurrent requests

            - `deployment`: Deployment name

            - `tokenizer-queue-duration`: Time spent in tokenizer queue

            - `tokenizer-duration`: Time spent in tokenizer

            - `prefill-queue-duration`: Time spent in prefill queue

            - `prefill-duration`: Time spent in prefill

            - `generation-queue-duration`: Time spent in generation queue

            - `generation-duration`: Time spent in generation
          anyOf:
            - type: boolean
            - type: 'null'
          default: false
        inputs:
          title: Inputs
          type: string
        parameters:
          anyOf:
            - $ref: '#/components/schemas/TgiGenerateParameters'
            - type: 'null'
        stream:
          title: Stream
          anyOf:
            - type: boolean
            - type: 'null'
          default: false
      additionalProperties: false
    # Carries only the required discriminator `type`, fixed to "disabled".
    ThinkingConfigDisabled:
      type: object
      title: ThinkingConfigDisabled
      description: >-
        Configuration for disabling extended thinking (Anthropic-compatible
        format).
      required:
        - type
      properties:
        type:
          title: Type
          type: string
          const: disabled
      additionalProperties: false
    # Required discriminator `type` is fixed to "enabled". `budget_tokens` is
    # optional (integer or null); the integer branch carries `minimum: 1024`
    # so the schema enforces the lower bound stated in its description.
    ThinkingConfigEnabled:
      properties:
        type:
          type: string
          const: enabled
          title: Type
        budget_tokens:
          anyOf:
            - type: integer
              minimum: 1024
            - type: 'null'
          title: Budget Tokens
          description: >-
            Determines how many tokens the model can use for its internal
            reasoning process. Larger budgets can enable more thorough analysis
            for complex problems, improving response quality. Must be >= 1024 if
            specified.
      additionalProperties: false
      type: object
      required:
        - type
      title: ThinkingConfigEnabled
      description: >-
        Configuration for enabling extended thinking (Anthropic-compatible
        format).
    # Non-streaming completion payload. `id`, `created`, `model`, `choices`
    # and `usage` are required; `perf_metrics` is a free-form object or null.
    CompletionResponse:
      type: object
      title: CompletionResponse
      description: The response message from a /v1/completions call.
      required:
        - id
        - created
        - model
        - choices
        - usage
      properties:
        id:
          title: Id
          description: A unique identifier of the response
          type: string
        object:
          title: Object
          description: The object type, which is always "text_completion"
          type: string
          default: text_completion
        created:
          title: Created
          description: The Unix time in seconds when the response was generated
          type: integer
        model:
          title: Model
          description: The model used for the completion
          type: string
        choices:
          title: Choices
          description: The list of generated completion choices
          type: array
          items:
            $ref: '#/components/schemas/Choice'
        usage:
          description: Usage statistics for the completion
          $ref: '#/components/schemas/UsageInfo'
        perf_metrics:
          title: Perf Metrics
          description: >-
            See parameter
            [perf_metrics_in_response](#body-perf-metrics-in-response)
          anyOf:
            - type: object
              additionalProperties: true
            - type: 'null'
          default: null
      additionalProperties: false
    # One entry of CompletionResponse.choices. `index` and `text` are
    # required; every other field is nullable and defaults to null.
    Choice:
      type: object
      title: Choice
      description: A completion choice.
      required:
        - index
        - text
      properties:
        index:
          title: Index
          description: The index of the completion choice
          type: integer
        text:
          title: Text
          description: The completion response
          type: string
        logprobs:
          title: Logprobs
          description: The log probabilities of the most likely tokens
          anyOf:
            - $ref: '#/components/schemas/LogProbs'
            - $ref: '#/components/schemas/NewLogProbs'
            - type: 'null'
          default: null
        finish_reason:
          title: Finish Reason
          description: >-
            The reason the model stopped generating tokens. This will be "stop"
            if the model hit a natural stop point or a provided stop sequence,
            or "length" if the maximum number of tokens specified in the request
            was reached
          anyOf:
            - type: string
              enum:
                - stop
                - length
                - error
            - type: 'null'
          default: null
        raw_output:
          anyOf:
            - $ref: '#/components/schemas/RawOutput'
            - type: 'null'
          default: null
        prompt_token_ids:
          title: Prompt Token Ids
          description: Token IDs for the prompt (when return_token_ids=true)
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
        token_ids:
          title: Token Ids
          description: >-
            Token IDs for the generated completion (when
            return_token_ids=true)
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
      additionalProperties: false
    # Parallel arrays of tokens, their log probabilities and text offsets.
    # No field is marked required; unknown keys are rejected.
    LogProbs:
      type: object
      title: LogProbs
      description: Legacy log probabilities format
      properties:
        tokens:
          title: Tokens
          type: array
          items:
            type: string
        token_logprobs:
          title: Token Logprobs
          type: array
          items:
            type: number
        top_logprobs:
          title: Top Logprobs
          anyOf:
            - type: array
              items:
                type: object
                additionalProperties:
                  type: number
            - type: 'null'
        text_offset:
          title: Text Offset
          type: array
          items:
            type: integer
        token_ids:
          title: Token Ids
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
      additionalProperties: false
    # Wrapper around a `content` array of NewLogProbsContent entries.
    NewLogProbs:
      type: object
      title: NewLogProbs
      description: OpenAI-compatible log probabilities format
      properties:
        content:
          title: Content
          type: array
          items:
            $ref: '#/components/schemas/NewLogProbsContent'
      additionalProperties: false
    # Log-probability record for a single token. `sampling_logprob` is
    # required but may be null; `top_logprobs`, `last_activation` and
    # `routing_matrix` are optional.
    NewLogProbsContent:
      type: object
      title: NewLogProbsContent
      required:
        - token
        - logprob
        - sampling_logprob
        - bytes
        - token_id
        - text_offset
      properties:
        token:
          type: string
          title: Token
        logprob:
          type: number
          title: Logprob
        sampling_logprob:
          title: Sampling Logprob
          anyOf:
            - type: number
            - type: 'null'
        bytes:
          title: Bytes
          type: array
          items:
            type: integer
        top_logprobs:
          title: Top Logprobs
          type: array
          items:
            $ref: '#/components/schemas/NewLogProbsContentTopLogProbs'
        token_id:
          type: integer
          title: Token Id
        text_offset:
          type: integer
          title: Text Offset
        last_activation:
          title: Last Activation
          anyOf:
            - type: string
            - type: 'null'
          default: null
        routing_matrix:
          title: Routing Matrix
          anyOf:
            - type: string
            - type: 'null'
          default: null
      additionalProperties: false
    # Item type of NewLogProbsContent.top_logprobs. `token`, `logprob` and
    # `token_id` are required; `bytes` is optional.
    NewLogProbsContentTopLogProbs:
      type: object
      title: NewLogProbsContentTopLogProbs
      required:
        - token
        - logprob
        - token_id
      properties:
        token:
          type: string
          title: Token
        logprob:
          type: number
          title: Logprob
        token_id:
          type: integer
          title: Token Id
        bytes:
          title: Bytes
          type: array
          items:
            type: integer
      additionalProperties: false
    # Referenced by UsageInfo.prompt_tokens_details; holds one optional
    # nullable integer, `cached_tokens`.
    PromptTokensDetails:
      type: object
      title: PromptTokensDetails
      properties:
        cached_tokens:
          title: Cached Tokens
          anyOf:
            - type: integer
            - type: 'null'
          default: null
      additionalProperties: false
    # `prompt_fragments`, `prompt_token_ids` and `completion` are required;
    # the remaining fields are nullable and default to null.
    RawOutput:
      type: object
      title: RawOutput
      description: |-
        Extension of OpenAI that returns low-level interaction of what the model
        sees, including the formatted prompt and function calls
      required:
        - prompt_fragments
        - prompt_token_ids
        - completion
      properties:
        prompt_fragments:
          title: Prompt Fragments
          description: >-
            Pieces of the prompt (like individual messages) before truncation
            and concatenation. Depending on prompt_truncate_len some of the
            messages might be dropped. Contains a mix of strings to be tokenized
            and individual tokens (if dictated by the conversation template)
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        prompt_token_ids:
          title: Prompt Token Ids
          description: Fully processed prompt as seen by the model
          type: array
          items:
            type: integer
        completion:
          title: Completion
          description: >-
            Raw completion produced by the model before any tool calls are
            parsed
          type: string
        completion_token_ids:
          title: Completion Token Ids
          description: Token IDs for the raw completion
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
        completion_logprobs:
          description: >-
            Log probabilities for the completion. Only populated if logprobs is
            specified in the request
          anyOf:
            - $ref: '#/components/schemas/NewLogProbs'
            - type: 'null'
          default: null
        images:
          title: Images
          description: Images in the prompt
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
          default: null
        videos:
          title: Videos
          description: Videos in the prompt
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
          default: null
        grammar:
          title: Grammar
          description: >-
            Grammar used for constrained decoding, can be either user provided
            (directly or JSON schema) or inferred by the chat template
          anyOf:
            - type: string
            - type: 'null'
          default: null
      additionalProperties: false
    # Token accounting: `prompt_tokens` and `total_tokens` are required;
    # `completion_tokens` and `prompt_tokens_details` are nullable.
    UsageInfo:
      type: object
      title: UsageInfo
      description: Usage statistics.
      required:
        - prompt_tokens
        - total_tokens
      properties:
        prompt_tokens:
          title: Prompt Tokens
          description: The number of tokens in the prompt
          type: integer
        total_tokens:
          title: Total Tokens
          description: >-
            The total number of tokens used in the request (prompt +
            completion)
          type: integer
        completion_tokens:
          title: Completion Tokens
          description: The number of tokens in the generated completion
          anyOf:
            - type: integer
            - type: 'null'
          default: null
        prompt_tokens_details:
          description: Details about prompt tokens, including cached tokens
          anyOf:
            - $ref: '#/components/schemas/PromptTokensDetails'
            - type: 'null'
          default: null
      additionalProperties: false
    # Streamed /v1/completions chunk. `id`, `created`, `model` and `choices`
    # are required; `usage` and `perf_metrics` are nullable and default to
    # null.
    CompletionStreamResponse:
      additionalProperties: false
      description: The streamed response message from a /v1/completions call.
      properties:
        id:
          description: A unique identifier of the response
          title: Id
          type: string
        object:
          default: text_completion
          description: The object type, which is always "text_completion"
          title: Object
          type: string
        created:
          description: The Unix time in seconds when the response was generated
          title: Created
          type: integer
        model:
          description: The model used for the completion
          title: Model
          type: string
        choices:
          description: The list of streamed completion choices
          items:
            $ref: '#/components/schemas/CompletionResponseStreamChoice'
          title: Choices
          type: array
        usage:
          anyOf:
            - $ref: '#/components/schemas/UsageInfo'
            - type: 'null'
          default: null
          description: Usage statistics for the completion, or null when not provided
        perf_metrics:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          default: null
          description: >-
            See parameter
            [perf_metrics_in_response](#body-perf-metrics-in-response)
          title: Perf Metrics
      required:
        - id
        - created
        - model
        - choices
      title: CompletionStreamResponse
      type: object
    # One entry of CompletionStreamResponse.choices. `index` and `text` are
    # required; every other field is nullable and defaults to null.
    # Field documentation lives on each property so that documentation
    # renderers show it next to the field.
    CompletionResponseStreamChoice:
      additionalProperties: false
      description: A streamed completion choice.
      properties:
        index:
          description: The index of the completion choice
          title: Index
          type: integer
        text:
          description: The completion response
          title: Text
          type: string
        logprobs:
          anyOf:
            - $ref: '#/components/schemas/LogProbs'
            - $ref: '#/components/schemas/NewLogProbs'
            - type: 'null'
          default: null
          description: The log probabilities of the most likely tokens
          title: Logprobs
        finish_reason:
          anyOf:
            - enum:
                - stop
                - length
                - error
              type: string
            - type: 'null'
          default: null
          description: >-
            The reason the model stopped generating tokens. This will be "stop"
            if the model hit a natural stop point or a provided stop sequence,
            or "length" if the maximum number of tokens specified in the request
            was reached
          title: Finish Reason
        raw_output:
          anyOf:
            - $ref: '#/components/schemas/RawOutput'
            - type: 'null'
          default: null
        prompt_token_ids:
          anyOf:
            - items:
                type: integer
              type: array
            - type: 'null'
          default: null
          description: >-
            Token IDs for the prompt (when return_token_ids=true, sent in first
            chunk)
          title: Prompt Token Ids
        token_ids:
          anyOf:
            - items:
                type: integer
              type: array
            - type: 'null'
          default: null
          description: Token IDs for this chunk (when return_token_ids=true)
          title: Token Ids
      required:
        - index
        - text
      title: CompletionResponseStreamChoice
      type: object
    # Non-streaming chat completion payload. `id`, `created`, `model` and
    # `choices` are required; `usage`, `perf_metrics` and `prompt_token_ids`
    # are nullable and default to null.
    ChatCompletionResponse:
      type: object
      title: ChatCompletionResponse
      description: The response message from a /v1/chat/completions call.
      required:
        - id
        - created
        - model
        - choices
      properties:
        id:
          title: Id
          description: A unique identifier of the response
          type: string
        object:
          title: Object
          description: The object type, which is always "chat.completion"
          type: string
          default: chat.completion
        created:
          title: Created
          description: The Unix time in seconds when the response was generated
          type: integer
        model:
          title: Model
          description: The model used for the chat completion
          type: string
        choices:
          title: Choices
          description: The list of chat completion choices
          type: array
          items:
            $ref: '#/components/schemas/ChatCompletionResponseChoice'
        usage:
          anyOf:
            - $ref: '#/components/schemas/UsageInfo'
            - type: 'null'
          default: null
        perf_metrics:
          title: Perf Metrics
          description: >-
            See parameter
            [perf_metrics_in_response](#body-perf-metrics-in-response)
          anyOf:
            - type: object
              additionalProperties: true
            - type: 'null'
          default: null
        prompt_token_ids:
          title: Prompt Token Ids
          description: Token IDs for the prompt (when return_token_ids=true)
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
      additionalProperties: false
    # One entry of ChatCompletionResponse.choices. `index` and `message` are
    # required; the remaining fields are nullable and default to null.
    # Field documentation lives on each property so that documentation
    # renderers show it next to the field.
    ChatCompletionResponseChoice:
      additionalProperties: false
      description: A chat completion choice generated by a chat model.
      properties:
        index:
          description: The index of the chat completion choice
          title: Index
          type: integer
        message:
          $ref: '#/components/schemas/ChatMessage'
          description: The chat completion message
        finish_reason:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: >-
            The reason the model stopped generating tokens. This will be "stop"
            if the model hit a natural stop point or a provided stop sequence,
            or "length" if the maximum number of tokens specified in the request
            was reached
          title: Finish Reason
        logprobs:
          anyOf:
            - $ref: '#/components/schemas/LogProbs'
            - $ref: '#/components/schemas/NewLogProbs'
            - type: 'null'
          default: null
          title: Logprobs
        raw_output:
          anyOf:
            - $ref: '#/components/schemas/RawOutput'
            - type: 'null'
          default: null
        token_ids:
          anyOf:
            - items:
                type: integer
              type: array
            - type: 'null'
          default: null
          description: >-
            Token IDs for the generated message content (when
            return_token_ids=true)
          title: Token Ids
      required:
        - index
        - message
      title: ChatCompletionResponseChoice
      type: object
    # Streamed chat completion chunk. `id`, `created`, `model` and `choices`
    # are required; `usage`, `perf_metrics` and `prompt_token_ids` are
    # nullable and default to null.
    ChatCompletionStreamResponse:
      type: object
      title: ChatCompletionStreamResponse
      description: The streamed response message from a /v1/chat/completions call.
      required:
        - id
        - created
        - model
        - choices
      properties:
        id:
          title: Id
          description: A unique identifier of the response
          type: string
        object:
          title: Object
          description: The object type, which is always "chat.completion.chunk"
          type: string
          default: chat.completion.chunk
        created:
          title: Created
          description: The Unix time in seconds when the response was generated
          type: integer
        model:
          title: Model
          description: The model used for the chat completion
          type: string
        choices:
          title: Choices
          description: The list of streamed chat completion choices
          type: array
          items:
            $ref: '#/components/schemas/ChatCompletionResponseStreamChoice'
        usage:
          anyOf:
            - $ref: '#/components/schemas/UsageInfo'
            - type: 'null'
          default: null
        perf_metrics:
          title: Perf Metrics
          description: >-
            See parameter
            [perf_metrics_in_response](#body-perf-metrics-in-response)
          anyOf:
            - type: object
              additionalProperties: true
            - type: 'null'
          default: null
        prompt_token_ids:
          title: Prompt Token Ids
          description: >-
            Token IDs for the prompt (when return_token_ids=true, sent in first
            chunk)
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
      additionalProperties: false
    # One entry of ChatCompletionStreamResponse.choices. `index` and `delta`
    # are required; every other field is nullable and defaults to null.
    ChatCompletionResponseStreamChoice:
      type: object
      title: ChatCompletionResponseStreamChoice
      description: A streamed chat completion choice.
      required:
        - index
        - delta
      properties:
        index:
          title: Index
          description: The index of the chat completion choice
          type: integer
        delta:
          description: The message delta
          $ref: '#/components/schemas/DeltaMessage'
        finish_reason:
          title: Finish Reason
          description: >-
            The reason the model stopped generating tokens. This will be "stop"
            if the model hit a natural stop point or a provided stop sequence,
            or "length" if the maximum number of tokens specified in the request
            was reached
          anyOf:
            - type: string
              enum:
                - stop
                - length
                - function_call
                - tool_calls
            - type: 'null'
          default: null
        logprobs:
          title: Logprobs
          anyOf:
            - $ref: '#/components/schemas/LogProbs'
            - $ref: '#/components/schemas/NewLogProbs'
            - type: 'null'
          default: null
        raw_output:
          anyOf:
            - $ref: '#/components/schemas/RawOutput'
            - type: 'null'
          default: null
        prompt_token_ids:
          title: Prompt Token Ids
          description: Token IDs for the prompt (when return_token_ids=true)
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
        token_ids:
          title: Token Ids
          description: Token IDs for this chunk (when return_token_ids=true)
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
      additionalProperties: false
    # Incremental message fragment. Nothing is required: each property is a
    # nullable value whose default is null.
    DeltaMessage:
      type: object
      title: DeltaMessage
      description: A message delta.
      additionalProperties: false
      properties:
        role:
          title: Role
          description: The role of the author of this message
          default: null
          anyOf:
            - type: string
            - type: 'null'
        content:
          title: Content
          description: The contents of the chunk message
          default: null
          anyOf:
            - type: string
            - type: 'null'
        reasoning_content:
          title: Reasoning Content
          description: >-
            The reasoning or thinking process generated by the model. This field
            is only available for certain reasoning models (GLM 4.5, GLM 4.5
            Air, GPT OSS 120B, GPT OSS 20B) and contains the model's internal
            reasoning that would otherwise appear in `<think></think>` tags
            within the content field.
          default: null
          anyOf:
            - type: string
            - type: 'null'
        tool_calls:
          title: Tool Calls
          default: null
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/ChatCompletionMessageToolCall'
            - type: 'null'
    # Response schema for a generated message. `type` is pinned to `message`
    # and `role` to `assistant`; `content` is an array of
    # AnthropicContentBlock items.
    AnthropicMessage:
      examples:
        - content:
            - citations: null
              text: Hi! How can I help you today?
              type: text
          id: msg_013Zva2CMHLNnXjNJJKqJ2EF
          model: claude-opus-4-6
          role: assistant
          stop_reason: end_turn
          stop_sequence: null
          type: message
      properties:
        id:
          description: |-
            Unique object identifier.

            The format and length of IDs may change over time.
          examples:
            - msg_013Zva2CMHLNnXjNJJKqJ2EF
          title: Id
          type: string
        type:
          const: message
          default: message
          description: |-
            Object type.

            For Messages, this is always `"message"`.
          title: Type
          type: string
        role:
          const: assistant
          default: assistant
          description: |-
            Conversational role of the generated message.

            This will always be `"assistant"`.
          title: Role
          type: string
        content:
          # Folded scalar: two blank lines yield a paragraph break, and the
          # more-indented JSON lines keep their own line breaks.
          description: >-
            Content generated by the model.


            This is an array of content blocks, each of which has a `type` that
            determines its shape.


            Example:


            ```json

            [{"type": "text", "text": "Hi, I'm here to help."}]

            ```


            If the request input `messages` ended with an `assistant` turn, then
            the response `content` will continue directly from that last turn.
            You can use this to constrain the model's output.


            For example, if the input `messages` were:


            ```json

            [
              {"role": "user", "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"},
              {"role": "assistant", "content": "The best answer is ("}
            ]

            ```


            Then the response `content` might be:


            ```json

            [{"type": "text", "text": "B)"}]

            ```
          examples:
            - - citations: null
                text: Hi! How can I help you today?
                type: text
          items:
            $ref: '#/components/schemas/AnthropicContentBlock'
          title: Content
          type: array
        model:
          $ref: '#/components/schemas/AnthropicModel'
        stop_reason:
          # Nullable but listed in `required`: the key must be present even
          # when its value is null.
          anyOf:
            - $ref: '#/components/schemas/AnthropicStopReason'
            - type: 'null'
          description: >-
            The reason that the model stopped.


            This may be one of the following values:

            * `"end_turn"`: the model reached a natural stopping point

            * `"max_tokens"`: the model exceeded the requested `max_tokens` or
            the model's maximum

            * `"stop_sequence"`: one of your provided custom `stop_sequences`
            was generated

            * `"tool_use"`: the model invoked one or more tools

            * `"pause_turn"`: the model paused a long-running turn. You may
            provide the response back as-is in a subsequent request to let the
            model continue.

            * `"refusal"`: when streaming classifiers intervene to handle
            potential policy violations


            In non-streaming mode this value is always non-null. In streaming
            mode, it is null in the `message_start` event and non-null
            otherwise.
          title: Stop Reason
        stop_sequence:
          # Nullable but listed in `required`, same as `stop_reason`.
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: >-
            Which custom stop sequence was generated, if any.


            This value will be a non-null string if one of your custom stop
            sequences was generated.
          title: Stop Sequence
        raw_output:
          # Not listed in `required`, so this key may be absent.
          anyOf:
            - $ref: '#/components/schemas/AnthropicRawOutput'
            - type: 'null'
          default: null
      required:
        - id
        - type
        - role
        - content
        - model
        - stop_reason
        - stop_sequence
      title: Message
      type: object
      x-stainless-python-custom-imports:
        - from .content_block import ContentBlock as ContentBlock
    # Union of the response content block schemas; the `type` property value
    # selects the concrete schema through the discriminator mapping.
    AnthropicContentBlock:
      oneOf:
        - $ref: '#/components/schemas/AnthropicResponseTextBlock'
        - $ref: '#/components/schemas/AnthropicResponseThinkingBlock'
        - $ref: '#/components/schemas/AnthropicResponseRedactedThinkingBlock'
        - $ref: '#/components/schemas/AnthropicResponseToolUseBlock'
      discriminator:
        propertyName: type
        mapping:
          redacted_thinking: '#/components/schemas/AnthropicResponseRedactedThinkingBlock'
          text: '#/components/schemas/AnthropicResponseTextBlock'
          thinking: '#/components/schemas/AnthropicResponseThinkingBlock'
          tool_use: '#/components/schemas/AnthropicResponseToolUseBlock'
    # Text content block (`type: text`). `citations` is required but may be
    # null; when present it is an array of citation objects selected by their
    # own `type` property.
    AnthropicResponseTextBlock:
      type: object
      title: ResponseTextBlock
      required:
        - citations
        - text
        - type
      properties:
        citations:
          title: Citations
          description: >-
            Citations supporting the text block.


            The type of citation returned will depend on the type of document
            being cited. Citing a PDF results in `page_location`, plain text
            results in `char_location`, and content document results in
            `content_block_location`.
          default: null
          anyOf:
            - type: array
              items:
                oneOf:
                  - $ref: '#/components/schemas/AnthropicResponseCharLocationCitation'
                  - $ref: '#/components/schemas/AnthropicResponsePageLocationCitation'
                  - $ref: '#/components/schemas/AnthropicResponseContentBlockLocationCitation'
                  - $ref: '#/components/schemas/AnthropicResponseWebSearchResultLocationCitation'
                  - $ref: '#/components/schemas/AnthropicResponseSearchResultLocationCitation'
                discriminator:
                  propertyName: type
                  mapping:
                    char_location: '#/components/schemas/AnthropicResponseCharLocationCitation'
                    content_block_location: '#/components/schemas/AnthropicResponseContentBlockLocationCitation'
                    page_location: '#/components/schemas/AnthropicResponsePageLocationCitation'
                    search_result_location: '#/components/schemas/AnthropicResponseSearchResultLocationCitation'
                    web_search_result_location: '#/components/schemas/AnthropicResponseWebSearchResultLocationCitation'
            - type: 'null'
        text:
          type: string
          title: Text
          minLength: 0
          maxLength: 5000000
        type:
          type: string
          title: Type
          const: text
          default: text
    # Citation tagged `char_location`, located by start/end character indexes.
    # Every property is required; `document_title` and `file_id` may be null.
    AnthropicResponseCharLocationCitation:
      type: object
      title: ResponseCharLocationCitation
      required:
        - cited_text
        - document_index
        - document_title
        - end_char_index
        - file_id
        - start_char_index
        - type
      properties:
        cited_text:
          type: string
          title: Cited Text
        document_index:
          type: integer
          title: Document Index
          minimum: 0
        document_title:
          title: Document Title
          anyOf:
            - type: string
            - type: 'null'
        end_char_index:
          type: integer
          title: End Char Index
        file_id:
          title: File Id
          default: null
          anyOf:
            - type: string
            - type: 'null'
        start_char_index:
          type: integer
          title: Start Char Index
          minimum: 0
        type:
          type: string
          title: Type
          const: char_location
          default: char_location
    # Citation tagged `page_location`, located by start/end page numbers
    # (`start_page_number` has a minimum of 1). Every property is required;
    # `document_title` and `file_id` may be null.
    AnthropicResponsePageLocationCitation:
      type: object
      title: ResponsePageLocationCitation
      required:
        - cited_text
        - document_index
        - document_title
        - end_page_number
        - file_id
        - start_page_number
        - type
      properties:
        cited_text:
          type: string
          title: Cited Text
        document_index:
          type: integer
          title: Document Index
          minimum: 0
        document_title:
          title: Document Title
          anyOf:
            - type: string
            - type: 'null'
        end_page_number:
          type: integer
          title: End Page Number
        file_id:
          title: File Id
          default: null
          anyOf:
            - type: string
            - type: 'null'
        start_page_number:
          type: integer
          title: Start Page Number
          minimum: 1
        type:
          type: string
          title: Type
          const: page_location
          default: page_location
    # Citation tagged `content_block_location`, located by start/end block
    # indexes. Every property is required; `document_title` and `file_id` may
    # be null.
    AnthropicResponseContentBlockLocationCitation:
      type: object
      title: ResponseContentBlockLocationCitation
      required:
        - cited_text
        - document_index
        - document_title
        - end_block_index
        - file_id
        - start_block_index
        - type
      properties:
        cited_text:
          type: string
          title: Cited Text
        document_index:
          type: integer
          title: Document Index
          minimum: 0
        document_title:
          title: Document Title
          anyOf:
            - type: string
            - type: 'null'
        end_block_index:
          type: integer
          title: End Block Index
        file_id:
          title: File Id
          default: null
          anyOf:
            - type: string
            - type: 'null'
        start_block_index:
          type: integer
          title: Start Block Index
          minimum: 0
        type:
          type: string
          title: Type
          const: content_block_location
          default: content_block_location
    # Citation tagged `web_search_result_location`. Every property is
    # required; `title` may be null and is capped at 512 characters otherwise.
    AnthropicResponseWebSearchResultLocationCitation:
      type: object
      title: ResponseWebSearchResultLocationCitation
      required:
        - cited_text
        - encrypted_index
        - title
        - type
        - url
      properties:
        cited_text:
          type: string
          title: Cited Text
        encrypted_index:
          type: string
          title: Encrypted Index
        title:
          title: Title
          anyOf:
            - type: string
              maxLength: 512
            - type: 'null'
        type:
          type: string
          title: Type
          const: web_search_result_location
          default: web_search_result_location
        url:
          type: string
          title: Url
    # Citation tagged `search_result_location`, located by a search result
    # index plus start/end block indexes. Every property is required; `title`
    # may be null.
    AnthropicResponseSearchResultLocationCitation:
      type: object
      title: ResponseSearchResultLocationCitation
      required:
        - cited_text
        - end_block_index
        - search_result_index
        - source
        - start_block_index
        - title
        - type
      properties:
        cited_text:
          type: string
          title: Cited Text
        end_block_index:
          type: integer
          title: End Block Index
        search_result_index:
          type: integer
          title: Search Result Index
          minimum: 0
        source:
          type: string
          title: Source
        start_block_index:
          type: integer
          title: Start Block Index
          minimum: 0
        title:
          title: Title
          anyOf:
            - type: string
            - type: 'null'
        type:
          type: string
          title: Type
          const: search_result_location
          default: search_result_location
    # Content block tagged `thinking`; carries the `thinking` text and a
    # `signature` string, both required.
    AnthropicResponseThinkingBlock:
      type: object
      title: ResponseThinkingBlock
      required:
        - signature
        - thinking
        - type
      properties:
        signature:
          type: string
          title: Signature
        thinking:
          type: string
          title: Thinking
        type:
          type: string
          title: Type
          const: thinking
          default: thinking
    # Content block tagged `redacted_thinking`; its payload is the required
    # string `data`.
    AnthropicResponseRedactedThinkingBlock:
      type: object
      title: ResponseRedactedThinkingBlock
      required:
        - data
        - type
      properties:
        data:
          type: string
          title: Data
        type:
          type: string
          title: Type
          const: redacted_thinking
          default: redacted_thinking
    # Content block tagged `tool_use`: an `id`, the tool `name`, and a
    # free-form `input` object. All four properties are required.
    AnthropicResponseToolUseBlock:
      type: object
      title: ResponseToolUseBlock
      required:
        - id
        - input
        - name
        - type
      properties:
        id:
          type: string
          title: Id
          # Letters, digits, underscore and hyphen only; at least one character.
          pattern: ^[a-zA-Z0-9_-]+$
        input:
          type: object
          title: Input
          additionalProperties: true
        name:
          type: string
          title: Name
          minLength: 1
        type:
          type: string
          title: Type
          const: tool_use
          default: tool_use
    # Source object with `type: base64` and `media_type: application/pdf`;
    # `data` is a string with `format: byte`. No extra properties are allowed.
    AnthropicBase64PDFSource:
      type: object
      title: Base64PDFSource
      additionalProperties: false
      required:
        - data
        - media_type
        - type
      properties:
        data:
          type: string
          title: Data
          format: byte
        media_type:
          type: string
          title: Media Type
          const: application/pdf
        type:
          type: string
          title: Type
          const: base64
    # Source object with `type: text` and `media_type: text/plain`; `data` is
    # a plain string. No extra properties are allowed.
    AnthropicPlainTextSource:
      type: object
      title: PlainTextSource
      additionalProperties: false
      required:
        - data
        - media_type
        - type
      properties:
        data:
          type: string
          title: Data
        media_type:
          type: string
          title: Media Type
          const: text/plain
        type:
          type: string
          title: Type
          const: text
    # Model identifier: an unconstrained string (no enum or pattern).
    AnthropicModel:
      title: Model
      type: string
      description: >-
        The model that will complete your prompt.
        See the [Fireworks Model Library](https://app.fireworks.ai/models)
        for available models.
    # Closed set of stop-reason strings.
    AnthropicStopReason:
      type: string
      enum:
        - end_turn
        - max_tokens
        - stop_sequence
        - tool_use
        - pause_turn
        - refusal
    # Error envelope (`type: error`). `error` is one of nine error schemas
    # selected by its own `type` property; `request_id` is required but may be
    # null.
    AnthropicErrorResponse:
      type: object
      title: ErrorResponse
      required:
        - error
        - request_id
        - type
      properties:
        error:
          title: Error
          oneOf:
            - $ref: '#/components/schemas/AnthropicInvalidRequestError'
            - $ref: '#/components/schemas/AnthropicAuthenticationError'
            - $ref: '#/components/schemas/AnthropicBillingError'
            - $ref: '#/components/schemas/AnthropicPermissionError'
            - $ref: '#/components/schemas/AnthropicNotFoundError'
            - $ref: '#/components/schemas/AnthropicRateLimitError'
            - $ref: '#/components/schemas/AnthropicGatewayTimeoutError'
            - $ref: '#/components/schemas/AnthropicAPIError'
            - $ref: '#/components/schemas/AnthropicOverloadedError'
          discriminator:
            propertyName: type
            mapping:
              api_error: '#/components/schemas/AnthropicAPIError'
              authentication_error: '#/components/schemas/AnthropicAuthenticationError'
              billing_error: '#/components/schemas/AnthropicBillingError'
              invalid_request_error: '#/components/schemas/AnthropicInvalidRequestError'
              not_found_error: '#/components/schemas/AnthropicNotFoundError'
              overloaded_error: '#/components/schemas/AnthropicOverloadedError'
              permission_error: '#/components/schemas/AnthropicPermissionError'
              rate_limit_error: '#/components/schemas/AnthropicRateLimitError'
              timeout_error: '#/components/schemas/AnthropicGatewayTimeoutError'
        request_id:
          title: Request Id
          default: null
          anyOf:
            - type: string
            - type: 'null'
        type:
          type: string
          title: Type
          const: error
          default: error
    # Error object tagged `invalid_request_error`; `message` and `type` are
    # both required.
    AnthropicInvalidRequestError:
      type: object
      title: InvalidRequestError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Invalid request
        type:
          type: string
          title: Type
          const: invalid_request_error
          default: invalid_request_error
    # Error object tagged `authentication_error`; `message` and `type` are
    # both required.
    AnthropicAuthenticationError:
      type: object
      title: AuthenticationError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Authentication error
        type:
          type: string
          title: Type
          const: authentication_error
          default: authentication_error
    # Error object tagged `billing_error`; `message` and `type` are both
    # required.
    AnthropicBillingError:
      type: object
      title: BillingError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Billing error
        type:
          type: string
          title: Type
          const: billing_error
          default: billing_error
    # Error object tagged `permission_error`; `message` and `type` are both
    # required.
    AnthropicPermissionError:
      type: object
      title: PermissionError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Permission denied
        type:
          type: string
          title: Type
          const: permission_error
          default: permission_error
    # Error object tagged `not_found_error`; `message` and `type` are both
    # required.
    AnthropicNotFoundError:
      type: object
      title: NotFoundError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Not found
        type:
          type: string
          title: Type
          const: not_found_error
          default: not_found_error
    # Error object tagged `rate_limit_error`; `message` and `type` are both
    # required.
    AnthropicRateLimitError:
      type: object
      title: RateLimitError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Rate limited
        type:
          type: string
          title: Type
          const: rate_limit_error
          default: rate_limit_error
    # Error object tagged `timeout_error`; `message` and `type` are both
    # required.
    AnthropicGatewayTimeoutError:
      type: object
      title: GatewayTimeoutError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Request timeout
        type:
          type: string
          title: Type
          const: timeout_error
          default: timeout_error
    # Error object tagged `api_error`; `message` and `type` are both required.
    AnthropicAPIError:
      type: object
      title: APIError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Internal server error
        type:
          type: string
          title: Type
          const: api_error
          default: api_error
    # Error object tagged `overloaded_error`; `message` and `type` are both
    # required.
    AnthropicOverloadedError:
      type: object
      title: OverloadedError
      required:
        - message
        - type
      properties:
        message:
          type: string
          title: Message
          default: Overloaded
        type:
          type: string
          title: Type
          const: overloaded_error
          default: overloaded_error
    # Request body schema for creating a message. Only `model` and `messages`
    # are required, and unknown properties are rejected
    # (`additionalProperties: false`).
    AnthropicCreateMessageParams:
      additionalProperties: false
      example:
        max_tokens: 1024
        messages:
          - content: Hello, world
            role: user
        model: claude-opus-4-6
      properties:
        model:
          $ref: '#/components/schemas/AnthropicModel'
        messages:
          # The ```json samples below must stay strictly valid JSON (no
          # trailing commas) so they can be copied verbatim.
          description: >-
            Input messages.


            Models are trained to operate on alternating `user` and `assistant`
            conversational turns. When creating a new `Message`, you specify the
            prior conversational turns with the `messages` parameter, and the
            model then generates the next `Message` in the conversation.
            Consecutive `user` or `assistant` turns in your request will be
            combined into a single turn.


            Each input message must be an object with a `role` and `content`.
            You can specify a single `user`-role message, or you can include
            multiple `user` and `assistant` messages.


            If the final message uses the `assistant` role, the response content
            will continue immediately from the content in that message. This can
            be used to constrain part of the model's response.


            Example with a single `user` message:


            ```json

            [{"role": "user", "content": "Hello"}]

            ```


            Example with multiple conversational turns:


            ```json

            [
              {"role": "user", "content": "Hello there."},
              {"role": "assistant", "content": "Hi, I'm here to help. How can I help you?"},
              {"role": "user", "content": "Can you explain LLMs in plain English?"}
            ]

            ```


            Example with a partially-filled response from the model:


            ```json

            [
              {"role": "user", "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"},
              {"role": "assistant", "content": "The best answer is ("}
            ]

            ```


            Each input message `content` may be either a single `string` or an
            array of content blocks, where each block has a specific `type`.
            Using a `string` for `content` is shorthand for an array of one
            content block of type `"text"`. The following input messages are
            equivalent:


            ```json

            {"role": "user", "content": "Hello"}

            ```


            ```json

            {"role": "user", "content": [{"type": "text", "text": "Hello"}]}

            ```


            See [input
            examples](https://docs.claude.com/en/api/messages-examples).


            Note that if you want to include a [system
            prompt](/guides/querying-text-models), you can use the top-level
            `system` parameter — there is no `"system"` role for input messages
            in the Messages API.


            There is a limit of 100,000 messages in a single request.
          items:
            $ref: '#/components/schemas/AnthropicInputMessage'
          title: Messages
          type: array
        max_tokens:
          description: >-
            The maximum number of tokens to generate before stopping.


            Note that models may stop _before_ reaching this maximum. This
            parameter only specifies the absolute maximum number of tokens to
            generate.


            Different models have different maximum values for this parameter.
            See [models](https://app.fireworks.ai/models) for details.
          examples:
            - 1024
          minimum: 1
          title: Max Tokens
          type: integer
        metadata:
          $ref: '#/components/schemas/AnthropicMetadata'
          description: An object describing metadata about the request.
        output_config:
          $ref: '#/components/schemas/AnthropicOutputConfig'
          description: >-
            Configuration options for the model's output, such as the output
            format.
        stop_sequences:
          description: >-
            Custom text sequences that will cause the model to stop generating.


            Models will normally stop when they have naturally completed their
            turn, which will result in a response `stop_reason` of `"end_turn"`.


            If you want the model to stop generating when it encounters custom
            strings of text, you can use the `stop_sequences` parameter. If the
            model encounters one of the custom sequences, the response
            `stop_reason` value will be `"stop_sequence"` and the response
            `stop_sequence` value will contain the matched stop sequence.
          items:
            type: string
          title: Stop Sequences
          type: array
        stream:
          description: >-
            Whether to incrementally stream the response using server-sent
            events.


            See [streaming](/guides/querying-text-models) for details.
          title: Stream
          type: boolean
        system:
          # Either a plain string or an array of text blocks.
          anyOf:
            - type: string
              x-stainless-skip:
                - go
                - cli
            - items:
                $ref: '#/components/schemas/AnthropicRequestTextBlock'
              type: array
          description: >-
            System prompt.


            A system prompt is a way of providing context and instructions to
            the model, such as specifying a particular goal or role. See the
            [guide to system prompts](/guides/querying-text-models).
          examples:
            - - text: Today's date is 2024-06-01.
                type: text
            - Today's date is 2023-01-01.
          title: System
        temperature:
          description: >-
            Amount of randomness injected into the response.


            Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature`
            closer to `0.0` for analytical / multiple choice, and closer to
            `1.0` for creative and generative tasks.


            Note that even with `temperature` of `0.0`, the results will not be
            fully deterministic.
          examples:
            - 1
          maximum: 1
          minimum: 0
          title: Temperature
          type: number
        thinking:
          $ref: '#/components/schemas/AnthropicThinkingConfigParam'
        tool_choice:
          $ref: '#/components/schemas/AnthropicToolChoice'
        tools:
          description: >-
            Definitions of tools that the model may use.


            If you include `tools` in your API request, the model may return
            `tool_use` content blocks that represent the model's use of those
            tools. You can then run those tools using the tool input generated
            by the model and then optionally return results back to the model
            using `tool_result` content blocks.


            Each tool definition includes:


            * `name`: Name of the tool.

            * `description`: Optional, but strongly-recommended description of
            the tool.

            * `input_schema`: [JSON
            schema](https://json-schema.org/draft/2020-12) for the tool `input`
            shape that the model will produce in `tool_use` output content
            blocks.


            For example, if you defined `tools` as:


            ```json

            [
              {
                "name": "get_stock_price",
                "description": "Get the current stock price for a given ticker symbol.",
                "input_schema": {
                  "type": "object",
                  "properties": {
                    "ticker": {
                      "type": "string",
                      "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
                    }
                  },
                  "required": ["ticker"]
                }
              }
            ]

            ```


            And then asked the model "What's the S&P 500 at today?", the model
            might produce `tool_use` content blocks in the response like this:


            ```json

            [
              {
                "type": "tool_use",
                "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
                "name": "get_stock_price",
                "input": { "ticker": "^GSPC" }
              }
            ]

            ```


            You might then run your `get_stock_price` tool with `{"ticker":
            "^GSPC"}` as an input, and return the following back to the model in
            a subsequent `user` message:


            ```json

            [
              {
                "type": "tool_result",
                "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
                "content": "259.75 USD"
              }
            ]

            ```


            Tools can be used for workflows that include running client-side
            tools and functions, or more generally whenever you want the model
            to produce a particular JSON structure of output.


            See the [guide](/guides/function-calling) for more details.
          examples:
            - description: Get the current weather in a given location
              input_schema:
                properties:
                  location:
                    description: The city and state, e.g. San Francisco, CA
                    type: string
                  unit:
                    description: Unit for the output - one of (celsius, fahrenheit)
                    type: string
                required:
                  - location
                type: object
              name: get_weather
          items:
            # Single-member union: AnthropicTool is the only accepted shape.
            oneOf:
              - $ref: '#/components/schemas/AnthropicTool'
          title: Tools
          type: array
        top_k:
          description: >-
            Only sample from the top K options for each subsequent token.


            Used to remove "long tail" low probability responses. [Learn more
            technical details
            here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).


            Recommended for advanced use cases only. You usually only need to
            use `temperature`.
          examples:
            - 5
          minimum: 0
          title: Top K
          type: integer
        top_p:
          description: >-
            Use nucleus sampling.


            In nucleus sampling, we compute the cumulative distribution over all
            the options for each subsequent token in decreasing probability
            order and cut it off once it reaches a particular probability
            specified by `top_p`. You should either alter `temperature` or
            `top_p`, but not both.


            Recommended for advanced use cases only. You usually only need to
            use `temperature`.
          examples:
            - 0.7
          maximum: 1
          minimum: 0
          title: Top P
          type: number
        raw_output:
          # Boolean or null; defaults to false.
          anyOf:
            - type: boolean
            - type: 'null'
          title: Raw Output
          description: Return raw output from the model.
          default: false
      required:
        - model
        - messages
      title: CreateMessageParams
      type: object
    # One conversation turn: a `role` (user or assistant) plus `content`, given
    # either as a plain string or as an array of input content blocks.
    # NOTE(review): `discriminator` is declared here without oneOf/anyOf/allOf;
    # confirm downstream tooling relies on it before changing it.
    AnthropicInputMessage:
      title: InputMessage
      type: object
      additionalProperties: false
      discriminator:
        propertyName: role
      properties:
        content:
          title: Content
          anyOf:
            - type: string
              x-stainless-skip:
                - go
                - cli
            - type: array
              items:
                $ref: '#/components/schemas/AnthropicInputContentBlock'
              example:
                - type: text
                  text: What is a quaternion?
        role:
          title: Role
          type: string
          enum:
            - user
            - assistant
      required:
        - content
        - role
    # Union of every content block variant accepted in a request message.
    # The variant is chosen by the value of the `type` property.
    AnthropicInputContentBlock:
      oneOf:
        - $ref: '#/components/schemas/AnthropicRequestTextBlock'
          description: Regular text content.
        - $ref: '#/components/schemas/AnthropicRequestImageBlock'
          description: >-
            Image content specified directly as base64 data or as a reference
            via a URL.
        - $ref: '#/components/schemas/AnthropicRequestDocumentBlock'
          description: >-
            Document content, either specified directly as base64 data, as text,
            or as a reference via a URL.
        - $ref: '#/components/schemas/AnthropicRequestThinkingBlock'
          description: A block specifying internal thinking by the model.
        - $ref: '#/components/schemas/AnthropicRequestRedactedThinkingBlock'
          description: A block specifying internal, redacted thinking by the model.
        - $ref: '#/components/schemas/AnthropicRequestToolUseBlock'
          description: A block indicating a tool use by the model.
        - $ref: '#/components/schemas/AnthropicRequestToolResultBlock'
          description: A block specifying the results of a tool use by the model.
      discriminator:
        propertyName: type
        mapping:
          document: '#/components/schemas/AnthropicRequestDocumentBlock'
          image: '#/components/schemas/AnthropicRequestImageBlock'
          redacted_thinking: '#/components/schemas/AnthropicRequestRedactedThinkingBlock'
          text: '#/components/schemas/AnthropicRequestTextBlock'
          thinking: '#/components/schemas/AnthropicRequestThinkingBlock'
          tool_result: '#/components/schemas/AnthropicRequestToolResultBlock'
          tool_use: '#/components/schemas/AnthropicRequestToolUseBlock'
      # Stainless SDK-generation hints (Python union extension, Go constructors).
      x-stainless-python-extend-union:
        - ContentBlock
      x-stainless-python-extend-union-imports:
        - from .content_block import ContentBlock
      x-stainless-go-variant-constructor:
        naming: new_{variant}_block
    # Text content block (`type: text`). `text` must be non-empty; cache
    # control and citations are optional and nullable.
    AnthropicRequestTextBlock:
      title: RequestTextBlock
      type: object
      additionalProperties: false
      properties:
        cache_control:
          title: Cache Control
          description: Create a cache control breakpoint at this content block.
          anyOf:
            - oneOf:
                - $ref: '#/components/schemas/AnthropicCacheControlEphemeral'
              discriminator:
                propertyName: type
                mapping:
                  ephemeral: '#/components/schemas/AnthropicCacheControlEphemeral'
            - type: 'null'
        citations:
          title: Citations
          anyOf:
            - type: array
              items:
                oneOf:
                  - $ref: '#/components/schemas/AnthropicRequestCharLocationCitation'
                  - $ref: '#/components/schemas/AnthropicRequestPageLocationCitation'
                  - $ref: >-
                      #/components/schemas/AnthropicRequestContentBlockLocationCitation
                  - $ref: >-
                      #/components/schemas/AnthropicRequestWebSearchResultLocationCitation
                  - $ref: >-
                      #/components/schemas/AnthropicRequestSearchResultLocationCitation
                discriminator:
                  propertyName: type
                  mapping:
                    char_location: '#/components/schemas/AnthropicRequestCharLocationCitation'
                    content_block_location: >-
                      #/components/schemas/AnthropicRequestContentBlockLocationCitation
                    page_location: '#/components/schemas/AnthropicRequestPageLocationCitation'
                    search_result_location: >-
                      #/components/schemas/AnthropicRequestSearchResultLocationCitation
                    web_search_result_location: >-
                      #/components/schemas/AnthropicRequestWebSearchResultLocationCitation
            - type: 'null'
        text:
          title: Text
          type: string
          minLength: 1
        type:
          title: Type
          type: string
          const: text
      required:
        - text
        - type
    # Ephemeral cache-control marker (`type: ephemeral`) with an optional `ttl`
    # restricted to `5m` or `1h`.
    AnthropicCacheControlEphemeral:
      additionalProperties: false
      properties:
        ttl:
          description: |-
            The time-to-live for the cache control breakpoint.

            This may be one of the following values:
            - `5m`: 5 minutes
            - `1h`: 1 hour

            Defaults to `5m`.
          enum:
            - 5m
            - 1h
          title: Ttl
          type: string
          # Maps generated SDK enum member names to the wire values above.
          x-stainless-renameMap:
            ttl_5m: 5m
            ttl_1h: 1h
        type:
          const: ephemeral
          title: Type
          type: string
      required:
        - type
      title: CacheControlEphemeral
      type: object
      x-stainless-go-constant-constructor: true
    # Citation of type `char_location`: cited text plus a document index and
    # start/end character indexes. All fields are required; `document_title`
    # may be null.
    AnthropicRequestCharLocationCitation:
      title: RequestCharLocationCitation
      type: object
      additionalProperties: false
      properties:
        cited_text:
          type: string
          title: Cited Text
        document_index:
          type: integer
          title: Document Index
          minimum: 0
        document_title:
          title: Document Title
          anyOf:
            - type: string
              minLength: 1
              maxLength: 255
            - type: 'null'
        end_char_index:
          type: integer
          title: End Char Index
        start_char_index:
          type: integer
          title: Start Char Index
          minimum: 0
        type:
          type: string
          title: Type
          const: char_location
      required:
        - cited_text
        - document_index
        - document_title
        - end_char_index
        - start_char_index
        - type
    # Citation of type `page_location`: cited text plus a document index and
    # start/end page numbers (`start_page_number` has a minimum of 1).
    AnthropicRequestPageLocationCitation:
      title: RequestPageLocationCitation
      type: object
      additionalProperties: false
      properties:
        cited_text:
          type: string
          title: Cited Text
        document_index:
          type: integer
          title: Document Index
          minimum: 0
        document_title:
          title: Document Title
          anyOf:
            - type: string
              minLength: 1
              maxLength: 255
            - type: 'null'
        end_page_number:
          type: integer
          title: End Page Number
        start_page_number:
          type: integer
          title: Start Page Number
          minimum: 1
        type:
          type: string
          title: Type
          const: page_location
      required:
        - cited_text
        - document_index
        - document_title
        - end_page_number
        - start_page_number
        - type
    # Citation of type `content_block_location`: cited text plus a document
    # index and start/end content-block indexes.
    AnthropicRequestContentBlockLocationCitation:
      title: RequestContentBlockLocationCitation
      type: object
      additionalProperties: false
      properties:
        cited_text:
          type: string
          title: Cited Text
        document_index:
          type: integer
          title: Document Index
          minimum: 0
        document_title:
          title: Document Title
          anyOf:
            - type: string
              minLength: 1
              maxLength: 255
            - type: 'null'
        end_block_index:
          type: integer
          title: End Block Index
        start_block_index:
          type: integer
          title: Start Block Index
          minimum: 0
        type:
          type: string
          title: Type
          const: content_block_location
      required:
        - cited_text
        - document_index
        - document_title
        - end_block_index
        - start_block_index
        - type
    # Citation of type `web_search_result_location`: cited text plus the
    # result's `url`, nullable `title`, and an `encrypted_index` string.
    AnthropicRequestWebSearchResultLocationCitation:
      title: RequestWebSearchResultLocationCitation
      type: object
      additionalProperties: false
      properties:
        cited_text:
          type: string
          title: Cited Text
        encrypted_index:
          type: string
          title: Encrypted Index
        title:
          title: Title
          anyOf:
            - type: string
              minLength: 1
              maxLength: 512
            - type: 'null'
        type:
          type: string
          title: Type
          const: web_search_result_location
        url:
          type: string
          title: Url
          minLength: 1
          maxLength: 2048
      required:
        - cited_text
        - encrypted_index
        - title
        - type
        - url
    # Citation of type `search_result_location`: cited text plus a search
    # result index, `source` string, nullable `title`, and start/end block
    # indexes.
    AnthropicRequestSearchResultLocationCitation:
      title: RequestSearchResultLocationCitation
      type: object
      additionalProperties: false
      properties:
        cited_text:
          type: string
          title: Cited Text
        end_block_index:
          type: integer
          title: End Block Index
        search_result_index:
          type: integer
          title: Search Result Index
          minimum: 0
        source:
          type: string
          title: Source
        start_block_index:
          type: integer
          title: Start Block Index
          minimum: 0
        title:
          title: Title
          anyOf:
            - type: string
            - type: 'null'
        type:
          type: string
          title: Type
          const: search_result_location
      required:
        - cited_text
        - end_block_index
        - search_result_index
        - source
        - start_block_index
        - title
        - type
    # Image content block (`type: image`). `source` is either a base64 image
    # source or a URL image source, selected by the source's own `type`.
    AnthropicRequestImageBlock:
      title: RequestImageBlock
      type: object
      additionalProperties: false
      properties:
        cache_control:
          title: Cache Control
          description: Create a cache control breakpoint at this content block.
          anyOf:
            - oneOf:
                - $ref: '#/components/schemas/AnthropicCacheControlEphemeral'
              discriminator:
                propertyName: type
                mapping:
                  ephemeral: '#/components/schemas/AnthropicCacheControlEphemeral'
            - type: 'null'
        source:
          title: Source
          oneOf:
            - $ref: '#/components/schemas/AnthropicBase64ImageSource'
            - $ref: '#/components/schemas/AnthropicURLImageSource'
          discriminator:
            propertyName: type
            mapping:
              base64: '#/components/schemas/AnthropicBase64ImageSource'
              url: '#/components/schemas/AnthropicURLImageSource'
        type:
          title: Type
          type: string
          const: image
      required:
        - source
        - type
    # Inline image source (`type: base64`): `data` is a string with
    # `format: byte`, and `media_type` is limited to four image MIME types.
    AnthropicBase64ImageSource:
      title: Base64ImageSource
      type: object
      additionalProperties: false
      properties:
        data:
          type: string
          format: byte
          title: Data
        media_type:
          type: string
          title: Media Type
          enum:
            - image/jpeg
            - image/png
            - image/gif
            - image/webp
        type:
          type: string
          title: Type
          const: base64
      required:
        - data
        - media_type
        - type
    # Image source given by reference (`type: url`) with a required `url`
    # string.
    AnthropicURLImageSource:
      title: URLImageSource
      type: object
      additionalProperties: false
      properties:
        type:
          type: string
          title: Type
          const: url
        url:
          type: string
          title: Url
      required:
        - type
        - url
    # Document content block (`type: document`). `source` is one of four
    # source schemas (base64 PDF, plain text, content blocks, URL PDF);
    # citations config, context and title are optional and nullable.
    AnthropicRequestDocumentBlock:
      title: RequestDocumentBlock
      type: object
      additionalProperties: false
      properties:
        cache_control:
          title: Cache Control
          description: Create a cache control breakpoint at this content block.
          anyOf:
            - oneOf:
                - $ref: '#/components/schemas/AnthropicCacheControlEphemeral'
              discriminator:
                propertyName: type
                mapping:
                  ephemeral: '#/components/schemas/AnthropicCacheControlEphemeral'
            - type: 'null'
        citations:
          anyOf:
            - $ref: '#/components/schemas/AnthropicRequestCitationsConfig'
            - type: 'null'
        context:
          title: Context
          anyOf:
            - type: string
              minLength: 1
            - type: 'null'
        source:
          title: Source
          oneOf:
            - $ref: '#/components/schemas/AnthropicBase64PDFSource'
            - $ref: '#/components/schemas/AnthropicPlainTextSource'
            - $ref: '#/components/schemas/AnthropicContentBlockSource'
            - $ref: '#/components/schemas/AnthropicURLPDFSource'
          discriminator:
            propertyName: type
            mapping:
              base64: '#/components/schemas/AnthropicBase64PDFSource'
              content: '#/components/schemas/AnthropicContentBlockSource'
              text: '#/components/schemas/AnthropicPlainTextSource'
              url: '#/components/schemas/AnthropicURLPDFSource'
        title:
          title: Title
          anyOf:
            - type: string
              minLength: 1
              maxLength: 500
            - type: 'null'
        type:
          title: Type
          type: string
          const: document
      required:
        - source
        - type
    # Citations configuration object holding a single optional boolean,
    # `enabled`.
    AnthropicRequestCitationsConfig:
      title: RequestCitationsConfig
      type: object
      additionalProperties: false
      properties:
        enabled:
          type: boolean
          title: Enabled
    # Document source of `type: content`. `content` is either a string or an
    # array whose items are text or image request blocks.
    AnthropicContentBlockSource:
      title: ContentBlockSource
      type: object
      additionalProperties: false
      properties:
        content:
          title: Content
          anyOf:
            - type: string
            - title: content_block_source_content
              type: array
              items:
                title: content_block_source_content_item
                oneOf:
                  - $ref: '#/components/schemas/AnthropicRequestTextBlock'
                  - $ref: '#/components/schemas/AnthropicRequestImageBlock'
                discriminator:
                  propertyName: type
                  mapping:
                    image: '#/components/schemas/AnthropicRequestImageBlock'
                    text: '#/components/schemas/AnthropicRequestTextBlock'
                x-stainless-naming:
                  go:
                    type_name: ContentBlockSourceContentItem
        type:
          title: Type
          type: string
          const: content
      required:
        - content
        - type
    # Document source given by reference (`type: url`) with a required `url`
    # string.
    AnthropicURLPDFSource:
      title: URLPDFSource
      type: object
      additionalProperties: false
      properties:
        type:
          type: string
          title: Type
          const: url
        url:
          type: string
          title: Url
      required:
        - type
        - url
    # Thinking content block (`type: thinking`); `signature` and `thinking`
    # are both required strings.
    AnthropicRequestThinkingBlock:
      title: RequestThinkingBlock
      type: object
      additionalProperties: false
      properties:
        signature:
          type: string
          title: Signature
        thinking:
          type: string
          title: Thinking
        type:
          type: string
          title: Type
          const: thinking
      required:
        - signature
        - thinking
        - type
    # Redacted thinking content block (`type: redacted_thinking`) carrying a
    # required `data` string.
    AnthropicRequestRedactedThinkingBlock:
      title: RequestRedactedThinkingBlock
      type: object
      additionalProperties: false
      properties:
        data:
          type: string
          title: Data
        type:
          type: string
          title: Type
          const: redacted_thinking
      required:
        - data
        - type
    # Tool-use content block (`type: tool_use`): an `id`, the tool `name`, and
    # a free-form `input` object are all required.
    AnthropicRequestToolUseBlock:
      title: RequestToolUseBlock
      type: object
      additionalProperties: false
      properties:
        cache_control:
          title: Cache Control
          description: Create a cache control breakpoint at this content block.
          anyOf:
            - oneOf:
                - $ref: '#/components/schemas/AnthropicCacheControlEphemeral'
              discriminator:
                propertyName: type
                mapping:
                  ephemeral: '#/components/schemas/AnthropicCacheControlEphemeral'
            - type: 'null'
        id:
          title: Id
          type: string
          pattern: ^[a-zA-Z0-9_-]+$
        input:
          title: Input
          type: object
          additionalProperties: true
        name:
          title: Name
          type: string
          minLength: 1
          maxLength: 200
        type:
          title: Type
          type: string
          const: tool_use
      required:
        - id
        - input
        - name
        - type
    # Tool-result content block (`type: tool_result`) tied to a tool use via
    # `tool_use_id`. `content` is optional: a string or an array of text,
    # image or document blocks.
    AnthropicRequestToolResultBlock:
      title: RequestToolResultBlock
      type: object
      additionalProperties: false
      properties:
        cache_control:
          title: Cache Control
          description: Create a cache control breakpoint at this content block.
          anyOf:
            - oneOf:
                - $ref: '#/components/schemas/AnthropicCacheControlEphemeral'
              discriminator:
                propertyName: type
                mapping:
                  ephemeral: '#/components/schemas/AnthropicCacheControlEphemeral'
            - type: 'null'
        content:
          title: Content
          anyOf:
            - type: string
              x-stainless-skip:
                - go
                - cli
            - type: array
              items:
                title: Block
                oneOf:
                  - $ref: '#/components/schemas/AnthropicRequestTextBlock'
                  - $ref: '#/components/schemas/AnthropicRequestImageBlock'
                  - $ref: '#/components/schemas/AnthropicRequestDocumentBlock'
                discriminator:
                  propertyName: type
                  mapping:
                    document: '#/components/schemas/AnthropicRequestDocumentBlock'
                    image: '#/components/schemas/AnthropicRequestImageBlock'
                    text: '#/components/schemas/AnthropicRequestTextBlock'
              x-stainless-naming:
                python:
                  type_name: Content
                ruby:
                  type_name: Content
                php:
                  type_name: Content
        is_error:
          title: Is Error
          type: boolean
        tool_use_id:
          title: Tool Use Id
          type: string
          pattern: ^[a-zA-Z0-9_-]+$
        type:
          title: Type
          type: string
          const: tool_result
      required:
        - tool_use_id
        - type
    # Request metadata object; its only property is an optional, nullable
    # `user_id` of at most 256 characters.
    AnthropicMetadata:
      title: Metadata
      type: object
      additionalProperties: false
      properties:
        user_id:
          title: User Id
          description: >-
            An external identifier for the user who is associated with the
            request.


            This should be a uuid, hash value, or other opaque identifier. This
            id may be used to help detect abuse. Do not include any identifying
            information such as name, email address, or phone number.
          anyOf:
            - type: string
              maxLength: 256
            - type: 'null'
          examples:
            - 13803d75-b4b5-4c3e-b2a2-6f21399b021b
    # Output configuration: an optional, nullable `effort` level and an
    # optional, nullable JSON output `format`.
    AnthropicOutputConfig:
      title: OutputConfig
      type: object
      additionalProperties: false
      properties:
        effort:
          description: >-
            How much effort the model should put into its response. Higher
            effort levels may result in more thorough analysis but take longer.


            Valid values are `low`, `medium`, `high`, or `max`.


            **Fireworks behavior:** The `effort` level is converted to Fireworks
            [`reasoning_effort`](/api-reference/post-chatcompletions). Values
            `low`, `medium`, and `high` map directly. `max` is mapped to `high`
            since Fireworks does not have a `max` effort level.
          anyOf:
            - $ref: '#/components/schemas/AnthropicEffortLevel'
            - type: 'null'
        format:
          description: >-
            A schema to specify the model's output format in responses. See
            [structured
            outputs](/structured-responses/structured-output-grammar-based)
          anyOf:
            - $ref: '#/components/schemas/AnthropicJsonOutputFormat'
            - type: 'null'
    # String enum of the four effort levels.
    AnthropicEffortLevel:
      title: EffortLevel
      type: string
      description: All possible effort levels.
      enum:
        - low
        - medium
        - high
        - max
    # JSON-schema output format (`type: json_schema`); `schema` is a required
    # free-form object.
    AnthropicJsonOutputFormat:
      title: JsonOutputFormat
      type: object
      additionalProperties: false
      properties:
        schema:
          title: Schema
          description: The JSON schema of the format
          type: object
          additionalProperties: true
        type:
          title: Type
          type: string
          const: json_schema
      required:
        - schema
        - type
    # Union of the thinking configuration variants (enabled, disabled,
    # adaptive), selected by the `type` property.
    AnthropicThinkingConfigParam:
      title: Thinking
      description: >-
        Configuration for enabling the model's extended thinking.


        When enabled, responses include `thinking` content blocks showing the
        model's thinking process before the final answer. Requires a minimum
        budget of 1,024 tokens and counts towards your `max_tokens` limit.


        See [reasoning](/guides/reasoning) for details.


        **Note:** The `adaptive` thinking type is not supported yet.
      oneOf:
        - $ref: '#/components/schemas/AnthropicThinkingConfigEnabled'
        - $ref: '#/components/schemas/AnthropicThinkingConfigDisabled'
        - $ref: '#/components/schemas/AnthropicThinkingConfigAdaptive'
      discriminator:
        propertyName: type
        mapping:
          adaptive: '#/components/schemas/AnthropicThinkingConfigAdaptive'
          disabled: '#/components/schemas/AnthropicThinkingConfigDisabled'
          enabled: '#/components/schemas/AnthropicThinkingConfigEnabled'
    # Thinking configuration variant `type: enabled`; requires an integer
    # `budget_tokens` with a schema minimum of 1024.
    AnthropicThinkingConfigEnabled:
      title: ThinkingConfigEnabled
      type: object
      additionalProperties: false
      properties:
        budget_tokens:
          title: Budget Tokens
          type: integer
          minimum: 1024
          description: >-
            Determines how many tokens the model can use for its internal
            reasoning process. Larger budgets can enable more thorough analysis
            for complex problems, improving response quality.


            Must be ≥1024 and less than `max_tokens`.


            See [reasoning](/guides/reasoning) for details.
        type:
          title: Type
          type: string
          const: enabled
      required:
        - budget_tokens
        - type
    # Thinking configuration variant `type: disabled`; it has no other
    # properties.
    AnthropicThinkingConfigDisabled:
      title: ThinkingConfigDisabled
      type: object
      additionalProperties: false
      properties:
        type:
          title: Type
          type: string
          const: disabled
      required:
        - type
      x-stainless-go-constant-constructor: true
    # Thinking configuration variant `type: adaptive`; the schema's own
    # description marks it as not supported yet.
    AnthropicThinkingConfigAdaptive:
      title: ThinkingConfigAdaptive
      description: '**Not supported yet.**'
      type: object
      additionalProperties: false
      properties:
        type:
          title: Type
          type: string
          const: adaptive
      required:
        - type
    # Union of the four tool-choice variants (auto, any, tool, none), selected
    # by the `type` property.
    AnthropicToolChoice:
      title: Tool Choice
      description: >-
        How the model should use the provided tools. The model can use a
        specific tool, any available tool, decide by itself, or not use tools at
        all.
      oneOf:
        - $ref: '#/components/schemas/AnthropicToolChoiceAuto'
        - $ref: '#/components/schemas/AnthropicToolChoiceAny'
        - $ref: '#/components/schemas/AnthropicToolChoiceTool'
        - $ref: '#/components/schemas/AnthropicToolChoiceNone'
      discriminator:
        propertyName: type
        mapping:
          any: '#/components/schemas/AnthropicToolChoiceAny'
          auto: '#/components/schemas/AnthropicToolChoiceAuto'
          none: '#/components/schemas/AnthropicToolChoiceNone'
          tool: '#/components/schemas/AnthropicToolChoiceTool'
    # Tool-choice variant `type: auto`, with an optional
    # `disable_parallel_tool_use` flag.
    AnthropicToolChoiceAuto:
      title: ToolChoiceAuto
      description: The model will automatically decide whether to use tools.
      type: object
      additionalProperties: false
      properties:
        disable_parallel_tool_use:
          title: Disable Parallel Tool Use
          type: boolean
          description: >-
            Whether to disable parallel tool use.


            Defaults to `false`. If set to `true`, the model will output at most
            one tool use.
        type:
          title: Type
          type: string
          const: auto
      required:
        - type
    # Tool-choice variant `type: any`, with an optional
    # `disable_parallel_tool_use` flag.
    AnthropicToolChoiceAny:
      title: ToolChoiceAny
      description: The model will use any available tools.
      type: object
      additionalProperties: false
      properties:
        disable_parallel_tool_use:
          title: Disable Parallel Tool Use
          type: boolean
          description: >-
            Whether to disable parallel tool use.


            Defaults to `false`. If set to `true`, the model will output exactly
            one tool use.
        type:
          title: Type
          type: string
          const: any
      required:
        - type
    # Tool-choice variant `type: tool`; the tool `name` is required and
    # `disable_parallel_tool_use` is optional.
    AnthropicToolChoiceTool:
      title: ToolChoiceTool
      description: The model will use the specified tool with `tool_choice.name`.
      type: object
      additionalProperties: false
      properties:
        disable_parallel_tool_use:
          title: Disable Parallel Tool Use
          type: boolean
          description: >-
            Whether to disable parallel tool use.


            Defaults to `false`. If set to `true`, the model will output exactly
            one tool use.
        name:
          title: Name
          type: string
          description: The name of the tool to use.
        type:
          title: Type
          type: string
          const: tool
      required:
        - name
        - type
    # Tool-choice variant `type: none`; it has no other properties.
    AnthropicToolChoiceNone:
      title: ToolChoiceNone
      description: The model will not be allowed to use tools.
      type: object
      additionalProperties: false
      properties:
        type:
          title: Type
          type: string
          const: none
      required:
        - type
      x-stainless-go-constant-constructor: true
    # Tool definition: `name` and `input_schema` are required; `type` (null or
    # `custom`), `description` and `strict` are optional.
    AnthropicTool:
      title: Tool
      type: object
      additionalProperties: false
      properties:
        type:
          title: Type
          anyOf:
            - type: 'null'
            - type: string
              const: custom
        description:
          title: Description
          type: string
          description: >-
            Description of what this tool does.


            Tool descriptions should be as detailed as possible. The more
            information that the model has about what the tool is and how to use
            it, the better it will perform. You can use natural language
            descriptions to reinforce important aspects of the tool input JSON
            schema.
          examples:
            - Get the current weather in a given location
        name:
          title: Name
          type: string
          description: >-
            Name of the tool.


            This is how the tool will be called by the model and in `tool_use`
            blocks.
          minLength: 1
          maxLength: 128
          pattern: ^[a-zA-Z0-9_-]{1,128}$
        input_schema:
          $ref: '#/components/schemas/AnthropicInputSchema'
          description: >-
            [JSON schema](https://json-schema.org/draft/2020-12) for this tool's
            input.


            This defines the shape of the `input` that your tool accepts and
            that the model will produce.
          examples:
            - type: object
              properties:
                location:
                  type: string
                  description: The city and state, e.g. San Francisco, CA
                unit:
                  type: string
                  description: Unit for the output - one of (celsius, fahrenheit)
              required:
                - location
          x-stainless-skip:
            - cli
        strict:
          title: Strict
          type: boolean
          description: When true, guarantees schema validation on tool names and inputs
      required:
        - name
        - input_schema
    # Shape of a tool's input schema: `type` must be the constant `object`;
    # `properties` and `required` are optional and nullable, and additional
    # keys are allowed.
    AnthropicInputSchema:
      title: InputSchema
      type: object
      additionalProperties: true
      properties:
        properties:
          title: Properties
          anyOf:
            - type: object
              additionalProperties: true
            - type: 'null'
        required:
          title: Required
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
        type:
          title: Type
          type: string
          const: object
      required:
        - type
    # Raw-output payload. The prompt fragments, prompt token ids and completion
    # are required; the remaining fields are nullable and default to null.
    AnthropicRawOutput:
      title: RawOutput
      description: >-
        Fireworks extension that returns low-level details of what the model
        sees, including the formatted prompt and function calls.
      type: object
      additionalProperties: false
      properties:
        prompt_fragments:
          title: Prompt Fragments
          description: >-
            Pieces of the prompt (like individual messages) before truncation
            and concatenation. Depending on prompt_truncate_len some of the
            messages might be dropped. Contains a mix of strings to be tokenized
            and individual tokens (if dictated by the conversation template)
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        prompt_token_ids:
          title: Prompt Token Ids
          description: Fully processed prompt as seen by the model
          type: array
          items:
            type: integer
        completion:
          title: Completion
          description: >-
            Raw completion produced by the model before any tool calls are
            parsed
          type: string
        completion_token_ids:
          title: Completion Token Ids
          description: Token IDs for the raw completion
          anyOf:
            - type: array
              items:
                type: integer
            - type: 'null'
          default: null
        images:
          title: Images
          description: Images in the prompt
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
          default: null
        grammar:
          title: Grammar
          description: >-
            Grammar used for constrained decoding, can be either user provided
            (directly or JSON schema) or inferred by the chat template
          anyOf:
            - type: string
            - type: 'null'
          default: null
      required:
        - prompt_fragments
        - prompt_token_ids
        - completion
    # Stream event whose `type` is the constant `message_start`; it carries
    # an AnthropicMessage in `message`. Both fields are required.
    AnthropicMessageStartEvent:
      type: object
      required:
        - type
        - message
      properties:
        type:
          type: string
          const: message_start
        message:
          $ref: '#/components/schemas/AnthropicMessage'
    # Stream event whose `type` is the constant `content_block_start`; it
    # carries an integer `index` and an AnthropicContentBlock in
    # `content_block`. All three fields are required.
    AnthropicContentBlockStartEvent:
      type: object
      required:
        - type
        - index
        - content_block
      properties:
        type:
          type: string
          const: content_block_start
        index:
          type: integer
        content_block:
          $ref: '#/components/schemas/AnthropicContentBlock'
    # Stream event whose `type` is the constant `content_block_delta`; it
    # carries an integer `index` and a `delta` object. All three fields are
    # required.
    AnthropicContentBlockDeltaEvent:
      type: object
      properties:
        type:
          const: content_block_delta
          type: string
        index:
          type: integer
        delta:
          type: object
          # `type` selects the variant; the variant-specific payload fields
          # below are all optional at the schema level.
          properties:
            type:
              type: string
              enum:
                - text_delta
                - thinking_delta
                - signature_delta
                - input_json_delta
                - citations_delta
            text:
              type: string
            thinking:
              type: string
            signature:
              type: string
            partial_json:
              type: string
            # `citations_delta` is listed in the enum above, so its payload
            # field is declared here too. The citation's inner shape is left
            # open because no citation schema is visible in this document.
            citation:
              description: Citation payload carried by a `citations_delta` update.
              type: object
              additionalProperties: true
          required:
            - type
      required:
        - type
        - index
        - delta
    # Stream event whose `type` is the constant `content_block_stop`; it
    # carries an integer `index`. Both fields are required.
    AnthropicContentBlockStopEvent:
      type: object
      required:
        - type
        - index
      properties:
        type:
          type: string
          const: content_block_stop
        index:
          type: integer
    # Stream event whose `type` is the constant `message_delta`. `delta`
    # may carry `stop_reason` and a nullable `stop_sequence`; `usage` must
    # report `output_tokens`. `type`, `delta` and `usage` are required.
    AnthropicMessageDeltaEvent:
      type: object
      required:
        - type
        - delta
        - usage
      properties:
        type:
          type: string
          const: message_delta
        delta:
          type: object
          # Neither field is listed as required on the delta object.
          properties:
            stop_reason:
              $ref: '#/components/schemas/AnthropicStopReason'
            stop_sequence:
              anyOf:
                - type: string
                - type: 'null'
        usage:
          type: object
          required:
            - output_tokens
          properties:
            output_tokens:
              type: integer
    # Stream event whose only field is `type`, fixed to `message_stop`.
    AnthropicMessageStopEvent:
      type: object
      required:
        - type
      properties:
        type:
          type: string
          const: message_stop
    # Stream event whose only field is `type`, fixed to `ping`.
    AnthropicPingEvent:
      type: object
      required:
        - type
      properties:
        type:
          type: string
          const: ping
    # Union of the seven stream event schemas, discriminated by the `type`
    # property. Each mapping key equals the `const` value of the event
    # schema it points to.
    AnthropicMessageStreamEvent:
      description: A Server-Sent Event from the Messages API stream.
      oneOf:
        - $ref: '#/components/schemas/AnthropicMessageStartEvent'
        - $ref: '#/components/schemas/AnthropicContentBlockStartEvent'
        - $ref: '#/components/schemas/AnthropicContentBlockDeltaEvent'
        - $ref: '#/components/schemas/AnthropicContentBlockStopEvent'
        - $ref: '#/components/schemas/AnthropicMessageDeltaEvent'
        - $ref: '#/components/schemas/AnthropicMessageStopEvent'
        - $ref: '#/components/schemas/AnthropicPingEvent'
      discriminator:
        propertyName: type
        mapping:
          message_start: '#/components/schemas/AnthropicMessageStartEvent'
          content_block_start: '#/components/schemas/AnthropicContentBlockStartEvent'
          content_block_delta: '#/components/schemas/AnthropicContentBlockDeltaEvent'
          content_block_stop: '#/components/schemas/AnthropicContentBlockStopEvent'
          message_delta: '#/components/schemas/AnthropicMessageDeltaEvent'
          message_stop: '#/components/schemas/AnthropicMessageStopEvent'
          ping: '#/components/schemas/AnthropicPingEvent'
# Vendor extension that groups the tags declared in the top-level `tags`
# list under named sections. Each entry under `tags` must match a tag
# `name` exactly.
x-tagGroups:
  # Both gateway tags share the display name "Gateway".
  - name: Gateway REST API
    tags:
      - gateway.openapi_Gateway
      - gateway-extra.openapi_Gateway
  - name: Fireworks Responses API
    tags:
      - responses.openapi_other
  - name: Fireworks Text Completion API
    tags:
      - text-completion.openapi_other
  - name: Fireworks AI Anthropic Compatible Messages API
    tags:
      - anthropic-messages.openapi_other