---
# AsyncAPI specification version this document is written against.
asyncapi: '2.6.0'
# URN identifying the application described by this document.
id: 'urn:ai:requesty:router:v1:chat-completions:sse'
info:
  # Human-readable identity of this API description.
  title: Requesty Chat Completions Streaming (HTTP + SSE)
  version: '1.0.0'
  # Markdown overview: states what is modeled here (the streamed SSE events)
  # and what is not (the request body, kept in the companion OpenAPI file).
  description: |
    AsyncAPI 2.6 description of Requesty's **chat completion streaming** surface.

    Requesty does not publish a WebSocket API. The only asynchronous /
    event-style transport documented at https://docs.requesty.ai is **HTTP
    Server-Sent Events (SSE)** delivered over the same OpenAI-compatible REST
    endpoint (`POST /chat/completions`) when the request body sets
    `stream: true`. SSE is a one-way, server-to-client HTTP streaming channel;
    it is **not** WebSocket.

    Tokens are sent as data-only server-sent events as they become available,
    with the stream terminated by a `data: [DONE]` message, matching the
    OpenAI chat completion streaming convention that Requesty is compatible
    with.

    This AsyncAPI document models only the streamed events emitted by
    Requesty's SSE response. The request body itself (model, messages, tools,
    etc.) is modeled in the companion OpenAPI document at
    `openapi/requesty-openapi.yml`.
  # Maintainer of this description.
  contact:
    name: API Evangelist
    url: https://apievangelist.com
    email: kin@apievangelist.com
  license:
    name: API documentation - Requesty Terms of Service
    url: https://www.requesty.ai/terms
  # Specification extension restating the transport facts from the
  # description above as discrete key/value pairs.
  x-transport-notes:
    source: https://docs.requesty.ai/api-reference/endpoint/chat-completions-create
    transport: HTTP Server-Sent Events (SSE)
    protocol: https
    mediaType: text/event-stream
    direction: server-to-client (one-way)
    notWebSocket: true
    triggeredBy: 'POST https://router.requesty.ai/v1/chat/completions with request body { "stream": true }'
    terminator: 'data: [DONE]'
# Fallback media type for any message that omits its own `contentType`; the
# two messages defined under `components.messages` each declare one.
defaultContentType: text/event-stream
servers:
  # Global router. Secured with the bearer scheme from
  # `components.securitySchemes`.
  requesty:
    protocol: https
    url: router.requesty.ai/v1
    security:
      - bearerAuth: []
    description: |
      Requesty's OpenAI-compatible router base. Chat completion streaming is
      delivered as HTTP Server-Sent Events over this base when `stream: true`
      is set on the JSON request body. AsyncAPI 2.6 does not define a dedicated
      SSE protocol identifier; `https` is used here and the SSE transport is
      documented in `info.x-transport-notes` and on each channel.
  # EU-resident router; same protocol and security as the global entry.
  requestyEu:
    protocol: https
    url: router.eu.requesty.ai/v1
    security:
      - bearerAuth: []
    description: >-
      EU data residency router; same SSE behavior as the global
      router.
channels:
  # Single channel: the SSE response stream of the chat completions endpoint.
  /chat/completions:
    description: |
      Chat completion SSE stream. The client opens this channel by issuing
      `POST /chat/completions` with `Content-Type: application/json` and a JSON
      body containing `stream: true`. The server responds with
      `Content-Type: text/event-stream` and emits a sequence of `data:` lines,
      each carrying one JSON-serialized `chat.completion.chunk` object,
      followed by a final `data: [DONE]` line.
    bindings:
      # The AsyncAPI HTTP binding reserves its channel-level object (it MUST
      # NOT contain any properties), so the HTTP method is declared on the
      # operation binding below. Only the SSE extension lives at this level.
      x-sse:
        mediaType: text/event-stream
        eventField: 'data'
        terminator: '[DONE]'
    # `subscribe` = messages the application sends and the client receives.
    subscribe:
      operationId: streamChatCompletionChunks
      summary: Subscribe to streamed chat completion chunks (SSE).
      description: |
        After `POST /chat/completions` is issued with `stream: true`, the
        server emits an ordered sequence of SSE `data:` events. Each `data:`
        line either carries a JSON-serialized `ChatCompletionChunk` or the
        literal sentinel `[DONE]` marking end of stream.
      bindings:
        http:
          # HTTP binding 0.3.0 operation objects define only `method`,
          # `query` and `bindingVersion`; the `type` field existed solely in
          # binding 0.1.0. `method` is the verb of the request that opens
          # the stream.
          method: POST
          bindingVersion: '0.3.0'
      # Every event on the stream is exactly one of these two messages.
      message:
        oneOf:
          - $ref: '#/components/messages/ChatCompletionChunk'
          - $ref: '#/components/messages/StreamDone'
components:
  securitySchemes:
    # HTTP bearer authentication, referenced by both entries under `servers`.
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: Requesty API key
      description: |
        Standard Requesty bearer token. Set the
        `Authorization: Bearer <REQUESTY_API_KEY>` header on the
        `POST /chat/completions` request that opens the SSE stream.
  messages:
    ChatCompletionChunk:
      # Identity and human-facing labels.
      name: ChatCompletionChunk
      title: Streamed chat completion chunk
      summary: |
        A single SSE `data:` event carrying one JSON `chat.completion.chunk`
        object. Many of these are emitted per request, in order.
      description: |
        Sent as `data: {json}\n\n` on the SSE stream. The JSON object's
        `object` field is always the literal string `chat.completion.chunk`.
      # Media type of the event data and the schema describing it.
      contentType: application/json
      payload:
        $ref: '#/components/schemas/ChatCompletionChunk'
      # The three examples share one `id` and are listed in stream order:
      # role-establishing chunk, token delta, finishing chunk.
      examples:
        # Opening chunk: announces the assistant role with empty content.
        - name: openingChunk
          summary: First chunk - establishes role
          payload:
            id: chatcmpl-abc123
            object: chat.completion.chunk
            created: 1781913600
            model: openai/gpt-4o-mini
            choices:
              - index: 0
                delta:
                  role: assistant
                  content: ''
                finish_reason: null
        # Middle chunk: carries a slice of generated text.
        - name: contentChunk
          summary: Token delta
          payload:
            id: chatcmpl-abc123
            object: chat.completion.chunk
            created: 1781913600
            model: openai/gpt-4o-mini
            choices:
              - index: 0
                delta:
                  content: Hello
                finish_reason: null
        # Closing chunk: empty delta, `finish_reason` populated.
        - name: finalChunk
          summary: Final chunk - finish_reason set
          payload:
            id: chatcmpl-abc123
            object: chat.completion.chunk
            created: 1781913600
            model: openai/gpt-4o-mini
            choices:
              - index: 0
                delta: {}
                finish_reason: stop
    StreamDone:
      # Identity and human-facing labels.
      name: StreamDone
      title: Stream terminator
      summary: |
        The literal SSE event `data: [DONE]` that marks end of stream. Not
        JSON; the payload is the string `[DONE]`.
      description: |
        Clients must stop reading the stream when this sentinel is observed.
      # Plain text rather than JSON: the event data is a bare string.
      contentType: text/plain
      payload:
        $ref: '#/components/schemas/StreamDoneSentinel'
      examples:
        # Quoted so YAML reads a string instead of a flow sequence.
        - name: done
          summary: End-of-stream sentinel
          payload: '[DONE]'
  schemas:
    StreamDoneSentinel:
      # Single-value string enum; the value is quoted so YAML does not parse
      # the brackets as a flow sequence.
      description: |
        End-of-stream sentinel. The full SSE line is `data: [DONE]`. The
        payload value modeled here is the string literal `[DONE]`.
      type: string
      enum: ['[DONE]']
    ChatCompletionChunk:
      type: object
      description: >-
        Represents a streamed chunk of an OpenAI-compatible chat completion
        response routed by Requesty.
      # Every property except `usage` is mandatory on each chunk.
      required: [choices, created, id, model, object]
      properties:
        id:
          type: string
          description: >-
            A unique identifier for the chat completion. Each chunk has the
            same ID.
        object:
          type: string
          enum: [chat.completion.chunk]
          description: The object type, which is always `chat.completion.chunk`.
        created:
          type: integer
          description: >-
            Unix timestamp (seconds) of when the chat completion was
            created.
        model:
          type: string
          description: The routed model used to generate the completion.
        choices:
          type: array
          description: A list of chat completion choices.
          items:
            $ref: '#/components/schemas/ChatCompletionChunkChoice'
        # Optional; defined by the shared `Usage` schema.
        usage:
          $ref: '#/components/schemas/Usage'
    ChatCompletionChunkChoice:
      # One entry of a chunk's `choices` array: the choice index, the
      # incremental delta, and (when present) why generation stopped.
      type: object
      required:
        - delta
        - index
      properties:
        index:
          type: integer
          description: The index of the choice in the list of choices.
        delta:
          $ref: '#/components/schemas/ChatCompletionStreamResponseDelta'
        finish_reason:
          # AsyncAPI 2.6 schemas follow JSON Schema Draft 07, which has no
          # `nullable` keyword (that is an OpenAPI 3.0 construct and is
          # ignored here). Null is therefore allowed explicitly: once in the
          # type union and once in the enum, because `enum` is checked
          # independently of `type`.
          type:
            - string
            - 'null'
          enum:
            - stop
            - length
            - tool_calls
            - content_filter
            - null
          description: |
            Reason the model stopped generating tokens. Null on all chunks
            except the final content chunk.
    ChatCompletionStreamResponseDelta:
      type: object
      description: A chat completion delta generated by streamed model responses.
      # No property is required: a delta may be an empty object.
      properties:
        role:
          type: string
          enum:
            - system
            - user
            - assistant
            - tool
          description: >-
            Role of the author of this message. Typically only emitted on the
            first chunk of a choice.
        content:
          # JSON Schema Draft 07 (the dialect AsyncAPI 2.6 uses) has no
          # `nullable` keyword, so null is admitted through a type union.
          type:
            - string
            - 'null'
          description: The contents of the chunk message (token slice).
        tool_calls:
          type: array
          description: Streaming tool-call fragments.
          items:
            type: object
            properties:
              index:
                type: integer
              id:
                type: string
              type:
                type: string
                enum:
                  - function
              function:
                type: object
                properties:
                  name:
                    type: string
                  arguments:
                    type: string
    Usage:
      # The description below says this value is null on intermediate chunks,
      # so null must validate. JSON Schema Draft 07 (the dialect AsyncAPI 2.6
      # uses) has no `nullable` keyword; a type union expresses it instead.
      type:
        - object
        - 'null'
      description: |
        Usage statistics for the request, including Requesty's USD cost. Sent
        on the final chunk; null on intermediate chunks.
      properties:
        prompt_tokens:
          type: integer
        completion_tokens:
          type: integer
        total_tokens:
          type: integer
        cost:
          type: number
          description: Requesty USD cost for the request.