# AsyncAPI specification version this document is written against.
asyncapi: '2.6.0'
# URN identifying this API description.
id: 'urn:com:inkeep:v1:chat-completions:sse'
# Document metadata: title, version, narrative description, contact, license,
# and a structured summary of the transport.
info:
  title: Inkeep Chat Completions Streaming (HTTP + SSE)
  version: '1.0.0'
  # Literal block scalar: the Markdown below, including its blank lines, is
  # part of the value.
  description: |
    AsyncAPI 2.6 description of Inkeep's **chat completion streaming** surface.

    Inkeep does not publish a WebSocket API. The only asynchronous /
    event-style transport documented at
    https://docs.inkeep.com/cloud/ai-api/chat-completions-api and the RAG mode
    reference is **HTTP Server-Sent Events (SSE)** delivered over the same REST
    endpoint (`POST /chat/completions`) when the request body sets
    `stream: true`. SSE is a one-way, server-to-client HTTP streaming channel;
    it is **not** WebSocket.

    Inkeep's chat completions are OpenAI-compatible, so the streamed events are
    `chat.completion.chunk` objects emitted as `data:` lines and terminated by
    a final `data: [DONE]` sentinel.

    This AsyncAPI document models only the streamed events. The request body
    (model/mode, messages, response_format, etc.) is modeled in the companion
    OpenAPI document at `openapi/inkeep-openapi.yml`. The Analytics API
    (conversations, feedback, events) is synchronous REST and is not modeled
    here.
  contact:
    name: API Evangelist
    email: kin@apievangelist.com
    url: https://apievangelist.com
  license:
    name: API documentation - Inkeep Terms
    url: https://inkeep.com/terms
  # Specification extension (`x-` prefix) restating the transport facts from
  # the description above as structured fields.
  x-transport-notes:
    transport: HTTP Server-Sent Events (SSE)
    protocol: https
    direction: server-to-client (one-way)
    mediaType: text/event-stream
    # Quoted because the value contains `{`, `"` and `: ` sequences.
    triggeredBy: 'POST https://api.inkeep.com/v1/chat/completions with request body { "stream": true }'
    # Quoted because the value contains `: ` and `[`.
    terminator: 'data: [DONE]'
    notWebSocket: true
    source: https://docs.inkeep.com/cloud/ai-api/chat-completions-api
# Content type applied to messages that do not declare their own
# `contentType`.
defaultContentType: text/event-stream
servers:
  inkeep:
    # Host and base path only; the scheme is carried by `protocol`.
    url: api.inkeep.com/v1
    protocol: https
    description: |
      Inkeep's OpenAI-compatible REST base. Chat completion streaming is
      delivered as HTTP Server-Sent Events over this base when `stream: true`
      is set on the JSON request body. AsyncAPI 2.6 does not define a dedicated
      SSE protocol identifier; `https` is used here and the SSE transport is
      documented in `info.x-transport-notes` and on each channel.
    # References the `bearerAuth` scheme declared under
    # `components.securitySchemes`; the empty list means no scopes.
    security:
      - bearerAuth: []
channels:
  # Single channel: the SSE response stream of `POST /chat/completions`.
  /chat/completions:
    description: |
      Chat completion SSE stream. The client opens this channel by issuing
      `POST /chat/completions` with `Content-Type: application/json` and a JSON
      body containing `stream: true`. The server responds with
      `Content-Type: text/event-stream` and emits a sequence of `data:` lines,
      each carrying one JSON-serialized `chat.completion.chunk` object,
      followed by a final `data: [DONE]` line.
    bindings:
      # The AsyncAPI HTTP *channel* binding is reserved and must not contain
      # any properties, so only the SSE extension lives at channel level. The
      # HTTP method is declared on the operation binding below.
      x-sse:
        mediaType: text/event-stream
        eventField: 'data'
        terminator: '[DONE]'
    subscribe:
      operationId: streamChatCompletionChunks
      summary: Subscribe to streamed chat completion chunks (SSE).
      description: |
        After `POST /chat/completions` is issued with `stream: true`, the
        server emits an ordered sequence of SSE `data:` events. Each `data:`
        line either carries a JSON-serialized `ChatCompletionChunk` or the
        literal sentinel `[DONE]` marking end of stream.
      bindings:
        http:
          # HTTP operation binding at this version: `method` names the HTTP
          # request method that opens the stream. The `type` field from the
          # 0.1.0 binding is not part of this binding version.
          method: POST
          bindingVersion: '0.3.0'
      message:
        # Each SSE `data:` line is exactly one of these two messages.
        oneOf:
          - $ref: '#/components/messages/ChatCompletionChunk'
          - $ref: '#/components/messages/StreamDone'
components:
  securitySchemes:
    # HTTP bearer-token scheme; the `inkeep` server's `security` list refers
    # to it by the key `bearerAuth`.
    bearerAuth:
      description: |
        Standard Inkeep bearer token. Set the
        `Authorization: Bearer <INKEEP_API_KEY>` header on the
        `POST /chat/completions` request that opens the SSE stream.
      type: http
      scheme: bearer
      bearerFormat: Inkeep API key
  messages:
    # Message carried by every SSE `data:` line except the final sentinel.
    ChatCompletionChunk:
      name: ChatCompletionChunk
      title: Streamed chat completion chunk
      summary: |
        A single SSE `data:` event carrying one JSON `chat.completion.chunk`
        object. Many of these are emitted per request, in order.
      # Overrides the document-level `defaultContentType` for this message.
      contentType: application/json
      description: |
        Sent as `data: {json}\n\n` on the SSE stream. The JSON object's
        `object` field is always the literal string `chat.completion.chunk`.
      payload:
        $ref: '#/components/schemas/ChatCompletionChunk'
      # Three examples sharing one `id`: the opening chunk, a content chunk,
      # and the final chunk of the same stream.
      examples:
        # Opening chunk: delta carries the role and an empty content string.
        - name: openingChunk
          summary: First chunk - establishes role
          payload:
            id: chatcmpl-abc123
            object: chat.completion.chunk
            created: 1781913600
            model: inkeep-qa-expert
            choices:
              - index: 0
                delta:
                  role: assistant
                  content: ''
                finish_reason: null
        # Content chunk: delta carries only a slice of text.
        - name: contentChunk
          summary: Token delta
          payload:
            id: chatcmpl-abc123
            object: chat.completion.chunk
            created: 1781913600
            model: inkeep-qa-expert
            choices:
              - index: 0
                delta:
                  content: 'Hello'
                finish_reason: null
        # Final chunk: empty delta, non-null `finish_reason`.
        - name: finalChunk
          summary: Final chunk - finish_reason set
          payload:
            id: chatcmpl-abc123
            object: chat.completion.chunk
            created: 1781913600
            model: inkeep-qa-expert
            choices:
              - index: 0
                delta: {}
                finish_reason: stop
    # Terminator message: the last `data:` line of every stream.
    StreamDone:
      name: StreamDone
      title: Stream terminator
      summary: |
        The literal SSE event `data: [DONE]` that marks end of stream. Not
        JSON; the payload is the string `[DONE]`.
      # Plain text rather than JSON: the payload is a bare string.
      contentType: text/plain
      description: |
        Clients must stop reading the stream when this sentinel is observed.
      payload:
        $ref: '#/components/schemas/StreamDoneSentinel'
      examples:
        - name: done
          summary: End-of-stream sentinel
          # Quoted so YAML reads a string, not a one-element flow sequence.
          payload: '[DONE]'
  schemas:
    StreamDoneSentinel:
      # String schema restricted to the single end-of-stream marker value.
      description: |
        End-of-stream sentinel. The full SSE line is `data: [DONE]`. The
        payload value modeled here is the string literal `[DONE]`.
      type: string
      enum: ['[DONE]']
    ChatCompletionChunk:
      # Payload schema of one streamed chunk. All five properties are
      # required; they are listed below in the order the example payloads
      # use.
      description: |
        Represents a streamed chunk of an OpenAI-compatible chat completion
        response.
      type: object
      required: [choices, created, id, model, object]
      properties:
        id:
          description: A unique identifier for the chat completion. Each chunk has the same ID.
          type: string
        object:
          description: The object type, which is always `chat.completion.chunk`.
          type: string
          enum: [chat.completion.chunk]
        created:
          description: Unix timestamp (seconds) of when the chat completion was created.
          type: integer
        model:
          description: The Inkeep model / mode used to generate the completion.
          type: string
        choices:
          description: A list of chat completion choices.
          type: array
          items:
            $ref: '#/components/schemas/ChatCompletionChunkChoice'
    # One entry of the `choices` array in a streamed chunk.
    ChatCompletionChunkChoice:
      type: object
      required:
        - delta
        - finish_reason
        - index
      properties:
        index:
          type: integer
          description: The index of the choice in the list of choices.
        delta:
          $ref: '#/components/schemas/ChatCompletionStreamResponseDelta'
        finish_reason:
          # AsyncAPI 2.x schemas follow JSON Schema draft-07, which has no
          # `nullable` keyword (that is OpenAPI 3.0). Null is therefore
          # allowed through `type`, and it must also be listed in `enum`;
          # otherwise `finish_reason: null` on non-final chunks fails
          # validation.
          type:
            - string
            - 'null'
          enum:
            - stop
            - length
            - tool_calls
            - null
          description: |
            Reason the model stopped generating tokens. Null on all chunks
            except the final chunk.
    # Incremental message fragment carried by each choice of a chunk. Neither
    # property is required, so an empty object is a valid delta.
    ChatCompletionStreamResponseDelta:
      type: object
      description: A chat completion delta generated by streamed model responses.
      properties:
        role:
          type: string
          enum:
            - system
            - user
            - assistant
            - tool
          description: |
            Role of the author of this message. Typically only emitted on the
            first chunk of a choice.
        content:
          # AsyncAPI 2.x schemas follow JSON Schema draft-07, which has no
          # `nullable` keyword (that is OpenAPI 3.0); a nullable string is
          # expressed as a two-member `type` list instead.
          type:
            - string
            - 'null'
          description: The contents of the chunk message (token slice).