openapi: 3.0.1
info:
  title: AI Gateway API
  version: '1.0'
servers:
  - url: https://api.us-east-1.langdb.ai
    description: LangDB API Server
paths:
  /v1/chat/completions:
    post:
      operationId: createChatCompletion
      tags:
        - Completions
      summary: Create chat completion
      parameters:
        - $ref: '#/components/parameters/XProjectId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateChatCompletionRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateChatCompletionResponse'
      x-oaiMeta:
        name: Create chat completion
        group: chat
        returns: |
          Returns a [chat completion](/docs/api-reference/chat/object) object, or a streamed
          sequence of [chat completion chunk](/docs/api-reference/chat/streaming) objects if
          the request is streamed.
        path: create
        examples:
          - title: Default
            request:
              curl: |
                curl https://api.us-east-1.langdb.ai/v1/chat/completions \
                  -H "Content-Type: application/json" \
                  -H "Authorization: Bearer $LANGDB_API_KEY" \
                  -d '{
                    "model": "VAR_model_id",
                    "messages": [
                      {
                        "role": "system",
                        "content": "You are a helpful assistant."
                      },
                      {
                        "role": "user",
                        "content": "Hello!"
                      }
                    ]
                  }'
              python: |
                from openai import OpenAI

                client = OpenAI()

                completion = client.chat.completions.create(
                    model="VAR_model_id",
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": "Hello!"}
                    ]
                )

                print(completion.choices[0].message)
              node.js: |-
                import OpenAI from "openai";

                const openai = new OpenAI();

                async function main() {
                  const completion = await openai.chat.completions.create({
                    messages: [{ role: "system", content: "You are a helpful assistant." }],
                    model: "VAR_model_id",
                  });

                  console.log(completion.choices[0]);
                }

                main();
            response: |
              {
                "id": "chatcmpl-123",
                "object": "chat.completion",
                "created": 1677652288,
                "model": "gpt-4o-mini",
                "system_fingerprint": "fp_44709d6fcb",
                "choices": [{
                  "index": 0,
                  "message": {
                    "role": "assistant",
                    "content": "\n\nHello there, how may I assist you today?"
                  },
                  "logprobs": null,
                  "finish_reason": "stop"
                }],
                "usage": {
                  "prompt_tokens": 9,
                  "completion_tokens": 12,
                  "total_tokens": 21,
                  "completion_tokens_details": {
                    "reasoning_tokens": 0,
                    "accepted_prediction_tokens": 0,
                    "rejected_prediction_tokens": 0
                  }
                }
              }
      security:
        - BearerAuth: []
  /v1/embeddings:
    post:
      operationId: generateEmbeddings
      tags:
        - Completions
      summary: Create embeddings
      description: |
        Creates an embedding vector representing the input text or token arrays.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/EmbeddingsRequest"
            examples:
              singleInputFloat:
                summary: A single text input, returning a float array
                value:
                  input: "The food was delicious and the waiter was kind."
                  model: "text-embedding-ada-002"
                  encoding_format: "float"
                  dimensions: 1536
              multipleInputsBase64:
                summary: Multiple text strings, returning base64-encoded vectors
                value:
                  input:
                    - "First text to embed"
                    - "Second text to embed"
                  model: "text-embedding-3-small"
                  encoding_format: "base64"
      responses:
        '200':
          description: Successful response with embeddings
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/EmbeddingsResponse"
      security:
        - BearerAuth: []
components:
  parameters:
    XProjectId:
      name: X-Project-Id
      in: header
      description: "LangDB project ID"
      required: true
      schema:
        type: string
  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer
    ProjectIdAuth:
      type: apiKey
      in: header
      name: X-Project-Id
  schemas:
    CreateChatCompletionRequest:
      type: object
      required:
        - model
        - messages
      properties:
        model:
          type: string
          description: ID of the model to use. This can be either a specific model ID or a virtual model identifier.
          example: gpt-4o
        messages:
          type: array
          description: A list of messages in the conversation.
          items:
            type: object
            properties:
              role:
                type: string
                enum:
                  - system
                  - user
                  - assistant
                  - tool
              name:
                type: string
                description: Optional name for the participant (e.g., function name).
              content:
                oneOf:
                  - type: string
                    description: Text content.
                  - type: array
                    description: Array of content parts for multimodal inputs.
                    items:
                      type: object
                      required:
                        - type
                      properties:
                        type:
                          type: string
                          description: The type of the content part (e.g., text or image_url).
                          enum:
                            - text
                            - image_url
                        text:
                          type: string
                          description: The text content, required when type=text.
                        image_url:
                          type: object
                          description: Image URL payload, required when type=image_url.
                          properties:
                            url:
                              type: string
                            detail:
                              type: string
                              enum:
                                - low
                                - high
                                - auto
              tool_call_id:
                type: string
                description: Tool call ID this message is responding to (for role=tool).
              tool_calls:
                type: array
                description: Tool calls requested by the assistant.
                items:
                  type: object
                  properties:
                    id:
                      type: string
                    type:
                      type: string
                      enum:
                        - function
                    function:
                      type: object
                      properties:
                        name:
                          type: string
                        arguments:
                          type: string
              function_call:
                description: Deprecated. For legacy function calling responses.
                oneOf:
                  - type: string
                  - type: object
                    properties:
                      name:
                        type: string
        temperature:
          type: number
          minimum: 0
          maximum: 2
          description: Sampling temperature.
          example: 0.8
        top_p:
          type: number
          minimum: 0
          maximum: 1
          description: Nucleus sampling probability.
        max_tokens:
          type: integer
          minimum: 1
          description: The maximum number of tokens that can be generated in the chat completion.
        n:
          type: integer
          minimum: 1
          default: 1
          description: How many chat completion choices to generate for each input message.
        stop:
          description: Up to 4 sequences where the API will stop generating further tokens.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        presence_penalty:
          type: number
          minimum: -2
          maximum: 2
          description: Penalize new tokens based on whether they appear in the text so far.
        frequency_penalty:
          type: number
          minimum: -2
          maximum: 2
          description: Penalize new tokens based on their existing frequency in the text so far.
        logit_bias:
          type: object
          description: Modify the likelihood of specified tokens appearing in the completion.
          additionalProperties:
            type: number
        logprobs:
          type: boolean
          description: Whether to return log probabilities of the output tokens.
        top_logprobs:
          type: integer
          minimum: 1
          maximum: 20
          description: The number of most likely tokens to return at each position, with their log probabilities. Requires `logprobs` to be true.
        seed:
          type: integer
          description: If specified, the backend will make a best effort to return deterministic results.
        response_format:
          description: Format for the model's response.
          oneOf:
            - type: string
              enum:
                - text
                - json_object
            - type: object
              properties:
                type:
                  type: string
                  enum:
                    - text
                    - json_object
        tools:
          type: array
          description: A list of tools the model may call. Currently, only functions are supported as a tool.
          items:
            $ref: '#/components/schemas/ChatCompletionTool'
        tool_choice:
          $ref: '#/components/schemas/ChatCompletionToolChoiceOption'
        parallel_tool_calls:
          $ref: '#/components/schemas/ParallelToolCalls'
        functions:
          description: Deprecated. Old-style function definitions.
          type: array
          items:
            type: object
            required:
              - name
            properties:
              name:
                type: string
              description:
                type: string
              parameters:
                type: object
        stream:
          type: boolean
          description: Whether to stream back partial progress.
          default: false
        stream_options:
          type: object
          nullable: true
          default: null
          description: Options for streaming responses. Only set this when `stream` is true.
          properties:
            include_obfuscation:
              type: boolean
              description: |
                When true, adds obfuscation fields to streaming deltas to normalize payload
                sizes and mitigate side-channel attacks. Set to false to reduce bandwidth
                overhead when trusted links are used.
            include_usage:
              type: boolean
              description: |
                When set, an additional final chunk with total token usage is streamed before
                `data: [DONE]`. All other chunks will include a `usage` field with a null value.
                If the stream is interrupted, you may not receive the final usage chunk.
        user:
          type: string
          deprecated: true
          description: |
            Deprecated. This field is being replaced by `safety_identifier` and
            `prompt_cache_key`. Use `prompt_cache_key` to maintain caching optimizations.
            A stable identifier for your end-users, previously used to boost cache hit
            rates by better bucketing similar requests and to help detect and prevent abuse.
        safety_identifier:
          type: string
          description: |
            Stable identifier for your end-users, used to help detect and prevent abuse.
            Prefer this over `user`. For caching optimization, combine with `prompt_cache_key`.
        prompt_cache_key:
          type: string
          description: Used to cache responses for similar requests to optimize cache hit rates. LangDB supports prompt caching; see https://docs.langdb.ai/features/prompt-caching. Can be used instead of the `user` field for cache bucketing.
        mcp_servers:
          type: array
          description: Model Context Protocol servers to use during the request. These enable capabilities like web search.
          items:
            type: object
            required:
              - server_url
              - type
            properties:
              server_url:
                type: string
                description: URL for the MCP server connection.
                example: "wss://your-mcp-server.com/ws?config=your_encoded_config"
              type:
                type: string
                description: Connection type for MCP server.
                enum: ["ws", "sse"]
                example: "ws"
        router:
          type: object
          description: Advanced routing configuration for dynamic model selection based on conditions.
          required:
            - type
            - routes
          properties:
            type:
              type: string
              description: Type of routing strategy.
              enum: ["conditional"]
              example: "conditional"
            routes:
              type: array
              description: Array of routing rules that define conditions and target models.
              items:
                $ref: '#/components/schemas/RouterRule'
        extra:
          type: object
          description: Additional configuration options for the completion request.
          properties:
            guards:
              type: array
              description: List of guard identifiers to apply to this request. Guards provide content filtering and validation.
              items:
                type: string
                description: Identifier for a specific guard to apply.
              example:
                - word_count_validator_bd4bdnun
                - toxicity_detection_4yj4cdvu
            user:
              type: object
              description: User-specific information to associate with this request. This can be used for analytics and personalization.
              properties:
                id:
                  type: string
                  description: Unique identifier for the user.
                  example: "7"
                name:
                  type: string
                  description: Name of the user.
                  example: "mrunmay"
                tags:
                  type: array
                  description: List of tags associated with the user. Can be used for categorization or filtering.
                  items:
                    type: string
                  example: ["coding", "software"]
      # Top-level example showing a complete, valid request object
      example:
        model: router/dynamic
        messages:
          - role: user
            content: "Write a haiku about recursion in programming."
        temperature: 0.8
        max_tokens: 1000
        top_p: 0.9
        frequency_penalty: 0.1
        presence_penalty: 0.2
        stream: false
        response_format: json_object
        mcp_servers:
          - server_url: "wss://your-mcp-server.com/ws?config=your_encoded_config"
            type: "ws"
        router:
          type: "conditional"
          routes:
            - conditions:
                all:
                  - extra.user.tier: { $eq: "premium" }
              name: "premium_user"
              targets:
                $any: ["openai/gpt-4.1-mini", "xai/grok-4", "anthropic/claude-sonnet-4"]
                filter:
                  error_rate: { $lt: 0.01 }
                sort_by: "ttft"
                sort_order: "min"
            - name: "basic_user"
              targets: "openai/gpt-4.1-nano"
        extra:
          guards:
            - word_count_validator_bd4bdnun
            - toxicity_detection_4yj4cdvu
          user:
            id: "7"
            name: "mrunmay"
            tier: "premium"
            tags: ["coding", "software"]
    CreateChatCompletionResponse:
      type: object
      description: Represents a chat completion response returned by the model, based on the provided input.
      required:
        - choices
        - created
        - id
        - model
        - object
      properties:
        id:
          type: string
          description: A unique identifier for the chat completion.
        choices:
          type: array
          description: A list of chat completion choices. Can be more than one if `n` is greater than 1.
          items:
            type: object
            required:
              - finish_reason
              - index
              - message
            properties:
              finish_reason:
                type: string
                description: |
                  The reason the model stopped generating tokens. This will be `stop` if the
                  model hit a natural stop point or a provided stop sequence, `length` if the
                  maximum number of tokens specified in the request was reached,
                  `content_filter` if content was omitted due to a flag from our content
                  filters, `tool_calls` if the model called a tool, or `function_call`
                  (deprecated) if the model called a function.
                enum:
                  - stop
                  - length
                  - tool_calls
                  - content_filter
                  - function_call
              index:
                type: integer
                description: The index of the choice in the list of choices.
              message:
                type: object
                properties:
                  role:
                    type: string
                    enum:
                      - assistant
                  content:
                    type: string
                    nullable: true
                  tool_calls:
                    type: array
                    items:
                      type: object
                      properties:
                        id:
                          type: string
                        type:
                          type: string
                          enum:
                            - function
                        function:
                          type: object
                          properties:
                            name:
                              type: string
                            arguments:
                              type: string
                  function_call:
                    description: Deprecated. For legacy function calling responses.
                    type: object
                    nullable: true
                    properties:
                      name:
                        type: string
                      arguments:
                        type: string
                required:
                  - role
                  - content
              logprobs:
                description: Log probability information for the choice.
                type: object
                nullable: true
                properties:
                  content:
                    description: A list of message content tokens with log probability information.
                    type: array
                    items:
                      type: object
                      properties:
                        token:
                          type: string
                        logprob:
                          type: number
                    nullable: true
                  refusal:
                    description: A list of message refusal tokens with log probability information.
                    type: array
                    items:
                      type: object
                      properties:
                        token:
                          type: string
                        logprob:
                          type: number
                    nullable: true
                required:
                  - content
                  - refusal
        created:
          type: integer
          description: The Unix timestamp (in seconds) of when the chat completion was created.
        model:
          type: string
          description: The model used for the chat completion.
        system_fingerprint:
          type: string
          description: |
            This fingerprint represents the backend configuration that the model runs with.
            Can be used in conjunction with the `seed` request parameter to understand when
            backend changes have been made that might impact determinism.
        object:
          type: string
          description: The object type, which is always `chat.completion`.
          enum:
            - chat.completion
        usage:
          type: object
          description: Usage statistics for the completion request.
          properties:
            prompt_tokens:
              type: integer
              description: Number of tokens in the prompt.
            completion_tokens:
              type: integer
              description: Number of tokens in the completion.
            total_tokens:
              type: integer
              description: Total number of tokens used.
            prompt_tokens_details:
              type: object
              properties:
                cached_tokens:
                  type: integer
                cache_creation_tokens:
                  type: integer
                audio_tokens:
                  type: integer
            completion_tokens_details:
              type: object
              properties:
                reasoning_tokens:
                  type: integer
                accepted_prediction_tokens:
                  type: integer
                rejected_prediction_tokens:
                  type: integer
                audio_tokens:
                  type: integer
            cost:
              type: number
              description: Total cost for the request in the provider's billing unit.
    EmbeddingsRequest:
      type: object
      required:
        - model
        - input
      properties:
        model:
          type: string
          description: ID of the model to use for generating embeddings.
          example: text-embedding-ada-002
        input:
          oneOf:
            - type: string
              description: The text to embed.
            - type: array
              items:
                type: string
              description: Array of text strings to embed.
        encoding_format:
          type: string
          enum: [float, base64]
          default: float
          description: The format to return the embeddings in.
        dimensions:
          type: integer
          description: The number of dimensions the resulting embeddings should have.
          minimum: 1
          maximum: 1536
          example: 1536
    EmbeddingsResponse:
      type: object
      required:
        - data
        - model
        - usage
      properties:
        data:
          type: array
          items:
            type: object
            required:
              - embedding
              - index
            properties:
              embedding:
                oneOf:
                  - type: array
                    items:
                      type: number
                    description: The embedding vector, returned when encoding_format=float.
                  - type: string
                    description: The base64-encoded embedding vector, returned when encoding_format=base64.
              index:
                type: integer
                description: The index of this embedding in the input array.
        model:
          type: string
          description: The model used for generating the embeddings.
        usage:
          type: object
          required:
            - prompt_tokens
            - total_tokens
          properties:
            prompt_tokens:
              type: integer
              description: The number of tokens in the prompt.
            total_tokens:
              type: integer
              description: The total number of tokens used (same as prompt_tokens since there is no completion).
    ChatCompletionTool:
      type: object
      properties:
        type:
          type: string
          enum:
            - function
          description: The type of the tool. Currently, only `function` is supported.
        function:
          $ref: '#/components/schemas/FunctionObject'
      required:
        - type
        - function
    ChatCompletionToolChoiceOption:
      description: >
        Controls which (if any) tool is called by the model. `none` means the model will
        not call any tool and instead generates a message. `auto` means the model can pick
        between generating a message or calling one or more tools. `required` means the
        model must call one or more tools.
      oneOf:
        - type: string
          enum:
            - none
            - auto
            - required
        - $ref: '#/components/schemas/ChatCompletionAllowedToolsChoice'
        - $ref: '#/components/schemas/ChatCompletionNamedToolChoice'
    ChatCompletionNamedToolChoice:
      type: object
      description: Specifies a tool the model should use.
      properties:
        type:
          type: string
          enum:
            - function
          description: The type of the tool. Currently, only `function` is supported.
        function:
          type: object
          properties:
            name:
              type: string
          required:
            - name
      required:
        - type
        - function
    FunctionObject:
      type: object
      properties:
        description:
          type: string
          description: A description of what the function does.
        name:
          type: string
          description: The name of the function to be called.
        parameters:
          $ref: '#/components/schemas/FunctionParameters'
        strict:
          type: boolean
          nullable: true
          description: Whether to enable strict schema adherence.
      required:
        - name
    FunctionParameters:
      type: object
      description: The parameters the function accepts, described as a JSON Schema object.
    ParallelToolCalls:
      description: Whether to enable parallel function calling during tool use.
      type: boolean
      default: true
    ChatCompletionAllowedTools:
      type: object
      title: Allowed tools
      description: Constrains the tools available to the model to a pre-defined set.
      properties:
        mode:
          type: string
          enum:
            - auto
            - required
          description: >
            `auto` allows the model to pick from among the allowed tools and generate a
            message. `required` requires the model to call one or more of the allowed tools.
        tools:
          type: array
          description: A list of tool definitions that the model should be allowed to call.
          items:
            type: object
            description: A tool definition that the model should be allowed to call.
            additionalProperties: true
      required:
        - mode
        - tools
    ChatCompletionAllowedToolsChoice:
      type: object
      title: Allowed tools
      description: Constrains the tools available to the model to a pre-defined set.
      properties:
        type:
          type: string
          enum:
            - allowed_tools
          description: Allowed tool configuration type. Always `allowed_tools`.
        allowed_tools:
          $ref: '#/components/schemas/ChatCompletionAllowedTools'
      required:
        - type
        - allowed_tools
    RouterRule:
      type: object
      description: A routing rule that defines conditions and target models for dynamic routing.
      required:
        - name
        - conditions
        - targets
      properties:
        name:
          type: string
          description: Unique identifier for this routing rule.
          example: "premium_user"
        conditions:
          $ref: '#/components/schemas/RouteConditions'
        targets:
          oneOf:
            - type: string
              description: Single target model identifier.
              example: "openai/gpt-4.1-nano"
            - $ref: '#/components/schemas/RouteTargets'
    RouteConditions:
      type: object
      description: Logical conditions for determining when a route should be activated.
      properties:
        all:
          type: array
          description: All conditions must be true (AND logic).
          items:
            $ref: '#/components/schemas/ConditionExpression'
        any:
          type: array
          description: At least one condition must be true (OR logic).
          items:
            $ref: '#/components/schemas/ConditionExpression'
    RouteTargets:
      type: object
      description: Target model configuration for routing with filtering and sorting options.
      properties:
        $any:
          type: array
          description: Array of target models to choose from with fallback support.
          items:
            type: string
            description: Model identifier (e.g., "openai/gpt-4o", "openai/*", "gpt-4o").
        filter:
          type: object
          description: Filter models based on real-time metrics.
          additionalProperties: true
        sort_by:
          type: string
          description: Sort candidate models by metric.
          enum:
            - price
            - ttft
            - throughput
            - latency
        sort_order:
          type: string
          description: Sort order for candidate models.
          enum:
            - min
            - max
    ConditionExpression:
      type: object
      description: A single condition expression for evaluating request metadata or user information.
      additionalProperties:
        oneOf:
          - type: object
            description: Comparison operators for field evaluation.
            properties:
              $eq:
                description: Equals comparison.
              $neq:
                description: Not equals comparison.
              $in:
                type: array
                description: Value is in the provided array.
                items:
                  description: Array item for comparison.
              $lt:
                description: Less than comparison.
              $gt:
                description: Greater than comparison.
          - type: string
            description: Direct string value comparison.
          - type: number
            description: Direct numeric value comparison.
          - type: boolean
            description: Direct boolean value comparison.
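# Illustrative sketch (comment only, not part of the spec): the request body a client
# might send to exercise `stream` together with `stream_options.include_usage`, so that
# a final chunk carrying total token usage arrives before `data: [DONE]`:
#
#   POST /v1/chat/completions
#   {
#     "model": "gpt-4o-mini",
#     "stream": true,
#     "stream_options": { "include_usage": true },
#     "messages": [{ "role": "user", "content": "Hello!" }]
#   }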