{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/ChatCompletionRequest",
  "title": "ChatCompletionRequest",
  "additionalProperties": false,
  "properties": {
    "messages": {
      "type": "array",
      "items": {
        "$ref": "#/components/schemas/ChatCompletionMessage"
      },
      "description": "A list of messages representing the conversation history. Supports roles: `system`, `user`, `assistant`, and `tool`."
    },
    "model": {
      "title": "Model",
      "type": "string",
      "description": "The model slug to use for completion, such as `deepseek-ai/DeepSeek-V4-Pro`. Find available models at [Model APIs](https://app.baseten.co/model-apis/create)."
    },
    "frequency_penalty": {
      "default": 0,
      "title": "Frequency Penalty",
      "description": "Penalizes tokens based on how frequently they appear in the text so far. Positive values decrease repetition. Support varies by model.",
      "type": "number"
    },
    "logit_bias": {
      "default": null,
      "title": "Logit Bias",
      "description": "A map of token IDs to bias values (-100 to 100). Use this to increase or decrease the likelihood of specific tokens appearing in the output.",
      "additionalProperties": {
        "type": "number"
      },
      "type": "object"
    },
    "logprobs": {
      "default": false,
      "title": "Logprobs",
      "description": "If `true`, returns log probabilities of the output tokens. Log probability support varies by model.",
      "type": "boolean"
    },
    "top_logprobs": {
      "default": 0,
      "title": "Top Logprobs",
      "description": "Number of most likely tokens to return at each position (0-20). Requires `logprobs: true`. Log probability support varies by model.",
      "type": "integer"
    },
    "max_tokens": {
      "default": 4096,
      "maximum": 262144,
      "minimum": 1,
      "title": "Max Tokens",
      "type": "integer",
      "description": "Maximum number of tokens to generate. If your request input plus `max_tokens` exceeds the model's context length, `max_tokens` is truncated. If your request exceeds the context length by more than 16k tokens or if `max_tokens` signals no preference, context reservation is throttled to 49512 tokens. Higher `max_tokens` values slightly deprioritize request scheduling."
    },
    "n": {
      "default": 1,
      "title": "N",
      "description": "Number of completions to generate. Only `1` is supported.",
      "type": "integer"
    },
    "presence_penalty": {
      "default": 0,
      "title": "Presence Penalty",
      "description": "Penalizes tokens based on whether they have appeared in the text so far. Positive values encourage the model to discuss new topics. Support varies by model.",
      "type": "number"
    },
    "response_format": {
      "anyOf": [
        {
          "$ref": "#/components/schemas/ResponseFormatText"
        },
        {
          "$ref": "#/components/schemas/ResponseFormatJson"
        },
        {
          "$ref": "#/components/schemas/ResponseFormatJsonObject"
        },
        {
          "$ref": "#/components/schemas/ResponseFormatGrammar"
        },
        {
          "$ref": "#/components/schemas/ResponseFormatStructuralTag"
        }
      ],
      "default": null,
      "title": "Response Format",
      "description": "Specifies the output format. Use `{\"type\": \"json_object\"}` for JSON mode, or `{\"type\": \"json_schema\", \"json_schema\": {...}}` for structured outputs with a specific schema."
    },
    "seed": {
      "default": null,
      "title": "Seed",
      "description": "Random seed for deterministic generation. Determinism is not guaranteed across different hardware or model versions.",
      "type": "integer"
    },
    "stop": {
      "anyOf": [
        {
          "maxLength": 1000,
          "minLength": 1,
          "type": "string"
        },
        {
          "items": {
            "maxLength": 1000,
            "minLength": 1,
            "type": "string"
          },
          "maxItems": 32,
          "type": "array"
        }
      ],
      "title": "Stop",
      "description": "Up to 32 sequences at which the API stops generating further tokens. Can be a single string or an array of strings."
    },
    "stream": {
      "default": false,
      "title": "Stream",
      "description": "If `true`, responses are streamed back as server-sent events (SSE) as they are generated.",
      "type": "boolean"
    },
    "stream_options": {
      "default": null,
      "description": "Options for streaming responses. Set `include_usage: true` to receive token usage statistics in the final chunk.",
      "$ref": "#/components/schemas/StreamOptions"
    },
    "temperature": {
      "default": null,
      "title": "Temperature",
      "description": "Controls randomness in the output. Lower values like 0.2 produce more focused and deterministic responses. Higher values like 1.5 produce more creative and varied output.",
      "maximum": 4,
      "minimum": 0,
      "type": "number"
    },
    "top_p": {
      "default": 1,
      "title": "Top P",
      "description": "Nucleus sampling: only consider tokens with cumulative probability up to this value. Lower values like 0.1 produce more focused output.",
      "exclusiveMinimum": 0,
      "maximum": 1,
      "type": "number"
    },
    "tools": {
      "default": null,
      "title": "Tools",
      "description": "A list of tools (functions) the model may call. Each tool should have `type: \"function\"` and a `function` object with `name`, `description`, and `parameters`.",
      "items": {
        "$ref": "#/components/schemas/ChatCompletionToolsParam"
      },
      "type": "array"
    },
    "tool_choice": {
      "anyOf": [
        {
          "enum": [
            "none",
            "required",
            "auto"
          ],
          "type": "string"
        },
        {
          "$ref": "#/components/schemas/ChatCompletionNamedToolChoiceParam"
        }
      ],
      "default": null,
      "title": "Tool Choice",
      "description": "Controls which tool (if any) the model calls.\n\n- `none`: Never call a tool.\n- `auto`: Model decides whether to call a tool.\n- `required`: Model must call at least one tool.\n- `{\"type\": \"function\", \"function\": {\"name\": \"...\"}}`: Call a specific function."
    },
    "parallel_tool_calls": {
      "default": true,
      "title": "Parallel Tool Calls",
      "description": "If `true`, the model can call multiple tools in a single response.",
      "type": "boolean"
    },
    "user": {
      "default": null,
      "title": "User",
      "description": "A unique identifier for the end-user, useful for tracking and abuse detection.",
      "type": "string"
    },
    "best_of": {
      "default": null,
      "title": "Best Of",
      "description": "Number of candidate sequences to generate, from which the best one is returned. Only a value of 1 is supported.",
      "maximum": 1,
      "minimum": 1,
      "type": "integer"
    },
    "top_k": {
      "default": 50,
      "title": "Top K",
      "description": "Limits token selection to the top K most probable tokens at each step. Lower values like 10 produce more focused output. Set to -1 to disable.",
      "type": "integer"
    },
    "top_p_min": {
      "default": 0,
      "title": "Top P Min",
      "type": "number",
      "description": "Minimum value for dynamic `top_p`. When set, `top_p` dynamically adjusts but does not go below this value."
    },
    "min_p": {
      "default": 0,
      "title": "Min P",
      "type": "number",
      "description": "Minimum probability threshold for token selection. Filters out tokens with probability below `min_p * max_probability`."
    },
    "repetition_penalty": {
      "default": 1,
      "title": "Repetition Penalty",
      "type": "number",
      "description": "Multiplicative penalty for repeated tokens. Values greater than 1.0 discourage repetition, values less than 1.0 encourage it."
    },
    "length_penalty": {
      "default": 1,
      "title": "Length Penalty",
      "type": "number",
      "description": "Exponential penalty applied to sequence length during beam search. Values greater than 1.0 favor longer sequences."
    },
    "early_stopping": {
      "default": false,
      "title": "Early Stopping",
      "type": "boolean",
      "description": "If `true`, stops generation when at least `n` complete candidates are found."
    },
    "bad": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "items": {
            "type": "string"
          },
          "type": "array"
        }
      ],
      "title": "Bad",
      "description": "Words or phrases to avoid in the output. Support varies by model."
    },
    "bad_token_ids": {
      "title": "Bad Token Ids",
      "description": "Token IDs to avoid in the output. Support varies by model.",
      "items": {
        "type": "integer"
      },
      "type": "array"
    },
    "stop_token_ids": {
      "title": "Stop Token Ids",
      "description": "List of token IDs that cause generation to stop when encountered.",
      "items": {
        "type": "integer"
      },
      "type": "array"
    },
    "include_stop_str_in_output": {
      "default": false,
      "title": "Include Stop Str In Output",
      "type": "boolean",
      "description": "If `true`, includes the matched stop string in the output."
    },
    "ignore_eos": {
      "default": false,
      "title": "Ignore Eos",
      "type": "boolean",
      "description": "If `true`, continues generating past the end-of-sequence token."
    },
    "min_tokens": {
      "default": 0,
      "title": "Min Tokens",
      "type": "integer",
      "description": "Minimum number of tokens to generate before stopping. Useful for ensuring responses are not too short."
    },
    "skip_special_tokens": {
      "default": true,
      "title": "Skip Special Tokens",
      "type": "boolean",
      "description": "If `true`, removes special tokens from the generated output."
    },
    "spaces_between_special_tokens": {
      "default": true,
      "title": "Spaces Between Special Tokens",
      "type": "boolean",
      "description": "If `true`, adds spaces between special tokens in the output."
    },
    "truncate_prompt_tokens": {
      "default": null,
      "title": "Truncate Prompt Tokens",
      "description": "If set, truncates the prompt to this many tokens. Useful for handling inputs that may exceed context limits.",
      "minimum": 1,
      "type": "integer"
    },
    "echo": {
      "default": false,
      "description": "If `true` and the last message role matches the generation role, prepends that message to the output.",
      "title": "Echo",
      "type": "boolean"
    },
    "add_generation_prompt": {
      "default": true,
      "description": "If `true`, adds the generation prompt from the chat template, such as `<|assistant|>`. Set to `false` for completion-style generation.",
      "title": "Add Generation Prompt",
      "type": "boolean"
    },
    "add_special_tokens": {
      "default": false,
      "description": "If `true`, adds special tokens like BOS to the prompt beyond what the chat template adds. For most models, the chat template handles special tokens, so this should be `false`.",
      "title": "Add Special Tokens",
      "type": "boolean"
    },
    "documents": {
      "default": null,
      "description": "A list of documents for RAG (retrieval-augmented generation). Each document is an object with string keys and string values that the model can reference.",
      "title": "Documents",
      "items": {
        "additionalProperties": {
          "type": "string"
        },
        "type": "object"
      },
      "type": "array"
    },
    "chat_template": {
      "default": null,
      "description": "A custom Jinja template for formatting the conversation. If not provided, uses the model's default template.",
      "title": "Chat Template",
      "type": "string"
    },
    "chat_template_args": {
      "default": null,
      "description": "Additional arguments to pass to the chat template renderer.",
      "title": "Chat Template Args",
      "additionalProperties": true,
      "type": "object"
    },
    "disaggregated_params": {
      "default": null,
      "description": "Advanced parameters for disaggregated serving. Used internally for distributed inference.",
      "$ref": "#/components/schemas/DisaggregatedParams"
    }
  },
  "required": [
    "messages",
    "model"
  ],
  "type": "object",
  "description": "Request body for creating a chat completion."
}