{ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "#/components/schemas/ChatCompletionRequest", "title": "ChatCompletionRequest", "type": "object", "required": [ "model", "messages" ], "properties": { "model": { "type": "string", "description": "ID of the model to use. You can use the Models API to see all available models.", "example": "mistral-large-latest" }, "messages": { "type": "array", "description": "A list of messages comprising the conversation so far. Each message has a role and content.", "items": { "$ref": "#/components/schemas/ChatMessage" } }, "temperature": { "type": "number", "description": "Sampling temperature between 0.0 and 1.5. Higher values like 0.7 produce more random output, while lower values like 0.2 produce more focused and deterministic output.", "minimum": 0.0, "maximum": 1.5, "default": 0.7 }, "top_p": { "type": "number", "description": "Nucleus sampling parameter. The model considers only the most likely tokens whose cumulative probability mass reaches top_p. A value of 0.1 means only the tokens comprising the top 10% probability mass are considered.", "minimum": 0.0, "maximum": 1.0, "default": 1.0 }, "max_tokens": { "type": "integer", "description": "The maximum number of tokens to generate in the chat completion. The total token count of the prompt plus max_tokens cannot exceed the model's context length.", "minimum": 1 }, "stream": { "type": "boolean", "description": "Whether to stream back partial progress as server-sent events. If true, tokens are sent as data-only events as they become available, terminated by a data: [DONE] message.", "default": false }, "stop": { "oneOf": [ { "type": "string" }, { "type": "array", "items": { "type": "string" } } ], "description": "Stop generation if this token is detected, or if one of these tokens is detected when providing an array." }, "random_seed": { "type": "integer", "description": "The seed to use for random sampling. If set, repeated calls with the same seed and inputs will generate deterministic results."
}, "response_format": { "type": "object", "description": "An object specifying the format that the model must output. Setting its type field to json_object enables JSON mode.", "properties": { "type": { "type": "string", "enum": [ "text", "json_object" ], "description": "The format type. Use json_object to enable JSON mode." } } }, "tools": { "type": "array", "description": "A list of tools the model may call. Currently only functions are supported as a tool.", "items": { "$ref": "#/components/schemas/Tool" } }, "tool_choice": { "type": "string", "description": "Controls which tool is called by the model. Can be auto, none, any, or required.", "enum": [ "auto", "none", "any", "required" ] }, "presence_penalty": { "type": "number", "description": "Penalizes repetition of words or phrases. A higher value encourages the model to use a wider variety of words and phrases.", "minimum": -2.0, "maximum": 2.0, "default": 0.0 }, "frequency_penalty": { "type": "number", "description": "Penalizes repetition based on frequency in the generated text. A higher value discourages repeating frequently used words.", "minimum": -2.0, "maximum": 2.0, "default": 0.0 }, "safe_prompt": { "type": "boolean", "description": "Whether to inject a safety prompt before all conversations.", "default": false } } }