{ "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "ChatCompletionRequest", "type": "object", "properties": { "model": { "type": "string", "description": "Model ID to use. Can be a Hugging Face model ID (e.g., meta-llama/Llama-3-70b-chat-hf) or a provider-specific identifier." }, "messages": { "type": "array", "description": "List of messages comprising the conversation" }, "frequency_penalty": { "type": "number", "description": "Penalizes tokens based on their frequency in the text so far" }, "logprobs": { "type": "boolean", "description": "Whether to return log probabilities" }, "max_tokens": { "type": "integer", "description": "Maximum number of tokens to generate" }, "presence_penalty": { "type": "number", "description": "Penalizes tokens based on their presence in the text so far" }, "reasoning_effort": { "type": "string", "description": "Constrains reasoning effort for models that support it. Common values are none, minimal, low, medium, high, xhigh." }, "response_format": { "type": "string" }, "seed": { "type": "integer", "description": "Random seed for reproducibility" }, "stop": { "type": "array", "description": "Up to 4 sequences at which generation will stop" }, "stream": { "type": "boolean", "description": "Whether to stream partial responses using server-sent events (SSE)" }, "stream_options": { "type": "object" }, "temperature": { "type": "number", "description": "Sampling temperature" }, "tool_choice": { "type": "string", "description": "Controls tool usage" }, "tool_prompt": { "type": "string", "description": "Prompt placed before the tools" }, "tools": { "type": "array", "description": "List of tools the model may call" }, "top_logprobs": { "type": "integer", "description": "Number of most likely tokens to return at each position" }, "top_p": { "type": "number", "description": "Nucleus sampling (top-p) parameter" } } }