{ "openapi": "3.1.0", "info": { "title": "Baseten Anthropic-Compatible Inference API", "version": "1.0.0", "description": "Anthropic Messages API-compatible endpoint for Baseten Model APIs. Point the Anthropic SDK or any Messages API client at this endpoint to call supported models." }, "servers": [ { "url": "https://inference.baseten.co", "description": "Baseten Inference API." } ], "security": [ { "ApiKeyAuth": [] } ], "paths": { "/v1/messages": { "post": { "operationId": "createMessage", "summary": "Create a message", "description": "Creates a response message for the provided conversation. Compatible with the Anthropic Messages API, so the Anthropic SDK works against this endpoint by changing `base_url` and overriding the default `x-api-key` header with `Authorization`.", "tags": [ "Messages" ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MessagesRequest" } } } }, "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MessagesResponse" } } } }, "400": { "description": "Bad request: invalid parameters." }, "401": { "description": "Unauthorized: invalid or missing API key. The Anthropic SDK's default `x-api-key` header is not accepted — pass `Authorization: Bearer YOUR_API_KEY` or `Authorization: Api-Key YOUR_API_KEY`." }, "404": { "description": "Model not found or does not support the Messages API." }, "429": { "description": "Rate limit exceeded." }, "500": { "description": "Internal server error." 
} }, "x-codeSamples": [ { "lang": "python", "label": "Python", "source": "import anthropic\nimport os\n\nAPI_KEY = os.environ[\"BASETEN_API_KEY\"]\n\nclient = anthropic.Anthropic(\n base_url=\"https://inference.baseten.co\",\n api_key=API_KEY,\n default_headers={\"Authorization\": f\"Bearer {API_KEY}\"},\n)\n\nresponse = client.messages.create(\n model=\"deepseek-ai/DeepSeek-V4-Pro\",\n max_tokens=1024,\n messages=[\n {\"role\": \"user\", \"content\": \"Hello!\"}\n ],\n)\n\nprint(response.content[0].text)" }, { "lang": "bash", "label": "cURL", "source": "curl https://inference.baseten.co/v1/messages \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Api-Key $BASETEN_API_KEY\" \\\n -d '{\n \"model\": \"deepseek-ai/DeepSeek-V4-Pro\",\n \"max_tokens\": 1024,\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Hello!\"}\n ]\n }'" } ] } } }, "components": { "schemas": { "MessagesRequest": { "additionalProperties": false, "type": "object", "required": [ "model", "messages", "max_tokens" ], "title": "MessagesRequest", "description": "Request body for creating a message.", "properties": { "model": { "type": "string", "title": "Model", "description": "The model slug to use. Find available models at [Model APIs](https://app.baseten.co/model-apis/create)." }, "messages": { "type": "array", "title": "Messages", "description": "The conversation history as an ordered list of input messages. Alternating `user` and `assistant` roles are expected; the final message must be from the `user`.", "items": { "$ref": "#/components/schemas/InputMessage" } }, "max_tokens": { "type": "integer", "minimum": 1, "title": "Max Tokens", "description": "The maximum number of tokens to generate in the response. Required by the Messages API. The response may be shorter if it finishes naturally or hits a stop sequence." 
}, "system": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "$ref": "#/components/schemas/TextBlock" } } ], "title": "System", "description": "A system prompt that sets the model's behavior. Pass either a single string or an array of text content blocks." }, "temperature": { "type": "number", "minimum": 0, "maximum": 1, "default": 1, "title": "Temperature", "description": "Controls randomness. Lower values are more deterministic. Range: 0 to 1." }, "top_p": { "type": "number", "exclusiveMinimum": 0, "maximum": 1, "title": "Top P", "description": "Nucleus sampling: only consider tokens with cumulative probability up to this value." }, "top_k": { "type": "integer", "minimum": 0, "title": "Top K", "description": "Limits token selection to the top K most probable tokens at each step." }, "stop_sequences": { "type": "array", "items": { "type": "string" }, "title": "Stop Sequences", "description": "Custom text sequences that will stop generation. When a stop sequence is hit, `stop_reason` is `stop_sequence` and `stop_sequence` contains the matched string." }, "stream": { "type": "boolean", "default": false, "title": "Stream", "description": "If `true`, the response is streamed as server-sent events. Each event has a `type` such as `message_start`, `content_block_delta`, or `message_stop`." }, "tools": { "type": "array", "items": { "$ref": "#/components/schemas/ToolDefinition" }, "title": "Tools", "description": "A list of tools the model may call. Each tool has a `name`, `description`, and `input_schema` (a JSON Schema object)." }, "tool_choice": { "$ref": "#/components/schemas/ToolChoice", "description": "Controls which tool (if any) the model must call." }, "metadata": { "type": "object", "title": "Metadata", "description": "An object describing metadata about the request. Supports `user_id` for abuse detection.", "properties": { "user_id": { "type": "string", "description": "An external identifier for the end user." 
} } } } }, "InputMessage": { "additionalProperties": false, "type": "object", "required": [ "role", "content" ], "title": "InputMessage", "description": "A message in the conversation.", "properties": { "role": { "type": "string", "enum": [ "user", "assistant" ], "description": "The role of the message author. System prompts are passed via the top-level `system` field, not as a message." }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "anyOf": [ { "$ref": "#/components/schemas/TextBlock" }, { "$ref": "#/components/schemas/ToolUseBlock" }, { "$ref": "#/components/schemas/ToolResultBlock" } ] } } ], "description": "The message content. Pass a string for text-only messages or an array of content blocks for messages that include tool calls or tool results." } } }, "TextBlock": { "additionalProperties": false, "type": "object", "required": [ "type", "text" ], "title": "TextBlock", "description": "A text content block.", "properties": { "type": { "const": "text", "type": "string", "description": "The block type, always `text`." }, "text": { "type": "string", "description": "The text content." } } }, "ToolUseBlock": { "additionalProperties": false, "type": "object", "required": [ "type", "id", "name", "input" ], "title": "ToolUseBlock", "description": "A tool use content block generated by the model.", "properties": { "type": { "const": "tool_use", "type": "string", "description": "The block type, always `tool_use`." }, "id": { "type": "string", "description": "A unique identifier for this tool call. Use this as `tool_use_id` in the matching tool result." }, "name": { "type": "string", "description": "The name of the tool being called." }, "input": { "type": "object", "additionalProperties": true, "description": "The arguments passed to the tool, matching the tool's `input_schema`." 
} } }, "ToolResultBlock": { "additionalProperties": false, "type": "object", "required": [ "type", "tool_use_id", "content" ], "title": "ToolResultBlock", "description": "A tool result content block supplied by the user in response to a tool call.", "properties": { "type": { "const": "tool_result", "type": "string", "description": "The block type, always `tool_result`." }, "tool_use_id": { "type": "string", "description": "The `id` of the `tool_use` block this result responds to." }, "content": { "anyOf": [ { "type": "string" }, { "type": "array", "items": { "$ref": "#/components/schemas/TextBlock" } } ], "description": "The tool's output. Pass a string for simple results or an array of text blocks for structured output." }, "is_error": { "type": "boolean", "default": false, "description": "Set to `true` if the tool call raised an error." } } }, "ToolDefinition": { "additionalProperties": false, "type": "object", "required": [ "name", "input_schema" ], "title": "ToolDefinition", "description": "A tool the model can call.", "properties": { "name": { "type": "string", "description": "A unique name for the tool." }, "description": { "type": "string", "description": "A description of what the tool does. The model uses this to decide when to call it." }, "input_schema": { "type": "object", "additionalProperties": true, "description": "A JSON Schema object describing the tool's expected input." } } }, "ToolChoice": { "oneOf": [ { "type": "object", "required": [ "type" ], "properties": { "type": { "const": "auto", "type": "string", "description": "The model decides whether to call a tool." }, "disable_parallel_tool_use": { "type": "boolean", "default": false, "description": "If `true`, restricts the model to calling at most one tool per response." } } }, { "type": "object", "required": [ "type" ], "properties": { "type": { "const": "any", "type": "string", "description": "The model must call a tool but may pick which one." 
} } }, { "type": "object", "required": [ "type", "name" ], "properties": { "type": { "const": "tool", "type": "string", "description": "The model must call the named tool." }, "name": { "type": "string", "description": "The exact name of the tool to call." } } }, { "type": "object", "required": [ "type" ], "properties": { "type": { "const": "none", "type": "string", "description": "The model must not call any tool." } } } ], "title": "ToolChoice", "description": "Controls which tool (if any) the model calls." }, "MessagesResponse": { "additionalProperties": false, "type": "object", "required": [ "id", "type", "role", "content", "model", "stop_reason", "usage" ], "title": "MessagesResponse", "description": "The message response returned by the model.", "properties": { "id": { "type": "string", "description": "A unique identifier for this message, such as `msg_abc123`." }, "type": { "const": "message", "type": "string", "description": "The object type, always `message`." }, "role": { "const": "assistant", "type": "string", "description": "The role of the generated message, always `assistant`." }, "content": { "type": "array", "items": { "anyOf": [ { "$ref": "#/components/schemas/TextBlock" }, { "$ref": "#/components/schemas/ToolUseBlock" } ] }, "description": "An array of content blocks generated by the model. Text responses contain a single `text` block; responses that invoke tools contain `tool_use` blocks." }, "model": { "type": "string", "description": "The model slug that produced the response." }, "stop_reason": { "type": "string", "enum": [ "end_turn", "max_tokens", "stop_sequence", "tool_use" ], "description": "Why the model stopped generating: `end_turn` (natural stop), `max_tokens` (hit the `max_tokens` limit), `stop_sequence` (matched a `stop_sequences` entry), or `tool_use` (model invoked a tool)." },
"stop_sequence": { "type": [ "string", "null" ], "description": "The stop sequence that was matched, if `stop_reason` is `stop_sequence`. Otherwise `null`." }, "usage": { "$ref": "#/components/schemas/Usage", "description": "Token usage statistics for the request." } } }, "Usage": { "additionalProperties": true, "type": "object", "title": "Usage", "description": "Token usage statistics.", "properties": { "input_tokens": { "type": "integer", "description": "The number of input tokens processed." }, "output_tokens": { "type": "integer", "description": "The number of output tokens generated." } } } }, "securitySchemes": { "ApiKeyAuth": { "type": "apiKey", "in": "header", "name": "Authorization", "description": "Pass your Baseten API key using either the `Api-Key` or `Bearer` scheme: `Authorization: Api-Key YOUR_API_KEY` or `Authorization: Bearer YOUR_API_KEY`. The Anthropic SDK's default `x-api-key` header is not accepted; override `default_headers` to send `Authorization` instead." } } } }