{ "openapi": "3.0.0", "info": { "title": "Azure OpenAI Service API", "description": "Azure OpenAI APIs for completions and search", "version": "2023-05-15" }, "servers": [ { "url": "https://{endpoint}/openai", "variables": { "endpoint": { "default": "your-resource-name.openai.azure.com" } } } ], "security": [ { "bearer": [ "api.read" ] }, { "apiKey": [] } ], "paths": { "/deployments/{deployment-id}/completions": { "post": { "summary": "Creates a completion for the provided prompt, parameters and chosen model.", "operationId": "Completions_Create", "parameters": [ { "in": "path", "name": "deployment-id", "required": true, "schema": { "type": "string", "example": "davinci", "description": "Deployment id of the model which was deployed." } }, { "in": "query", "name": "api-version", "required": true, "schema": { "type": "string", "example": "2023-05-15", "description": "api version" } } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "properties": { "prompt": { "description": "The prompt(s) to generate completions for, encoded as a string or array of strings.\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. Maximum allowed size of string list is 2048.", "oneOf": [ { "type": "string", "default": "", "example": "This is a test.", "nullable": true }, { "type": "array", "items": { "type": "string", "default": "", "example": "This is a test.", "nullable": false }, "description": "Array size minimum of 1 and maximum of 2048" } ] }, "max_tokens": { "description": "The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0.", "type": "integer", "default": 16, "example": 16, "nullable": true }, "temperature": { "description": "What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.\nWe generally recommend altering this or top_p but not both.", "type": "number", "default": 1, "example": 1, "nullable": true }, "top_p": { "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.", "type": "number", "default": 1, "example": 1, "nullable": true }, "logit_bias": { "description": "Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. 
As an example, you can pass {\"50256\" : -100} to prevent the <|endoftext|> token from being generated.", "type": "object", "nullable": false }, "user": { "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse", "type": "string", "nullable": false }, "n": { "description": "How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop.", "type": "integer", "default": 1, "example": 1, "nullable": true }, "stream": { "description": "Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.", "type": "boolean", "nullable": true, "default": false }, "logprobs": { "description": "Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response.\nMinimum of 0 and maximum of 5 allowed.", "type": "integer", "default": null, "nullable": true }, "model": { "type": "string", "example": "davinci", "nullable": true, "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them." }, "suffix": { "type": "string", "nullable": true, "description": "The suffix that comes after a completion of inserted text." }, "echo": { "description": "Echo back the prompt in addition to the completion", "type": "boolean", "default": false, "nullable": true }, "stop": { "description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.", "oneOf": [ { "type": "string", "default": "<|endoftext|>", "example": "\n", "nullable": true }, { "type": "array", "items": { "type": "string", "example": [ "\n" ], "nullable": false }, "description": "Array minimum size of 1 and maximum of 4" } ] }, "completion_config": { "type": "string", "nullable": true }, "presence_penalty": { "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", "type": "number", "default": 0 }, "frequency_penalty": { "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", "type": "number", "default": 0 }, "best_of": { "description": "Generates best_of completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed.\nWhen used with n, best_of controls the number of candidate completions and n specifies how many to return – best_of must be greater than n.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. 
Has maximum value of 128.", "type": "integer" } } }, "example": { "prompt": "Negate the following sentence.The price for bubblegum increased on thursday.\n\n Negated Sentence:", "max_tokens": 50 } } } }, "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string" }, "object": { "type": "string" }, "created": { "type": "integer" }, "model": { "type": "string" }, "choices": { "type": "array", "items": { "type": "object", "properties": { "text": { "type": "string" }, "index": { "type": "integer" }, "logprobs": { "type": "object", "properties": { "tokens": { "type": "array", "items": { "type": "string" } }, "token_logprobs": { "type": "array", "items": { "type": "number" } }, "top_logprobs": { "type": "array", "items": { "type": "object", "additionalProperties": { "type": "number" } } }, "text_offset": { "type": "array", "items": { "type": "integer" } } } }, "finish_reason": { "type": "string" } } } }, "usage": { "type": "object", "properties": { "completion_tokens": { "type": "number", "format": "int32" }, "prompt_tokens": { "type": "number", "format": "int32" }, "total_tokens": { "type": "number", "format": "int32" } }, "required": [ "prompt_tokens", "total_tokens", "completion_tokens" ] } }, "required": [ "id", "object", "created", "model", "choices" ] }, "example": { "model": "davinci", "object": "text_completion", "id": "cmpl-4509KAos68kxOqpE2uYGw81j6m7uo", "created": 1637097562, "choices": [ { "index": 0, "text": "The price for bubblegum decreased on thursday.", "logprobs": null, "finish_reason": "stop" } ] } } }, "headers": { "apim-request-id": { "description": "Request ID for troubleshooting purposes", "schema": { "type": "string" } } } }, "default": { "description": "Service unavailable", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/errorResponse" } } }, "headers": { "apim-request-id": { "description": "Request ID for troubleshooting purposes", "schema": { "type": "string" } } } } } } }, "/deployments/{deployment-id}/embeddings": { "post": { "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.", "operationId": "embeddings_create", "parameters": [ { "in": "path", "name": "deployment-id", "required": true, "schema": { "type": "string", "example": "ada-search-index-v1" }, "description": "The deployment id of the model which was deployed." }, { "in": "query", "name": "api-version", "required": true, "schema": { "type": "string", "example": "2023-05-15", "description": "api version" } } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "additionalProperties": true, "properties": { "input": { "description": "Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request, pass an array of strings. 
Each input must not exceed 2048 tokens in length.\nUnless you are embedding code, we suggest replacing newlines (\\n) in your input with a single space, as we have observed inferior results when newlines are present.", "oneOf": [ { "type": "string", "default": "", "example": "This is a test.", "nullable": true }, { "type": "array", "minItems": 1, "maxItems": 2048, "items": { "type": "string", "minLength": 1, "example": "This is a test.", "nullable": false } } ] }, "user": { "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse.", "type": "string", "nullable": false }, "input_type": { "description": "input type of embedding search to use", "type": "string", "example": "query" }, "model": { "type": "string", "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them.", "nullable": false } }, "required": [ "input" ] } } } }, "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "type": "object", "properties": { "object": { "type": "string" }, "model": { "type": "string" }, "data": { "type": "array", "items": { "type": "object", "properties": { "index": { "type": "integer" }, "object": { "type": "string" }, "embedding": { "type": "array", "items": { "type": "number" } } }, "required": [ "index", "object", "embedding" ] } }, "usage": { "type": "object", "properties": { "prompt_tokens": { "type": "integer" }, "total_tokens": { "type": "integer" } }, "required": [ "prompt_tokens", "total_tokens" ] } }, "required": [ "object", "model", "data", "usage" ] } } } } } } }, "/deployments/{deployment-id}/chat/completions": { "post": { "summary": "Creates a completion for the chat message", "operationId": "ChatCompletions_Create", "parameters": [ { "in": "path", "name": "deployment-id", "required": true, "schema": { "type": "string", "description": "Deployment id of the model which was deployed." } }, { "in": "query", "name": "api-version", "required": true, "schema": { "type": "string", "example": "2023-05-15", "description": "api version" } } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "properties": { "messages": { "description": "The messages to generate chat completions for, in the chat format.", "type": "array", "minItems": 1, "items": { "type": "object", "properties": { "role": { "type": "string", "enum": [ "system", "user", "assistant" ], "description": "The role of the author of this message." }, "content": { "type": "string", "description": "The contents of the message" }, "name": { "type": "string", "description": "The name of the user in a multi-user chat" } }, "required": [ "role", "content" ] } }, "temperature": { "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\nWe generally recommend altering this or `top_p` but not both.", "type": "number", "minimum": 0, "maximum": 2, "default": 1, "example": 1, "nullable": true }, "top_p": { "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or `temperature` but not both.", "type": "number", "minimum": 0, "maximum": 1, "default": 1, "example": 1, "nullable": true }, "n": { "description": "How many chat completion choices to generate for each input message.", "type": "integer", "minimum": 1, "maximum": 128, "default": 1, "example": 1, "nullable": true }, "stream": { "description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.", "type": "boolean", "nullable": true, "default": false }, "stop": { "description": "Up to 4 sequences where the API will stop generating further tokens.", "oneOf": [ { "type": "string", "nullable": true }, { "type": "array", "items": { "type": "string", "nullable": false }, "minItems": 1, "maxItems": 4, "description": "Array minimum size of 1 and maximum of 4" } ], "default": null }, "max_tokens": { "description": "The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens).", "type": "integer", "default": "inf" }, "presence_penalty": { "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", "type": "number", "default": 0, "minimum": -2, "maximum": 2 }, "frequency_penalty": { "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", "type": "number", "default": 0, "minimum": -2, "maximum": 2 }, "logit_bias": { "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.", "type": "object", "nullable": true }, "user": { "description": "A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.", "type": "string", "example": "user-1234", "nullable": false } }, "required": [ "messages" ] }, "example": { "model": "gpt-35-turbo", "messages": [ { "role": "user", "content": "Hello!" } ] } } } }, "responses": { "200": { "description": "OK", "content": { "application/json": { "schema": { "type": "object", "properties": { "id": { "type": "string" }, "object": { "type": "string" }, "created": { "type": "integer", "format": "unixtime" }, "model": { "type": "string" }, "choices": { "type": "array", "items": { "type": "object", "properties": { "index": { "type": "integer" }, "message": { "type": "object", "properties": { "role": { "type": "string", "enum": [ "system", "user", "assistant" ], "description": "The role of the author of this message." 
}, "content": { "type": "string", "description": "The contents of the message" } }, "required": [ "role", "content" ] }, "finish_reason": { "type": "string" } } } }, "usage": { "type": "object", "properties": { "prompt_tokens": { "type": "integer" }, "completion_tokens": { "type": "integer" }, "total_tokens": { "type": "integer" } }, "required": [ "prompt_tokens", "completion_tokens", "total_tokens" ] } }, "required": [ "id", "object", "created", "model", "choices" ] }, "example": { "id": "chatcmpl-123", "object": "chat.completion", "created": 1677652288, "choices": [ { "index": 0, "message": { "role": "assistant", "content": "\n\nHello there, how may I assist you today?" }, "finish_reason": "stop" } ], "usage": { "prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21 } } } } } } } } }, "components": { "schemas": { "errorResponse": { "type": "object", "properties": { "error": { "type": "object", "properties": { "code": { "type": "string" }, "message": { "type": "string" }, "param": { "type": "string" }, "type": { "type": "string" } } } } } }, "securitySchemes": { "bearer": { "type": "oauth2", "flows": { "implicit": { "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", "scopes": {} } }, "x-tokenInfoFunc": "api.middleware.auth.bearer_auth", "x-scopeValidateFunc": "api.middleware.auth.validate_scopes" }, "apiKey": { "type": "apiKey", "name": "api-key", "in": "header" } } } }