openapi: 3.1.0 info: title: Emissary API description: |- v0.1.0 termsOfService: https://www.withemissary.com/terms-of-service contact: email: contact@withemissary.com license: name: Apache 2.0 url: http://www.apache.org/licenses/LICENSE-2.0.html version: 0.1.0 security: - api_key: [] servers: - url: https://api.withemissary.com description: Emissary API server tags: - name: Base Models description: Deals with base models - name: Projects description: Deals with projects (workspace) - name: Dataset description: Datasets are used to upload documents that can be used for fine-tuning - name: Training Jobs description: Deals with training jobs - name: Inference Engines description: Inference Engine API endpoints - name: Deployments description: Deals with deployments - name: Inference description: Deals with inference request to the deployed model paths: '/v1/models': get: tags: - Base Models summary: List base models description: List and describe the various models available in the API operationId: listBaseModels responses: '200': description: List of base models content: application/json: schema: type: array items: "$ref": "#/components/schemas/BaseModelSummary" '400': $ref: '#/components/responses/InvalidInput' '401': $ref: '#/components/responses/Unauthorized' '/v1/models/{base_model_name}': get: tags: - Base Models summary: Retrieve base model description: Retrieves a model instance, providing basic information about the model such as the hyper parameters that can be adjusted operationId: retrieveBaseModelByName parameters: - name: base_model_name in: path required: true description: The name of the base model to use for this request schema: type: string responses: '200': description: The base model object matching the specified name. 
content: application/json: schema: "$ref": "#/components/schemas/BaseModelSummary" '404': $ref: '#/components/responses/NotFound' '/v1/projects': # path parameter in curly braces post: tags: - Projects summary: Create project description: Create a new project. Projects can be created and used to organize datasets, training jobs, and deployments. operationId: createProject requestBody: # description: Provide your project name if you want to specify it. content: application/json: schema: type: object properties: name: type: string example: my_project description: The name of the project required: false responses: '201': description: The created Project's summary object content: application/json: schema: "$ref": "#/components/schemas/ProjectSummary" '400': $ref: '#/components/responses/InvalidInput' get: tags: - Projects # documentation for GET operation for this path summary: List projects # you can use GFM in operation description too: https://guides.github.com/features/mastering-markdown description: | Returns a list of projects. # operationId should be unique across the whole specification operationId: listProjects # security schemas applied to this operation responses: '200': description: List of Project's summary object. content: application/json: schema: type: array items: "$ref": "#/components/schemas/ProjectSummary" '401': $ref: '#/components/responses/Unauthorized' '/v1/projects/{project_id}': get: tags: - Projects summary: Retrieve project description: Retrieve a project. operationId: retrieveProjectById parameters: - name: project_id in: path schema: type: string description: The ID of the project. required: true responses: '200': description: The ProjectDetail object matching the specified ID. content: application/json: schema: "$ref": "#/components/schemas/ProjectDetail" '404': $ref: '#/components/responses/NotFound' delete: tags: - Projects summary: Delete project description: Delete a project. 
operationId: deleteProjectById parameters: - name: project_id in: path schema: type: string description: The ID of the project. required: true responses: '204': description: Project deleted successfully '404': $ref: '#/components/responses/NotFound' '/v1/projects/{project_id}/datasets': post: tags: - Datasets summary: Upload dataset description: Upload dataset to the project (upto 5MB). Upload through UI if your dataset size is larger than 5MB. operationId: createDataset parameters: - $ref: '#/components/parameters/ProjectId' requestBody: required: true content: multipart/form-data: schema: type: object properties: file: type: string format: binary description: The dataset file (binary format) to upload name: type: string example: my_dataset description: The name of the dataset (optional) required: - file responses: '201': description: The Dataset's summary object. content: application/json: schema: "$ref": "#/components/schemas/DatasetSummary" '400': $ref: '#/components/responses/InvalidInput' get: tags: - Datasets summary: List datasets description: Returns a list of datasets operationId: listDatasets parameters: - $ref: '#/components/parameters/ProjectId' responses: '200': description: List of Dataset's summary object. content: application/json: schema: type: array items: "$ref": "#/components/schemas/DatasetSummary" '401': $ref: '#/components/responses/Unauthorized' '/v1/projects/{project_id}/datasets/{dataset_id}': get: tags: - Datasets summary: Retrieve dataset description: Retrieve a dataset. operationId: retrieveDatasetById parameters: - $ref: '#/components/parameters/ProjectId' - name: dataset_id in: path required: true description: The ID of the dataset. schema: type: string responses: '200': description: The DatasetDetail object matching the specified ID. 
content: application/json: schema: "$ref": "#/components/schemas/DatasetDetail" '404': $ref: '#/components/responses/NotFound' delete: tags: - Datasets summary: Delete dataset description: Delete a dataset. operationId: deleteDatasetById parameters: - $ref: '#/components/parameters/ProjectId' - name: dataset_id in: path required: true description: The ID of the dataset schema: type: string responses: '204': description: Dataset deleted successfully '404': $ref: '#/components/responses/NotFound' '/v1/projects/{project_id}/training-jobs': post: tags: - Training Jobs summary: Create SFT training job description: Initiate fine-tuning (training job). operationId: createTrainingJob parameters: - $ref: '#/components/parameters/ProjectId' requestBody: required: true content: application/json: schema: type: object properties: base_model_option: type: string enum: - pre-trained - fine-tuned example: "pre-trained" description: Base Model option to be used (default = "pre-trained"), you can either start from the pure base model (pre-trained) or from any checkpoint for your fine-tuned model (fine-tuned) base_model: type: string example: "Llama-3.2-1B-Instruct" description: The base model to fine-tune. To see full list of supported base model please visit [Base Models](/fine-tuning/parameters#available-base-models). If you are training on fine-tuned base model option, specify the training job name you want to use. checkpoint: type: number example: 5 description: If base_model_option == "fine-tuned", specify the checkpoint you want to use. 
train_dataset_id: type: string example: "ds-12345" description: The training dataset ID to use for fine-tuning test_dataset_id: type: string example: "ds-67890" description: The testing dataset ID to use for fine-tuning (optional) train_test_split_ratio: type: number example: 20 description: The ratio (%) to split the training dataset for testing (optional) test_functions: type: array example: ["test_similarity_shared"] items: type: string description: List of test function names to be used (optional) name: type: string example: "training-job-1" description: The name of the training job (optional) description: type: string example: "Fine-tuning the model on my dataset" description: A description of the training job (optional) task_type: type: string enum: - text-generation - classification - embedding - regression example: "text-generation" description: The task type for the training job (default = "text-generation") parameters: type: object properties: downsample_size: type: number example: 20 learning_rate: type: number example: 0.0002 num_train_epochs: type: number example: 3 per_device_train_batch_size: type: number example: 2 per_device_eval_batch_size: type: number example: 2 dynamic_loss: type: boolean example: false description: (Default = false) Set to true if you want to enable dynamic loss function description: The hyper parameters to use for fine-tuning (optional) rebalance_data: description: (Optional) Currently only supported for classification task type. oneOf: - title: ratio type: object description: | A mapping from category labels (as keys) to the ratio for resampling the dataset. The total size of the dataset will remain consistently. additionalProperties: type: number format: float example: label1: 0.3 label2: 0.7 - title: number type: object description: | A mapping from category labels (as keys) to the number for resampling the dataset. 
The exact number of data will be created or deleted to fit the number that is provided additionalProperties: type: number example: label1: 300 label2: 300 label3: 500 hf_model_link: type: string example: "https://huggingface.co/my_model" description: Link to the Hugging Face model / Use only when you use the HF_MODEL as a base required: - base_model - train_dataset_id responses: '201': description: Returns the TrainingJob's summary object. content: application/json: schema: "$ref": "#/components/schemas/TrainingJobSummary" '400': $ref: '#/components/responses/InvalidInput' get: tags: - Training Jobs summary: List training jobs description: Returns a list of training jobs operationId: listTrainingJobs parameters: - $ref: '#/components/parameters/ProjectId' responses: '200': description: List of TrainingJob's summary object. content: application/json: schema: type: array items: "$ref": "#/components/schemas/TrainingJobSummary" '401': $ref: '#/components/responses/Unauthorized' '/v1/projects/{project_id}/training-jobs/grpo': post: tags: - Training Jobs summary: Create GRPO training job description: Initiate GRPO fine-tuning (training job). operationId: createGRPOTrainingJob parameters: - $ref: '#/components/parameters/ProjectId' requestBody: required: true content: application/json: schema: type: object properties: base_model_option: type: string enum: - pre-trained - fine-tuned example: "pre-trained" description: Base Model option to be used (default = "pre-trained"), you can either start from the pure base model (pre-trained) or from any checkpoint for your fine-tuned model (fine-tuned) base_model: type: string example: "Llama-3.2-1B-Instruct" description: The base model to fine-tune. To see full list of supported base model please visit [Base Models](/fine-tuning/parameters#available-base-models). If you are training on fine-tuned base model option, specify the training job name you want to use. 
checkpoint: type: number example: 5 description: (Required if base_model_option == "fine-tuned"), specify the checkpoint you want to use. train_dataset_id: type: string example: "ds-12345" description: The training dataset ID to use for fine-tuning test_functions: type: array example: [ "test_similarity_shared" ] items: type: string description: Name of test function to be used reward: type: object description: (GRPO) Reward weight map properties: test_function_1: type: float example: 0.2 test_function_2: type: float example: 0.8 test_dataset_id: type: string example: "ds-67890" description: The testing dataset ID to use for fine-tuning (optional) train_test_split_ratio: type: number example: 20 description: The ratio (%) to split the training dataset for testing (optional) name: type: string example: "training-1" description: The name of the training job (optional) description: type: string example: "Fine-tuning the model on my dataset" description: A description of the training job (optional) task_type: type: string enum: - text-generation - classification - embedding - regression example: "text-generation" description: The task type for the training job (default = "text-generation") parameters: type: object properties: learning_rate: type: number example: 0.0002 num_train_epochs: type: number example: 15 num_generations: type: number example: 4 per_device_train_batch_size: type: number example: 4 hf_model_link: type: string example: "https://huggingface.co/my_model" description: Link to the Hugging Face model / Use only when you use the HF_MODEL as a base required: - base_model - train_dataset_id - test_functions - reward responses: '201': description: Returns the TrainingJob's summary object. 
content: application/json: schema: "$ref": "#/components/schemas/TrainingJobSummary" '400': $ref: '#/components/responses/InvalidInput' '/v1/projects/{project_id}/training-jobs/{training_job_id}': get: tags: - Training Jobs summary: Retrieve training job description: Retrieve a training job. operationId: retrieveTrainingJobById parameters: - $ref: '#/components/parameters/ProjectId' - name: training_job_id in: path required: true description: The ID of the training job. schema: type: string responses: '200': description: The TrainingJobDetail object matching the specified ID. content: application/json: schema: "$ref": "#/components/schemas/TrainingJobDetail" '404': $ref: '#/components/responses/NotFound' delete: tags: - Training Jobs summary: Delete training job description: Delete a training job. operationId: deleteTrainingJobById parameters: - $ref: '#/components/parameters/ProjectId' - name: training_job_id in: path required: true description: The ID of the training job schema: type: string responses: '204': description: Training job deleted successfully '404': $ref: '#/components/responses/NotFound' '/v1/projects/{project_id}/training-jobs/{training_job_id}/cancel': post: tags: - Training Jobs summary: Cancel training job description: Cancel a training job. operationId: cancelTrainingJobById parameters: - $ref: '#/components/parameters/ProjectId' - name: training_job_id in: path required: true description: The ID of the training job schema: type: string responses: '204': description: Training job canceled successfully '404': $ref: '#/components/responses/NotFound' '/v1/projects/{project_id}/training-jobs/{training_job_id}/checkpoints': get: tags: - Training Jobs summary: List checkpoints description: Returns a list of checkpoints for a training job. 
operationId: listCheckpoints parameters: - $ref: '#/components/parameters/ProjectId' - name: training_job_id in: path required: true description: The ID of the training job schema: type: string responses: '200': description: List of TrainingJobCheckpoint object. content: application/json: schema: type: array items: "$ref": "#/components/schemas/TrainingJobCheckpoint" '404': $ref: '#/components/responses/NotFound' '/v1/engines': post: tags: - Inference Engines summary: Create Inference Engine description: Create a new inference engine. operationId: createEngine requestBody: required: true content: application/json: schema: type: object properties: name: type: string example: "inference-test-engine" base_model: type: string example: "Llama-3.2-1B-Instruct" task_type: type: string example: "text-generation" resource_management_config: oneOf: - title: Inactive Timeout description: Specifies the period of inactivity, in seconds, after which the deployment will be automatically deactivated if no incoming requests are received. While deactivated, the deployment incurs no charges. When a request is sent to a deactivated deployment, it will be reactivated to process the request; this reactivation process typically introduces a delay of approximately 8 to 20 minutes before the deployment becomes fully operational. type: object properties: management_type: type: string example: "inactive_timeout" management_config: type: object properties: inactive_duration: type: integer example: 3600 description: Period of inactivity - title: Schedule description: Specifies the schedule which the engine will be deployed with deployments (if existing on the engine). 
type: object properties: management_type: type: string example: "schedule" management_config: type: object properties: timezone: type: string example: "America/Los_Angeles" description: The IANA timezone for the schedule start_time: type: string example: "10:00" description: The start time for the schedule in HH:MM format end_time: type: string example: "18:00" description: The end time for the schedule in HH:MM format days: type: array example: [ "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday" ] items: type: string enum: - monday - tuesday - wednesday - thursday - friday - saturday - sunday description: The days of the week for the schedule required: - base_model responses: '201': description: The created Inference Engine's summary object content: application/json: schema: "$ref": "#/components/schemas/InferenceEngineSummary" '400': $ref: '#/components/responses/InvalidInput' '401': $ref: '#/components/responses/Unauthorized' get: tags: - Inference Engines summary: List Inference Engines description: Returns a list of inference engines operationId: listEngines parameters: - name: base_model in: query description: Filter engines by base model required: false schema: type: string - name: task_type in: query description: Filter engines by task type required: false schema: type: string responses: '200': description: List of Inference Engine's summary object. content: application/json: schema: type: array items: "$ref": "#/components/schemas/InferenceEngineSummary" '401': $ref: '#/components/responses/Unauthorized' '/v1/engines/{engine_id}': get: tags: - Inference Engines summary: Retrieve Inference Engine description: Retrieve Inference Engine's Details operationId: getEngine parameters: - name: engine_id in: path required: true description: The ID of the Inference Engine schema: type: string responses: '200': description: The InferenceEngineDetail object matching the specified ID. 
content: application/json: schema: "$ref": "#/components/schemas/InferenceEngineDetail" '404': $ref: '#/components/responses/NotFound' delete: tags: - Inference Engines summary: Delete Inference Engine description: Delete an inference engine. operationId: deleteEngineById parameters: - name: engine_id in: path required: true description: The ID of the Inference Engine schema: type: string responses: '204': description: Inference Engine deleted successfully '404': $ref: '#/components/responses/NotFound' '/v1/engines/{engine_id}/deploy': post: tags: - Inference Engines summary: Create Deployment on Inference Engine description: Create deployment on Inference Engine operationId: deployOnEngine parameters: - name: engine_id in: path required: true description: The ID of the Inference Engine schema: type: string requestBody: required: true content: application/json: schema: type: object properties: name: type: string example: "test-deployment" project_id: type: string example: "ms-123465" description: Model Service ID training_id: type: string example: "tr-12345" description: Training Job ID to be deployed checkpoint: type: integer example: 1 description: Checkpoint of the training job to be deployed required: - project_id - training_id - checkpoint responses: '201': description: The created Deployment's summary object content: application/json: schema: "$ref": "#/components/schemas/DeploymentSummary" '400': $ref: '#/components/responses/InvalidInput' '401': $ref: '#/components/responses/Unauthorized' '/v1/engines/{engine_id}/undeploy': post: tags: - Inference Engines summary: Undeploy from Inference Engine description: Undeploy the deployment from the Inference Engine operationId: unDeployFromEngine parameters: - name: engine_id in: path required: true description: The ID of the Inference Engine schema: type: string requestBody: required: true content: application/json: schema: type: object properties: deployment_name: type: string example: "my-deployment" required: - 
deployment_name responses: '204': description: Undeployed Successfully '400': $ref: '#/components/responses/InvalidInput' '401': $ref: '#/components/responses/Unauthorized' '/v1/engines/{engine_id}/reactivate': post: tags: - Inference Engines summary: Reactivate Inference Engine description: Reactivate a inactive inference engine. operationId: reactivateEngineById parameters: - name: engine_id in: path required: true description: The ID of the Inference Engine schema: type: string responses: '204': description: Inference Engine reactivated successfully '400': description: When reactivate request on engine which is already deployed or in reactivation '404': $ref: '#/components/responses/NotFound' '/v1/engines/{engine_id}/deactivate': post: tags: - Inference Engines summary: Deactivate Inference Engine description: Deactivate an inference engine. operationId: deactivateEngineById parameters: - name: engine_id in: path required: true description: The ID of the Inference Engine schema: type: string responses: '204': description: Inference Engine deactivated successfully '400': description: When deactivate request on engine which is not Active '404': $ref: '#/components/responses/NotFound' '/v1/projects/{project_id}/deployments': post: tags: - Deployments summary: Create deployment description: Create a new deployment. 
operationId: createDeployment parameters: - $ref: '#/components/parameters/ProjectId' requestBody: required: true content: application/json: schema: type: object properties: training_job_id: type: string example: "tr-12345" description: The training job ID to use for deployment or the model name for base model deployment checkpoint: type: integer example: 4 description: The checkpoint number to use for deployment (0 for base model deployment) name: type: string example: "deployment-1" description: The name of the deployment description: type: string example: "Deployment for my training job" description: A description of the deployment inactive_timeout: type: integer example: 3600 description: Inactive Timeout (seconds) (default = 3600). Specifies the period of inactivity, in seconds, after which the deployment will be automatically deactivated if no incoming requests are received. While deactivated, the deployment incurs no charges. When a request is sent to a deactivated deployment, it will be reactivated to process the request; this reactivation process typically introduces a delay of approximately 8 to 20 minutes before the deployment becomes fully operational. 
schedule: type: object properties: timezone: type: string example: "America/Los_Angeles" description: The IANA timezone for the schedule start_time: type: string example: "10:00" description: The start time for the schedule in HH:MM format end_time: type: string example: "18:00" description: The end time for the schedule in HH:MM format days: type: array example: ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"] items: type: string enum: - monday - tuesday - wednesday - thursday - friday - saturday - sunday description: The days of the week for the schedule required: - training_job_id - checkpoint responses: '201': description: The created Deployment's summary object content: application/json: schema: "$ref": "#/components/schemas/DeploymentSummary" '400': $ref: '#/components/responses/InvalidInput' get: tags: - Deployments summary: List deployments description: Returns a list of deployments operationId: listDeployments parameters: - $ref: '#/components/parameters/ProjectId' responses: '200': description: List of Deployment's summary object. content: application/json: schema: type: array items: "$ref": "#/components/schemas/DeploymentSummary" '401': $ref: '#/components/responses/Unauthorized' '/v1/projects/{project_id}/deployments/{deployment_id}': get: tags: - Deployments summary: Retrieve deployment description: Retrieve a deployment. operationId: retrieveDeploymentById parameters: - $ref: '#/components/parameters/ProjectId' - name: deployment_id in: path required: true description: The ID of the deployment. schema: type: string responses: '200': description: The DeploymentDetail object matching the specified ID. content: application/json: schema: "$ref": "#/components/schemas/DeploymentDetail" '404': $ref: '#/components/responses/NotFound' delete: tags: - Deployments summary: Delete deployment description: Delete a deployment. 
operationId: deleteDeploymentById parameters: - $ref: '#/components/parameters/ProjectId' - name: deployment_id in: path required: true description: The ID of the deployment schema: type: string responses: '204': description: Deployment deleted successfully '404': $ref: '#/components/responses/NotFound' '/v1/projects/{project_id}/deployments/{deployment_id}/reactivate': post: tags: - Deployments summary: Reactivate deployment description: Reactivate a deactivated deployment. operationId: reactivateDeploymentById parameters: - $ref: '#/components/parameters/ProjectId' - name: deployment_id in: path required: true description: The ID of the deployment schema: type: string responses: '204': description: Deployment reactivated successfully '400': description: When reactivate request on deployment which is already deployed or in reactivation '404': $ref: '#/components/responses/NotFound' '/v1/completions': post: tags: - Inference summary: Completions description: Get completions from a deployment using the provided input. 
operationId: getCompletionsFromDeployment requestBody: required: true description: Provide you prompt input for completions content: application/json: schema: type: object properties: model: type: string example: "Llama-3.2-1B-Instruct" description: "Deployment name or shared serverless base model name" prompt: type: string example: "The capital of France is" description: The input prompt for the deployment temperature: type: number example: 0.7 description: The sampling temperature to use max_completion_tokens: type: integer example: 500 description: The maximum number of new tokens to generate top_p: type: number example: 0.9 description: The cumulative probability for token selection top_k: type: integer example: 50 description: The number of highest probability tokens to keep for top-k-filtering required: - model - prompt responses: '200': description: Successful operation content: application/json: schema: "$ref": "#/components/schemas/CompletionObject" '404': $ref: '#/components/responses/NotFound' '400': $ref: '#/components/responses/InvalidInput' '/v1/chat/completions': post: tags: - Inference summary: Chat Completions description: Get chat completions from a serverless deployment. operationId: getChatCompletions requestBody: required: true description: Provide your model and chat input for completions content: application/json: schema: type: object properties: model: type: string example: "Llama-3.2-1B-Completion" description: "Deployment name or shared serverless base model name" messages: type: array items: type: object properties: role: type: string enum: - system - user - assistant example: "user" description: The role of the message sender content: type: string example: "Hello, how are you?" 
description: The content of the message required: - role - content temperature: type: number example: 0.7 description: The sampling temperature to use max_tokens: type: integer example: 64 description: The maximum number of new tokens to generate top_p: type: number example: 0.9 description: The cumulative probability for token selection top_k: type: integer example: 50 description: The number of highest probability tokens to keep for top-k-filtering required: - model - messages responses: '200': description: Returns a chat completion object, or a streamed sequence of chat completion chunk objects if the request is streamed. content: application/json: schema: "$ref": "#/components/schemas/ChatCompletionObject" '404': $ref: '#/components/responses/NotFound' '400': $ref: '#/components/responses/InvalidInput' '/v1/embeddings': post: tags: - Inference summary: Embeddings description: Get embeddings from a deployment using the provided input. operationId: getEmbeddingsFromDeployment requestBody: required: true description: Provide your input for embeddings content: application/json: schema: type: object properties: model: type: string example: "Llama-3.2-1B-Instruct" description: "Deployment name or shared serverless base model name" input: type: string example: "What is the capital of France?" description: The input for which to get embeddings target: type: string example: "Paris is the capital of France." 
description: The target sentence for comparison cosine: type: boolean example: true description: Whether to return cosine similarity manhattan: type: boolean example: false description: Whether to return Manhattan distance euclidean: type: boolean example: false description: Whether to return Euclidean distance required: - model - input - target responses: '200': description: Successful operation content: application/json: schema: type: object properties: id: type: string example: "rq-12345" description: The unique identifier for the embedding response: type: object properties: input_embedding: type: array items: type: number description: The embedding vector for the input target_embedding: type: array items: type: number description: The embedding vector for the target cosine_score: type: number description: The cosine similarity between the embeddings if target not none manhattan_distance: type: number description: The Manhattan distance between the embeddings if manhattan = true euclidean_distance: type: number description: The Euclidean distance between the embeddings if euclidean = true '404': $ref: '#/components/responses/NotFound' '400': $ref: '#/components/responses/InvalidInput' '/v1/classification': post: tags: - Inference summary: Classification description: Get classification from a deployment using the provided input. operationId: getClassificationFromDeployment requestBody: required: true description: Provide your input for classification content: application/json: schema: type: object properties: model: type: string example: "Llama-3.2-1B-Instruct" description: "Deployment name or shared serverless base model name" input: type: string example: "category1 related text" description: The input for which to get classification data_format: type: string enum: - logits - probs description: | (Optional) Specifies the format of the classification output data. - If set to `logits` (default), the raw, unnormalized scores from the model are returned. 
- If set to `probs`, the output will be normalized probabilities. required: - model - input responses: '200': description: Successful operation content: application/json: schema: type: object properties: id: type: string example: "classify-12345" description: The unique identifier for the classification model: type: string example: "dp-12345" data: type: array items: oneOf: - title: logits type: object properties: index: type: number example: 0 logits: type: object description: | A mapping from category labels (as keys) to their corresponding logit values. Present when `data_format` is set to `logits` (default). additionalProperties: type: number format: float example: 0.9453125 example: cat: 0.9453125 dog: 0.01875 rabbit: 0.0859375 turtle: 0.015625 required: - index - logits - title: probs type: object properties: index: type: number example: 0 probs: type: object description: | A mapping from category labels (as keys) to their corresponding probability values (between 0 and 1). Present when `data_format` is set to `probs`. additionalProperties: type: number format: float example: 0.82 example: cat: 0.82 dog: 0.02 rabbit: 0.13 turtle: 0.03 required: - index - probs usage: type: object properties: completion_tokens: type: number example: 0 prompt_tokens: type: number example: 1587 prompt_token_details: type: string total_tokens: type: number example: 1587 '404': $ref: '#/components/responses/NotFound' '400': $ref: '#/components/responses/InvalidInput' '/v1/testing-functions': get: tags: - Testing Functions summary: List Testing Functions description: Returns a list of test functions operationId: listTestFunctions responses: '200': description: List of Testing Function's summary object. 
content: application/json: schema: type: array items: "$ref": "#/components/schemas/TestingFunctionSummary" '401': $ref: '#/components/responses/Unauthorized' # An object to hold reusable parts that can be used across the definition components: parameters: ProjectId: name: project_id in: path schema: type: string description: The ID of the project required: true schemas: CompletionObject: type: object properties: id: type: string example: "cmpl-8565e8d5db17438291a189255c36c558" object: type: string example: "text_completion" created: type: number example: 1741570283 model: type: string example: "Llama-3.2-1B-Instruct" choices: type: array items: type: object properties: index: type: number example: 0 text: type: string example: " Paris. The French language is a Romance language and the official language of France." logprobs: type: [ string, "null" ] prompt_logprobs: type: [ string, "null" ] stop_reason: type: [ string, "null" ] finish_reason: type: string example: "length" usage: type: object properties: prompt_tokens: type: number example: 6 completion_tokens: type: number example: 16 total_tokens: type: number example: 22 prompt_tokens_details: type: [ string, "null" ] ChatCompletionObject: type: object properties: id: type: string example: "chatcmpl-B9MHDbslfkBeAs8l4bebGdFOJ6PeG" object: type: string example: "chat.completion" created: type: number example: 1741570283 model: type: string example: "Llama-3.2-1B-Instruct" choices: type: array items: type: object properties: index: type: number example: 0 message: type: object properties: role: type: string example: "assistant" content: type: string example: "How can I help you today?"
refusal: type: [string, "null"] finish_reason: type: string example: "stop" usage: type: object properties: prompt_tokens: type: number example: 36 completion_tokens: type: number example: 8 total_tokens: type: number example: 44 prompt_tokens_details: type: object properties: cached_tokens: type: number example: 0 audio_tokens: type: number example: 0 completion_tokens_details: type: object properties: reasoning_tokens: type: number example: 0 audio_tokens: type: number example: 0 accepted_prediction_tokens: type: number example: 0 rejected_prediction_tokens: type: number example: 0 service_tier: type: string example: "default" system_fingerprint: type: string example: "fp_fc9f1d7035" BaseModelSummary: type: object properties: name: type: string example: "Llama-3.2-1B-Instruct" type: type: string enum: - completion - classification - embeddings - chat example: "completion" parameter_template: type: object additionalProperties: anyOf: - type: string - type: number - type: integer support_serverless: type: boolean example: false ProjectSummary: type: object description: Project Summary Object properties: id: type: string example: "ms-12345" description: The unique identifier for the project name: type: string example: "my_project" description: The name of the project ProjectDetail: type: object description: Project Object properties: id: type: string example: "ms-12345" description: The unique identifier for the project name: type: string example: "my_project" description: The name of the project datasets: type: array items: "$ref": '#/components/schemas/DatasetSummary' training_jobs: type: array items: "$ref": "#/components/schemas/TrainingJobSummary" deployments: type: array items: "$ref": "#/components/schemas/DeploymentSummary" DatasetSummary: type: object description: Dataset Summary Object properties: id: type: string example: "ds-12345" description: The unique identifier for the dataset name: type: string example: "my_dataset" description: The name of the
dataset created_at: type: integer example: 1633036800 description: The UNIX timestamp when the dataset was created is_uploaded: type: boolean example: true description: Whether the dataset has been uploaded DatasetDetail: type: object description: Dataset Object properties: id: type: string example: "ds-12345" description: The unique identifier for the dataset name: type: string example: "my_dataset" description: The name of the dataset is_uploaded: type: boolean example: true description: Whether the dataset has been uploaded dataset_download_url: type: string example: "s3://my_dataset" description: The URL to download the dataset upload_by: type: string example: "user-12345" description: The user ID who uploaded the dataset created_at: type: integer example: 1633036800 description: The UNIX timestamp when the dataset was created updated_at: type: integer example: 1633036800 description: The UNIX timestamp when the dataset was last updated TestingFunctionSummary: type: object description: Testing Function Summary Object properties: name: type: string example: "similarity_score_shared" description: Name of the test function (matched with exact python method name) content: type: string example: "def similarity_score_shared(input_text: str, expected_output: str, predicted_output: str): \n ..."
description: Contents of test function dependencies: type: array example: [ "external-library" ] items: type: string description: List of external dependencies that need to be downloaded with pip to run TrainingJobSummary: type: object description: Training Job Summary Object properties: id: type: string example: "tr-12345" description: The unique identifier for the training job name: type: string example: "training-1" description: The name of the training job training_technique: type: string enum: - SFT - GRPO example: "SFT" description: "Training technique that has been used for this training job" task_type: type: string enum: - text-generation - classification - embedding - regression example: "classification" base_model_option: type: string enum: - pre-trained - fine-tuned example: "pre-trained" description: "Base model option used for training" base_model: type: string example: "Llama-3.2-1B-Instruct" description: The base model used for training base_fine_tuned_model: type: string example: "training-job-1" description: (base_model_option == "fine-tuned") Fine-tuned model that is used as a base model for the training job base_fine_tuned_model_checkpoint: type: integer example: 5 description: (base_model_option == "fine-tuned") Checkpoint that is used as a base model for the training job train_dataset_name: type: string example: "train-dataset-1" description: Name of the dataset used for fine-tuning test_functions: type: array example: [ "test_similarity_shared" ] items: type: string description: List of test function names used in the training job reward: type: object description: (GRPO) Reward weight map properties: test_function_1: type: number format: float example: 0.2 test_function_2: type: number format: float example: 0.8 status: type: string enum: - Pending - Running - Success - Testing - Failed - TimedOut - Cancelled example: "Running" description: The current status of the training job progress: type: integer example: 75 description: The current progress of the training job
created_by: type: string example: "user-12345" description: The user ID who created the training job created_at: type: integer example: 1633036800 description: The timestamp when the TrainingJob was created updated_at: type: integer example: 1633036800 description: The timestamp when the TrainingJob was last updated TrainingJobDetail: type: object description: Training Job Object properties: id: type: string example: "tr-12345" description: The unique identifier for the training job name: type: string example: "training-1" description: The name of the training job training_technique: type: string enum: - SFT - GRPO example: "SFT" description: "Training technique that has been used for this training job" task_type: type: string enum: - text-generation - classification - embedding - regression base_model_option: type: string enum: - pre-trained - fine-tuned example: "pre-trained" description: "Base model option used for training" base_model: type: string example: "Llama-3.2-1B-Instruct" description: The base model used for training base_fine_tuned_model: type: string example: "training-job-1" description: (base_model_option == "fine-tuned") Fine-tuned model that is used as a base model for the training job base_fine_tuned_model_checkpoint: type: integer example: 5 description: (base_model_option == "fine-tuned") Checkpoint that is used as a base model for the training job test_functions: type: array example: [ "test_similarity_shared" ] items: type: string description: List of test function names used in the training job reward: type: object description: (GRPO) Reward weight map properties: test_function_1: type: number format: float example: 0.2 test_function_2: type: number format: float example: 0.8 status: type: string enum: - Pending - Running - Testing - Success - Failed - TimedOut - Cancelled example: "Running" description: The current status of the training job progress: type: integer example: 75 description: The current progress of the training job train_dataset: "$ref":
"#/components/schemas/DatasetSummary" test_dataset: "$ref": "#/components/schemas/DatasetSummary" training_loss: type: array items: type: object properties: timestamp: type: number example: 1633036800.0 loss: type: number example: 0.1234 hyper_parameters: type: object additionalProperties: oneOf: - type: string - type: number - type: integer - type: boolean created_by: type: string example: "user-12345" description: The user ID who created the training job created_at: type: integer example: 1633036800 description: The timestamp when the TrainingJob was created updated_at: type: integer example: 1633036800 description: The timestamp when the TrainingJob was last updated TrainingJobCheckpoint: type: object description: Training Job Checkpoint Object properties: checkpoint: type: integer example: 3 description: The checkpoint number model_download_url: type: string example: "s3:/my_model" description: The URL to download the model checkpoint test_results: type: object properties: general_test_result: type: string example: "s3://" aggregated_test_result: type: object additionalProperties: type: string created_at: type: integer example: 1633036800 description: The timestamp when the checkpoint was created updated_at: type: integer example: 1633036800 description: The timestamp when the checkpoint was created InferenceEngineSummary: type: object description: Inference Engine Summary properties: id: type: string example: "engine-12345" description: The unique identifier for the inference engine name: type: string example: "inference-test-engine" status: type: string enum: - Creating - Active - Inactive example: Creating base_model: type: string example: "Llama-3.2-1B-Instruct" server_type: type: string example: "on-demand" task_type: type: string example: "text-generation" deployment_counts: type: integer example: 1 description: Number of deployments on this inference engine created_at: type: integer example: 1633036800 description: The timestamp when the deployment was 
created InferenceEngineDetail: type: object description: Inference Engine Object properties: id: type: string example: "engine-12345" description: The unique identifier for the inference engine name: type: string example: "test-inference-engine" status: type: string enum: - Creating - Active - Inactive example: "Active" base_model: type: string example: "Llama-3.2-3B-Instruct" task_type: type: string example: "text-generation" server_type: type: string example: "on-demand" deployments: type: array items: "$ref": "#/components/schemas/DeploymentSummary" base_model_option: type: string example: "pre-trained" last_access_timestamp: type: integer example: 1633036800 resource_management_config: oneOf: - title: Inactive Timeout description: Specifies the period of inactivity, in seconds, after which the deployment will be automatically deactivated if no incoming requests are received. While deactivated, the deployment incurs no charges. When a request is sent to a deactivated deployment, it will be reactivated to process the request; this reactivation process typically introduces a delay of approximately 8 to 20 minutes before the deployment becomes fully operational. type: object properties: management_type: type: string enum: - inactive_timeout example: "inactive_timeout" management_config: type: object properties: inactive_duration: type: integer example: 3600 description: Period of inactivity, in seconds - title: Schedule description: Specifies the schedule which the engine will be deployed with deployments (if existing on the engine).
type: object properties: management_type: type: string enum: - schedule example: "schedule" management_config: type: object properties: timezone: type: string example: "America/Los_Angeles" description: The IANA timezone for the schedule start_time: type: string example: "10:00" description: The start time for the schedule in HH:MM format end_time: type: string example: "18:00" description: The end time for the schedule in HH:MM format days: type: array example: [ "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday" ] items: type: string enum: - monday - tuesday - wednesday - thursday - friday - saturday - sunday description: The days of the week for the schedule created_at: type: integer example: 1633036800 description: The timestamp when the inference engine was created DeploymentSummary: type: object description: Deployment Summary Object properties: id: type: string example: "dp-12345" description: The unique identifier for the deployment name: type: string example: "deployment-1" description: The name of the deployment status: type: string enum: - Pending - Deploying - Deployed - Failed - Cancelled - Terminated - Deactivated - Reactivating - TimedOut example: "Deployed" description: The current status of the deployment created_at: type: integer example: 1633036800 description: The timestamp when the deployment was created DeploymentDetail: type: object description: Deployment Object properties: id: type: string example: "dp-12345" description: The unique identifier for the deployment name: type: string example: "deployment-1" description: The name of the deployment task_type: type: string example: "classification" description: Task type of the deployment metadata: description: Metadata of the deployment (varies by task_type) oneOf: - title: ClassificationMetadata description: Metadata for classification tasks type: object properties: labels: type: array items: type: string example: ["class1", "class2"] description: type: string example: "Deployment for
my training job" description: A description of the deployment base_model: type: string example: 'Llama-3.2-1B-Instruct' description: The base model used for deployment status: type: string enum: [Pending, Deploying, Deployed, Failed, Cancelled, Terminated, Deactivated, Reactivating, TimedOut] example: 'Deployed' description: The current status of the deployment training_id: type: string example: 'tr-12345' description: The training job ID used for deployment checkpoint: type: integer example: 4 description: The checkpoint number used for deployment created_at: type: integer example: 1633036800 description: The timestamp when the deployment was created updated_at: type: integer example: 1633036800 description: The timestamp when the deployment was last updated ApiResponse: type: object properties: code: type: integer format: int32 type: type: string message: type: string ApiErrorInvalidInput: type: object required: [status, error] properties: status: type: integer format: int32 example: 400 error: type: string example: Bad request ApiErrorNotFound: type: object required: [status, error, code] properties: status: type: integer format: int32 example: 404 error: type: string example: Not found code: type: string example: object_not_found ApiErrorUnauthorized: type: object required: [status, error] properties: status: type: integer format: int32 example: 401 error: type: string example: Unauthorized responses: Unauthorized: description: Unauthorized error content: application/json: schema: $ref: '#/components/schemas/ApiErrorUnauthorized' NotFound: description: Not Found error content: application/json: schema: $ref: '#/components/schemas/ApiErrorNotFound' InvalidInput: description: Invalid Input error content: application/json: schema: $ref: '#/components/schemas/ApiErrorInvalidInput' headers: ExpiresAfter: description: date in UTC when token expires schema: type: string format: date-time # Security scheme definitions that can be used across the
definition. securitySchemes: api_key: type: apiKey name: X-API-Key in: header