{ "openapi": "3.1.0", "info": { "title": "Cedana API", "description": "", "license": { "name": "" }, "version": "2.0.17" }, "paths": { "/v2/actions": { "get": { "tags": [ "Actions" ], "summary": "List actions", "operationId": "list", "parameters": [ { "name": "type", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } } ], "responses": { "200": { "description": "Returns the actions with status", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/Action" } }, "example": "" } } }, "400": { "description": "Failed list", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from jobs table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/actions/from_pod/{id}": { "get": { "tags": [ "Actions" ], "summary": "Get latest action belonging to a pod", "operationId": "latest_pod_action_id", "parameters": [ { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "The action_id of the given pod_id has been fetched successfully", "content": { "text/plain": { "schema": { "type": "string" }, "example": "" } } }, "404": { "description": "Failed to get action_id for the given pod_id", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from pods_to_actions table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/actions/upload": { "post": { "tags": [ "Actions" ], "summary": "Upload pod spec", "operationId": "upload_podspec", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CheckpointStatusWithPodSpec" } } }, "required": true }, "responses": { "200": { "description": "Success", "content": { "text/plain": { 
"schema": { "type": "string" }, "example": "Success" } } }, "404": { "description": "Failed to get action_id within the db", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from pods_to_actions table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/checkpoint/path": { "get": { "tags": [ "Actions" ], "summary": "Get path of latest checkpoint", "description": "Returns the path of the latest successful checkpoint for a given checkpoint name", "operationId": "get_latest_checkpoint_for_container_in_action", "responses": { "200": { "description": "Returns the restore path", "content": { "text/plain": { "schema": { "type": "string" }, "example": "" } } }, "400": { "description": "Failed to get action for request", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from jobs table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/checkpoint/pod": { "post": { "tags": [ "Actions" ], "summary": "Checkpoint pod", "operationId": "checkpoint_pod", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CheckpointPod" } } }, "required": true }, "responses": { "200": { "description": "Returns the ID for the task", "content": { "text/plain": { "schema": { "type": "string" }, "example": "550e8400-e29b-41d4-a716-446655440000" } } }, "400": { "description": "Invalid request parameters", "content": { "text/plain": { "schema": { "type": "string" } } } }, "404": { "description": "Failed to find checkpointable resource", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to create checkpoint or connect to eventstream", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth 
Token": [] } ] } }, "/v2/checkpoint/status/{action_id}": { "get": { "tags": [ "Actions" ], "summary": "Get status of checkpoint action", "description": "Returns the current status of a checkpoint action", "operationId": "get_checkpoint_status", "parameters": [ { "name": "action_id", "in": "path", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Returns the current status of the checkpoint operation", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StatusResponse" } } } }, "404": { "description": "Action ID not found", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to retrieve status from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/checkpoints": { "get": { "tags": [ "Checkpoints" ], "summary": "List checkpoints", "description": "Use query params to filter checkpoints. Supports filtering by `ids` (comma-separated UUIDs for single or multiple checkpoints)", "operationId": "list", "parameters": [ { "name": "ids", "in": "query", "description": "Comma-separated list of checkpoint UUIDs to filter by", "required": false, "schema": { "type": [ "string", "null" ] } } ], "responses": { "200": { "description": "Returns checkpoints", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/Checkpoint" } } } } }, "400": { "description": "Invalid query parameters", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] }, "post": { "tags": [ "Checkpoints" ], "summary": "Create checkpoint", "description": "Builds a new checkpoint without the metadata and information about the checkpoint with status initializing", "operationId": "add", "responses": { 
"200": { "description": "Returns the checkpoint ID for the new checkpoint", "content": { "text/plain": { "schema": { "type": "string" }, "example": "550e8400-e29b-41d4-a716-446655440000" } } }, "500": { "description": "Failed to create checkpoint in database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/checkpoints/deprecate/{id}": { "patch": { "tags": [ "Checkpoints" ], "summary": "Deprecate checkpoint", "description": "Marks checkpoint as deprecated, so that they can be removed on next internal state cleanup", "operationId": "deprecate", "parameters": [ { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Successfully deprecated checkpoint and returns id", "content": { "text/plain": { "schema": { "type": "string" }, "example": "550e8400-e29b-41d4-a716-446655440000" } } }, "400": { "description": "Invalid UUID provided", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to execute request against database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/checkpoints/info/{id}": { "put": { "tags": [ "Checkpoints" ], "summary": "Put checkpoint info", "description": "Add information about the checkpoint", "operationId": "update_info", "parameters": [ { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } } ], "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CheckpointInfo" } } }, "required": true }, "responses": { "200": { "description": "Successfully updated checkpoint info and returns ID", "content": { "text/plain": { "schema": { "type": "string" }, "example": "550e8400-e29b-41d4-a716-446655440000" } } }, "500": { "description": "Failed to execute request against database", "content": { "text/plain": { "schema": { "type": 
"string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/checkpoints/uploaded/{id}": { "post": { "tags": [ "Checkpoints" ], "summary": "Uploaded checkpoint", "description": "Marks checkpoint as successfully uploaded and ready for restore", "operationId": "uploaded", "parameters": [ { "name": "id", "in": "path", "required": true, "schema": { "type": "string" } } ], "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CheckpointSuccessInfo" } } }, "required": true }, "responses": { "200": { "description": "Marks the checkpoint as successfully uploaded", "content": { "text/plain": { "schema": { "type": "string" }, "example": "550e8400-e29b-41d4-a716-446655440000" } } }, "400": { "description": "Invalid request parameters", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to update checkpoint in database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/cluster": { "get": { "tags": [ "Clusters" ], "summary": "List clusters", "description": "Returns all clusters associated with the authenticated user's organization", "operationId": "list", "responses": { "200": { "description": "Returns a list of clusters", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/Cluster" } } } } }, "500": { "description": "Failed to fetch clusters from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] }, "post": { "tags": [ "Clusters" ], "summary": "Create cluster", "description": "Creates or updates info regarding a Kubernetes cluster", "operationId": "create", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateClusterRequest" } } }, "required": true }, "responses": { "200": { "description": "Returns the cluster id for the newly created or updated cluster", 
"content": { "text/plain": { "schema": { "type": "string" }, "example": "550e8400-e29b-41d4-a716-446655440000" } } }, "400": { "description": "Bad request or missing cluster name", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to create cluster in database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/cluster/count": { "get": { "tags": [ "Resources" ], "summary": "Get clusters total count", "operationId": "clusters_count", "responses": { "200": { "description": "Total count retrieved successfully", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TotalCountResponse" } } } }, "500": { "description": "Database error", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/cluster/sync": { "post": { "tags": [ "Clusters" ], "summary": "Sync cluster", "description": "Syncs cluster resources such as pods, jobs, and nodes", "operationId": "sync", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ClusterSyncRequest" } } }, "required": true }, "responses": { "200": { "description": "Resource id registered internally", "content": { "text/plain": { "schema": { "type": "string" } } } }, "400": { "description": "Bad request", "content": { "text/plain": { "schema": { "type": "string" } } } }, "404": { "description": "Cluster or node not found", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to sync resource to database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/cluster/workload": { "post": { "tags": [ "Clusters" ], "summary": "Create workload", "description": "Takes a cluster name and workload specification, then creates a Kubernetes job\non the specified cluster", "operationId": "create_workload", 
"requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/WorkloadReq" } } }, "required": true }, "responses": { "200": { "description": "Added workload to queue", "content": { "text/plain": { "schema": { "type": "string" } } } }, "400": { "description": "Error parsing body", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Server error", "content": { "text/plain": { "schema": { "type": "string" } } } } } } }, "/v2/cluster/{id}": { "delete": { "tags": [ "Clusters" ], "summary": "Delete cluster", "description": "This endpoint deletes a cluster and all its dependent entities in a single transaction\nwhich is rolled back if any part of the deletion fails", "operationId": "delete", "parameters": [ { "name": "id", "in": "path", "required": true, "schema": { "type": "string", "format": "uuid" } } ], "responses": { "200": { "description": "Delete the given cluster, and return Status OK" }, "404": { "description": "Cluster not found", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to delete cluster from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/discover/{name}": { "get": { "tags": [ "Service" ], "summary": "Service discovery", "operationId": "discovery", "parameters": [ { "name": "name", "in": "path", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Returns url for the service named", "content": { "text/plain": { "schema": { "type": "string" } } } }, "404": { "description": "Service not found", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to fetch service configuration", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/events": { "get": { "tags": [ "Events" ], "summary": "List events", 
"description": "Returns events within the specified time window, optionally filtered by operation or resource type.\nThe metadata field contains structured information about the event that can be used for filtering and display.", "operationId": "list_events", "parameters": [ { "name": "time", "in": "query", "description": "Time window in seconds (default: 3600 = 1 hour)", "required": false, "schema": { "type": "integer", "format": "int32" } }, { "name": "operation", "in": "query", "description": "Filter by operation (e.g., \"checkpoint\", \"restore\")", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "resource_type", "in": "query", "description": "Filter by resource type (e.g., \"pod\", \"action\", \"checkpoint\")", "required": false, "schema": { "type": [ "string", "null" ] } } ], "responses": { "200": { "description": "Returns list of events", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/Event" } } } } }, "500": { "description": "Failed to fetch events from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/files/dir/{path}": { "get": { "tags": [ "Files" ], "summary": "Read directory contents", "operationId": "read_dir", "parameters": [ { "name": "path", "in": "path", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Returns the list of files in the directory", "content": { "application/json": { "schema": { "type": "array", "items": { "type": "string" } }, "example": [ "file1.txt", "file2.txt", "subdir/" ] } } }, "500": { "description": "Failed to list directory contents", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/files/{path}": { "get": { "tags": [ "Files" ], "summary": "Download file", "operationId": "get_file", "parameters": [ { "name": "path", "in": "path", "required": true, 
"schema": { "type": "string" } } ], "responses": { "200": { "description": "Returns the URL to download the file from", "content": { "text/plain": { "schema": { "type": "string" }, "example": "https://storage.googleapis.com/bucket/file.tar.gz?signature=..." } } }, "500": { "description": "Failed to generate download URL", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] }, "put": { "tags": [ "Files" ], "summary": "Upload file", "operationId": "push_files", "parameters": [ { "name": "path", "in": "path", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Returns the URL to upload the file to", "content": { "text/plain": { "schema": { "type": "string" }, "example": "https://storage.googleapis.com/bucket/file.tar.gz?signature=..." } } }, "500": { "description": "Failed to generate upload URL", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/instances": { "get": { "tags": [ "Cloud Bursting" ], "summary": "List instances", "operationId": "list_instances", "responses": { "200": { "description": "List of instances", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/InstanceInfo" } } } } }, "500": { "description": "Internal server error", "content": { "text/plain": { "schema": { "type": "string" } } } } } }, "post": { "tags": [ "Cloud Bursting" ], "summary": "Create instance", "operationId": "create_instance", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateInstanceRequest" } } }, "required": true }, "responses": { "200": { "description": "Instance created successfully", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CreateInstanceResponse" } } } }, "400": { "description": "Invalid request", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { 
"description": "Internal server error", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/instances/bursting-status": { "get": { "tags": [ "Cloud Bursting" ], "summary": "Get configuration status", "operationId": "get_bursting_status", "responses": { "200": { "description": "Bursting configuration status", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/BurstingStatus" } } } } } } }, "/v2/instances/types": { "get": { "tags": [ "Cloud Bursting" ], "summary": "Get available instance types", "operationId": "get_instance_types", "parameters": [ { "name": "num_gpus", "in": "query", "description": "Filter by number of GPUs", "required": false, "schema": { "type": "integer", "format": "int32" } }, { "name": "gpu_type", "in": "query", "description": "Filter by GPU type", "required": false, "schema": { "type": "string" } }, { "name": "sort", "in": "query", "description": "Sort results (e.g., 'price')", "required": false, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Available instance types", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/InstanceType" } } } } }, "500": { "description": "Internal server error", "content": { "text/plain": { "schema": { "type": "string" } } } } } } }, "/v2/instances/{instance_id}": { "delete": { "tags": [ "Cloud Bursting" ], "summary": "Delete instance", "operationId": "delete_instance", "parameters": [ { "name": "instance_id", "in": "path", "description": "Instance ID to delete", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Instance deleted successfully", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to delete instance", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, 
"/v2/instances/{instance_id}/status": { "get": { "tags": [ "Cloud Bursting" ], "summary": "Get instance status", "operationId": "get_instance_status", "parameters": [ { "name": "instance_id", "in": "path", "description": "Instance ID", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Instance status", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/InstanceStatus" } } } }, "404": { "description": "Instance not found", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to get status", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/jobs": { "get": { "tags": [ "Resources" ], "summary": "List jobs", "operationId": "job_list", "responses": { "200": { "description": "Returns jobs", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/JobResponse" } } } } }, "500": { "description": "Failed to fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/jobs/count": { "get": { "tags": [ "Resources" ], "summary": "Get jobs total count", "operationId": "jobs_count", "responses": { "200": { "description": "Total count retrieved successfully", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TotalCountResponse" } } } }, "500": { "description": "Database error", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/jobs/namespaces": { "get": { "tags": [ "Resources" ], "summary": "Get job namespaces", "operationId": "job_namespaces", "responses": { "200": { "description": "Available namespaces retrieved successfully", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/NamespacesResponse" } } } }, "500": { "description": "Database error", "content": { 
"text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/jobs/paginated": { "get": { "tags": [ "Resources" ], "summary": "List jobs (paginated)", "operationId": "job_list_paginated", "parameters": [ { "name": "limit", "in": "query", "required": false, "schema": { "type": [ "integer", "null" ], "format": "int64" } }, { "name": "offset", "in": "query", "required": false, "schema": { "type": [ "integer", "null" ], "format": "int64" } }, { "name": "sort", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "ascending", "in": "query", "required": false, "schema": { "type": [ "boolean", "null" ] } }, { "name": "status", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "namespace", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "job_name", "in": "query", "description": "job name to query against (uses postgres ILIKE pattern search)", "required": false, "schema": { "type": [ "string", "null" ] } } ], "responses": { "200": { "description": "Returns paginated jobs with total count and available filters", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/PaginatedJobResponse" } } } }, "500": { "description": "Failed to fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/jobs/statuses": { "get": { "tags": [ "Resources" ], "summary": "Get job statuses", "operationId": "job_statuses", "responses": { "200": { "description": "Available statuses retrieved successfully", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StatusesResponse" } } } }, "500": { "description": "Database error", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/logs/k8": { "get": { "tags": [ "Logs" ], "summary": "Get logs (K8s)", 
"operationId": "get_k8_logs", "parameters": [ { "name": "pod", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "limit", "in": "query", "required": false, "schema": { "type": [ "integer", "null" ], "format": "int64" } } ], "responses": { "200": { "description": "Returns k8 pod logs from duckdb", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/PodLogList" } } } } }, "500": { "description": "Failed to connect/fetch from logs table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/k8": { "get": { "tags": [ "Metrics" ], "summary": "Get metrics (K8s)", "operationId": "get_k8_metrics", "responses": { "200": { "description": "Returns k8 metrics from duckdb", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/NodeResourceMetricList" } } } } }, "500": { "description": "Failed to connect/fetch from metrics table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/slurm": { "get": { "tags": [ "Metrics" ], "summary": "Get metrics (Slurm)", "operationId": "get_slurm_metrics", "responses": { "200": { "description": "Returns top 10 latest SLURM metrics from duckdb", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/SlurmMetric" } } } } }, "500": { "description": "Failed to connect/fetch from SLURM metrics table in database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/slurm/overview": { "get": { "tags": [ "Metrics" ], "summary": "Get cluster overview (Slurm)", "operationId": "get_slurm_cluster_overview", "responses": { "200": { "description": "Returns latest SLURM cluster overview statistics", "content": { 
"application/json": { "schema": { "$ref": "#/components/schemas/SlurmClusterOverview" } } } }, "500": { "description": "Failed to fetch SLURM cluster overview", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/slurm/overview/state": { "get": { "tags": [ "Metrics" ], "summary": "Get state breakdown (Slurm)", "operationId": "get_slurm_state_breakdown", "responses": { "200": { "description": "Returns state breakdown for CPUs and nodes", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SlurmStateBreakdown" } } } }, "500": { "description": "Failed to fetch SLURM state breakdown", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/slurm/partitions": { "get": { "tags": [ "Metrics" ], "summary": "Get partition statistics (Slurm)", "operationId": "get_partition_stats", "responses": { "200": { "description": "Returns list of all partitions with their statistics", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/PartitionStats" } } } } }, "500": { "description": "Failed to fetch partition statistics", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/slurm/partitions/{name}/timeseries": { "get": { "tags": [ "Metrics" ], "summary": "Get partition time series (Slurm)", "description": "Returns CPU load and free memory time series for the specified partition over the last 10 data\npoints", "operationId": "get_partition_timeseries", "parameters": [ { "name": "name", "in": "path", "description": "Partition name to query timeseries for", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Returns time series data for the specified partition", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/PartitionTimeSeries" } 
} } }, "500": { "description": "Failed to fetch partition time series", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/slurm/timeseries/cpu-load": { "get": { "tags": [ "Metrics" ], "summary": "Get CPU load time series (Slurm)", "operationId": "get_cpu_load_timeseries", "responses": { "200": { "description": "Returns CPU load time series (last 10 data points)", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/TimeSeriesDataPoint" } } } } }, "500": { "description": "Failed to fetch CPU load time series", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/metrics/slurm/timeseries/memory": { "get": { "tags": [ "Metrics" ], "summary": "Get memory time series (Slurm)", "operationId": "get_memory_timeseries", "responses": { "200": { "description": "Returns memory time series (last 10 data points for free and allocated)", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/MemoryTimeSeries" } } } }, "500": { "description": "Failed to fetch memory time series", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/nodes": { "get": { "tags": [ "Resources" ], "summary": "List nodes", "description": "Will only return nodes from clusters with status 'active' if no params are provided. 
Also, nodes\nwith last_sync older than 5 minutes are not returned.", "operationId": "node_list", "parameters": [ { "name": "id", "in": "query", "required": false, "schema": { "type": [ "string", "null" ], "format": "uuid" } } ], "responses": { "200": { "description": "Returns nodes", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/NodeResponse" } } } } }, "500": { "description": "Failed to fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/nodes/count": { "get": { "tags": [ "Resources" ], "summary": "Get nodes total count", "operationId": "nodes_count", "responses": { "200": { "description": "Total count retrieved successfully", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TotalCountResponse" } } } }, "500": { "description": "Database error", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/pods": { "get": { "tags": [ "Resources" ], "summary": "List pods", "description": "Will not return pods with status 'deleted', and only from clusters with status 'active' and\npods belonging to nodes whose last_sync is within the last 5 minutes.", "operationId": "pod_list", "parameters": [ { "name": "id", "in": "query", "required": false, "schema": { "type": [ "string", "null" ], "format": "uuid" } } ], "responses": { "200": { "description": "Returns pods", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/PodResponse" } } } } }, "500": { "description": "Failed to fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/pods/count": { "get": { "tags": [ "Resources" ], "summary": "Get total count of pods", "operationId": "pods_count", "responses": { "200": { "description": "Total count retrieved 
successfully", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TotalCountResponse" } } } }, "500": { "description": "Database error", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/pods/namespaces": { "get": { "tags": [ "Resources" ], "summary": "List pod namespaces", "operationId": "pod_namespaces", "responses": { "200": { "description": "Returns distinct pod namespaces", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/NamespacesResponse" } } } }, "500": { "description": "Failed to fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/pods/paginated": { "get": { "tags": [ "Resources" ], "summary": "List pods (paginated)", "operationId": "pod_list_paginated", "parameters": [ { "name": "id", "in": "query", "required": false, "schema": { "type": [ "string", "null" ], "format": "uuid" } }, { "name": "limit", "in": "query", "required": false, "schema": { "type": [ "integer", "null" ], "format": "int64" } }, { "name": "offset", "in": "query", "required": false, "schema": { "type": [ "integer", "null" ], "format": "int64" } }, { "name": "sort", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "ascending", "in": "query", "required": false, "schema": { "type": [ "boolean", "null" ] } }, { "name": "status", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "namespace", "in": "query", "required": false, "schema": { "type": [ "string", "null" ] } }, { "name": "pod_name", "in": "query", "description": "pod name to query against (uses postgres ILIKE pattern search)", "required": false, "schema": { "type": [ "string", "null" ] } } ], "responses": { "200": { "description": "Returns paginated pods with total count and available filters", "content": { "application/json": { "schema": { "$ref": 
"#/components/schemas/PaginatedPodResponse" } } } }, "500": { "description": "Failed to fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/pods/statuses": { "get": { "tags": [ "Resources" ], "summary": "List pod statuses", "operationId": "pod_statuses", "responses": { "200": { "description": "Returns distinct pod statuses", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StatusesResponse" } } } }, "500": { "description": "Failed to fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/policy/create": { "post": { "tags": [ "Policy" ], "summary": "Create policy", "operationId": "policy_create", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/PolicyRequest" } } }, "required": true }, "responses": { "200": { "description": "Policy created successfully", "content": { "text/plain": { "schema": { "type": "string" } } } }, "400": { "description": "Invalid request body", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to create policy in policy table", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/policy/delete": { "delete": { "tags": [ "Policy" ], "summary": "Delete policy", "operationId": "policy_delete", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/PolicyDeleteRequest" } } }, "required": true }, "responses": { "200": { "description": "Policy deleted successfully", "content": { "text/plain": { "schema": { "type": "string" } } } }, "400": { "description": "Invalid request body", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to delete policy from policy table", "content": { "text/plain": { "schema": { "type": 
"string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/policy/list": { "get": { "tags": [ "Policy" ], "summary": "List policies", "operationId": "policy_list", "responses": { "200": { "description": "Policies retrieved successfully", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/PolicyResponse" } } } } }, "400": { "description": "Invalid request body", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to retriev policies from policy table", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/policy/update": { "post": { "tags": [ "Policy" ], "summary": "Update policy", "operationId": "policy_update", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/PolicyRequest" } } }, "required": true }, "responses": { "200": { "description": "Policy updated successfully", "content": { "text/plain": { "schema": { "type": "string" } } } }, "400": { "description": "Invalid request body", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to update policy from policy table", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/policy/{policy_id}/pods": { "get": { "tags": [ "Policy" ], "summary": "Get pods monitored by a specific policy", "operationId": "policy_pods", "parameters": [ { "name": "policy_id", "in": "path", "description": "Policy UUID", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Returns pods currently monitored by the policy", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/PolicyPodResponse" } } } } }, "400": { "description": "Invalid policy_id format", "content": { "text/plain": { "schema": { "type": "string" } } } }, "404": { 
"description": "Policy not found", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/restore/pod": { "post": { "tags": [ "Actions" ], "summary": "Restore pod", "operationId": "restore_pod", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RestorePod" } } }, "required": true }, "responses": { "200": { "description": "Returns the checkpoint ID for the new checkpoint", "content": { "text/plain": { "schema": { "type": "string" }, "example": "550e8400-e29b-41d4-a716-446655440000" } } }, "400": { "description": "Invalid action_id or request parameters", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to create restore or connect to eventstream", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/checkpoint/job": { "post": { "tags": [ "Actions (Slurm)" ], "summary": "Checkpoint job", "operationId": "checkpoint_slurm_job", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CheckpointSlurmJob" } } }, "required": true }, "responses": { "200": { "description": "Returns the id for the task", "content": { "text/plain": { "schema": { "type": "string" }, "example": "" } } }, "404": { "description": "Failed to find checkpointable resource", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from jobs table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/clusters": { "get": { "tags": [ "Clusters (Slurm)" ], "summary": "List clusters", "description": "Use query params to filter clusters", "operationId": "list", "responses": { 
"200": { "description": "Returns clusters", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/SlurmCluster" } } } } }, "500": { "description": "Failed to connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/clusters_paginated": { "get": { "tags": [ "Clusters (Slurm)" ], "summary": "List clusters (paginated)", "description": "Use query params to filter clusters", "operationId": "list_paginated", "parameters": [ { "name": "offset", "in": "query", "description": "Number of records to skip (default: 0)", "required": false, "schema": { "type": "integer", "format": "int32" } }, { "name": "limit", "in": "query", "description": "Maximum number of records to return (default: 50, max: 100)", "required": false, "schema": { "type": "integer", "format": "int32" } } ], "responses": { "200": { "description": "Returns clusters", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/SlurmCluster" } } } } }, "500": { "description": "Failed to connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/jobs": { "get": { "tags": [ "Jobs (Slurm)" ], "summary": "List jobs", "description": "Use query params to filter jobs", "operationId": "list", "responses": { "200": { "description": "Returns jobs", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/SlurmJob" } } } } }, "500": { "description": "Failed to connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/jobs_paginated": { "get": { "tags": [ "Jobs (Slurm)" ], "summary": "List jobs (paginated)", "description": "Use query params to filter jobs", "operationId": "list_paginated", "parameters": [ { 
"name": "offset", "in": "query", "description": "Number of records to skip (default: 0)", "required": false, "schema": { "type": "integer", "format": "int32" } }, { "name": "limit", "in": "query", "description": "Maximum number of records to return (default: 50, max: 100)", "required": false, "schema": { "type": "integer", "format": "int32" } } ], "responses": { "200": { "description": "Returns jobs", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/SlurmJob" } } } } }, "500": { "description": "Failed to connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/nodes": { "get": { "tags": [ "Nodes (Slurm)" ], "summary": "List nodes", "description": "Use query params to filter nodes", "operationId": "list", "responses": { "200": { "description": "Returns nodes", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/SlurmNode" } } } } }, "500": { "description": "Failed to connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/nodes_paginated": { "get": { "tags": [ "Nodes (Slurm)" ], "summary": "List nodes (paginated)", "description": "Use query params to filter nodes", "operationId": "list_paginated", "parameters": [ { "name": "offset", "in": "query", "description": "Number of records to skip (default: 0)", "required": false, "schema": { "type": "integer", "format": "int32" } }, { "name": "limit", "in": "query", "description": "Maximum number of records to return (default: 50, max: 100)", "required": false, "schema": { "type": "integer", "format": "int32" } } ], "responses": { "200": { "description": "Returns nodes", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/SlurmNode" } } } } }, "500": { "description": "Failed to 
connect/fetch from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/slurm/restore/job": { "post": { "tags": [ "Actions (Slurm)" ], "summary": "Restore job", "operationId": "restore_slurm_job", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RestoreSlurmJob" } } }, "required": true }, "responses": { "200": { "description": "Returns the checkpoint id for the new checkpoint", "content": { "text/plain": { "schema": { "type": "string" }, "example": "" } } }, "400": { "description": "Failed to create checkpoint", "content": { "text/plain": { "schema": { "type": "string" } } } }, "404": { "description": "Failed to create restore", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Failed to connect/fetch from jobs table from database", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } }, "/v2/user": { "get": { "tags": [ "User" ], "summary": "Get user", "description": "Used to validate authentication token", "operationId": "get", "responses": { "200": { "description": "Returns UUID", "content": { "text/plain": { "schema": { "type": "string" } } } }, "401": { "description": "Invalid API key", "content": { "text/plain": { "schema": { "type": "string" } } } }, "404": { "description": "User not found", "content": { "text/plain": { "schema": { "type": "string" } } } }, "500": { "description": "Internal server error", "content": { "text/plain": { "schema": { "type": "string" } } } } }, "security": [ { "Auth Token": [] } ] } } }, "components": { "schemas": { "Action": { "type": "object", "required": [ "action_id", "checkpoint_id", "status", "type", "details", "gpu", "platform" ], "properties": { "action_id": { "type": "string", "format": "uuid" }, "action_timestamp": { "type": [ "string", "null" ], "format": "date-time" }, "checkpoint_completed_timestamp": { "type": 
[ "string", "null" ], "format": "date-time" }, "checkpoint_id": { "type": "string", "format": "uuid" }, "details": {}, "gpu": { "type": "string" }, "node_name": { "type": [ "string", "null" ] }, "platform": { "type": "string" }, "reason": { "type": [ "string", "null" ] }, "status": { "type": "string" }, "total_duration": { "type": [ "integer", "null" ], "format": "int64" }, "total_io": { "type": [ "integer", "null" ], "format": "int64" }, "type": { "type": "string" } } }, "Alert": { "type": "object", "properties": { "date_threshold": { "type": [ "string", "null" ] }, "spend_threshold": { "type": [ "string", "null" ] } } }, "AutoDelete": { "type": "object", "properties": { "date_threshold": { "type": [ "string", "null" ] }, "spend_threshold": { "type": [ "string", "null" ] } } }, "AvailableFilters": { "type": "object", "required": [ "statuses", "namespaces" ], "properties": { "namespaces": { "type": "array", "items": { "type": "string" } }, "statuses": { "type": "array", "items": { "type": "string" } } } }, "BurstingConfigStatus": { "type": "string", "enum": [ "not_configured", "configured", "invalid" ] }, "BurstingStatus": { "type": "object", "required": [ "status", "message", "can_use_bursting" ], "properties": { "can_use_bursting": { "type": "boolean" }, "message": { "type": "string" }, "status": { "$ref": "#/components/schemas/BurstingConfigStatus" } } }, "Checkpoint": { "type": "object", "description": "Checkpoint is basic unit for cedana service operation\nit stores all the information regarding the snapshot we can use to save and then restore state", "required": [ "id", "status", "gpu", "platform" ], "properties": { "checksum": { "type": [ "string", "null" ] }, "gpu": { "type": "string" }, "id": { "type": "string", "format": "uuid" }, "info": {}, "name": { "type": [ "string", "null" ] }, "platform": { "type": "string" }, "status": { "$ref": "#/components/schemas/CheckpointStatus" } } }, "CheckpointInfo": { "type": "object", "required": [ "gpu", "platform" ], 
"properties": { "gpu": { "type": "string" }, "info": {}, "platform": { "type": "string" } } }, "CheckpointKind": { "type": "string", "enum": [ "simple", "rootfs", "rootfsonly" ] }, "CheckpointOperationStatus": { "type": "string", "enum": [ "initialized", "processing", "checkpoint_created", "ready", "error", "not_found" ] }, "CheckpointOverrides": { "type": "object", "properties": { "asynchronous": { "type": "boolean" }, "compression": { "type": [ "string", "null" ] }, "criu_opts": { "type": [ "string", "null" ] }, "directory": { "type": [ "string", "null" ] }, "gpu_freeze_type": { "type": [ "string", "null" ] }, "streams": { "type": "integer", "format": "int32" } } }, "CheckpointPod": { "type": "object", "properties": { "action_id": { "type": [ "string", "null" ], "readOnly": true }, "cluster_id": { "type": [ "string", "null" ] }, "kind": { "$ref": "#/components/schemas/CheckpointKind" }, "namespace": { "type": [ "string", "null" ] }, "overrides": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/CheckpointOverrides" } ] }, "pod_id": { "type": [ "string", "null" ] }, "pod_name": { "type": [ "string", "null" ] }, "reason": { "$ref": "#/components/schemas/CheckpointReason" } } }, "CheckpointReason": { "type": "string", "enum": [ "heartbeat", "manual" ] }, "CheckpointSlurmJob": { "type": "object", "properties": { "action_id": { "type": [ "string", "null" ], "readOnly": true }, "job_id": { "type": [ "string", "null" ] }, "job_name": { "type": [ "string", "null" ] }, "kind": { "$ref": "#/components/schemas/CheckpointKind" }, "reason": { "$ref": "#/components/schemas/CheckpointReason" } } }, "CheckpointStatus": { "type": "string", "enum": [ "initializing", "updated_info", "possibly_uploaded", "ready", "deprecated" ] }, "CheckpointStatusWithPodSpec": { "type": "object", "required": [ "status", "action_id" ], "properties": { "action_id": { "type": "string" }, "persistent_volume_claim": {}, "pod_id": { "type": [ "string", "null" ] }, "pod_metadata_spec": {}, 
"pod_spec": {}, "status": { "type": "string" } } }, "CheckpointSuccessInfo": { "type": "object", "properties": { "restore_path": { "type": [ "string", "null" ] } } }, "Cluster": { "type": "object", "required": [ "id", "name", "status" ], "properties": { "id": { "type": "string", "format": "uuid" }, "last_sync": { "type": [ "string", "null" ], "format": "date-time" }, "metadata": {}, "name": { "type": "string" }, "status": { "type": "string" } } }, "ClusterSyncRequest": { "type": "object", "properties": { "cluster_id": { "type": [ "string", "null" ], "format": "uuid" }, "metadata": {}, "resource": { "$ref": "#/components/schemas/K8sResource" } } }, "CreateClusterRequest": { "type": "object", "description": "CreateClusterRequest is the request body for creating a cluster", "required": [ "cluster_name" ], "properties": { "cluster_name": { "type": "string" } } }, "CreateInstanceRequest": { "type": "object", "required": [ "cloud", "region", "shade_instance_type", "shade_cloud", "name" ], "properties": { "alert": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/Alert" } ] }, "auto_delete": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/AutoDelete" } ] }, "cloud": { "type": "string" }, "envs": { "type": [ "array", "null" ], "items": { "$ref": "#/components/schemas/EnvironmentVariable" } }, "launch_configuration": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/LaunchConfiguration" } ] }, "name": { "type": "string" }, "os": { "type": [ "string", "null" ] }, "region": { "type": "string" }, "shade_cloud": { "type": "boolean" }, "shade_instance_type": { "type": "string" }, "ssh_key_id": { "type": [ "string", "null" ] }, "tags": { "type": [ "array", "null" ], "items": { "type": "string" } }, "template_id": { "type": [ "string", "null" ] }, "volume_ids": { "type": [ "array", "null" ], "items": { "type": "string" } }, "volume_mount": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/VolumeMount" } ] } } }, 
"CreateInstanceResponse": { "type": "object", "required": [ "instance_id", "status", "cloud_provider", "region", "instance_type" ], "properties": { "cloud_provider": { "type": "string" }, "instance_id": { "type": "string" }, "instance_type": { "type": "string" }, "region": { "type": "string" }, "status": { "type": "string" } } }, "DockerConfiguration": { "type": "object", "required": [ "image" ], "properties": { "args": { "type": [ "string", "null" ] }, "envs": { "type": [ "array", "null" ], "items": { "$ref": "#/components/schemas/EnvironmentVariable" } }, "image": { "type": "string" }, "port_mappings": { "type": [ "array", "null" ], "items": { "$ref": "#/components/schemas/PortMapping" } }, "registry_credentials": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/RegistryCredentials" } ] }, "shared_memory_in_gb": { "type": [ "integer", "null" ], "format": "int32" }, "volume_mounts": { "type": [ "array", "null" ], "items": { "$ref": "#/components/schemas/VolumeMapping" } } } }, "EnvironmentVariable": { "type": "object", "required": [ "name", "value" ], "properties": { "name": { "type": "string" }, "value": { "type": "string" } } }, "Event": { "type": "object", "description": "A single Event record with structured metadata", "required": [ "id", "status", "operation", "resource_type", "message", "metadata", "source", "timestamp" ], "properties": { "id": { "type": "integer", "format": "int64" }, "message": { "type": "string" }, "metadata": {}, "operation": { "type": "string" }, "resource_type": { "type": "string" }, "source": { "type": "string" }, "status": { "type": "string" }, "timestamp": { "type": "integer", "minimum": 0 } } }, "InstanceInfo": { "type": "object", "required": [ "id", "name", "status", "cloud_provider", "region", "instance_type" ], "properties": { "cloud_provider": { "type": "string" }, "created_at": { "type": [ "string", "null" ] }, "id": { "type": "string" }, "instance_type": { "type": "string" }, "name": { "type": "string" }, 
"region": { "type": "string" }, "status": { "type": "string" } } }, "InstanceStatus": { "type": "object", "required": [ "instance_id", "status", "metadata" ], "properties": { "created_at": { "type": [ "string", "null" ] }, "instance_id": { "type": "string" }, "ip_address": { "type": [ "string", "null" ] }, "metadata": {}, "ssh_port": { "type": [ "integer", "null" ], "format": "int32" }, "ssh_user": { "type": [ "string", "null" ] }, "status": { "type": "string" } } }, "InstanceType": { "type": "object", "required": [ "cloud", "shade_instance_type", "cloud_instance_type", "configuration", "hourly_price", "deployment_type", "availability", "boot_time" ], "properties": { "availability": { "type": "array", "items": { "$ref": "#/components/schemas/InstanceTypeAvailability" } }, "boot_time": { "$ref": "#/components/schemas/InstanceTypeBootTime" }, "cloud": { "type": "string" }, "cloud_instance_type": { "type": "string" }, "configuration": { "$ref": "#/components/schemas/InstanceTypeConfiguration" }, "deployment_type": { "type": "string" }, "hourly_price": { "type": "integer", "format": "int32" }, "shade_instance_type": { "type": "string" } } }, "InstanceTypeAvailability": { "type": "object", "required": [ "region", "available", "display_name" ], "properties": { "available": { "type": "boolean" }, "display_name": { "type": "string" }, "region": { "type": "string" } } }, "InstanceTypeBootTime": { "type": "object", "required": [ "min_boot_in_sec", "max_boot_in_sec" ], "properties": { "max_boot_in_sec": { "type": "integer", "format": "int32" }, "min_boot_in_sec": { "type": "integer", "format": "int32" } } }, "InstanceTypeConfiguration": { "type": "object", "required": [ "memory_in_gb", "storage_in_gb", "vcpus", "num_gpus", "gpu_type", "interconnect", "nvlink", "vram_per_gpu_in_gb", "os_options" ], "properties": { "gpu_type": { "type": "string" }, "interconnect": { "type": "string" }, "memory_in_gb": { "type": "integer", "format": "int32" }, "num_gpus": { "type": "integer", 
"format": "int32" }, "nvlink": { "type": "boolean" }, "os_options": { "type": "array", "items": { "type": "string" } }, "storage_in_gb": { "type": "integer", "format": "int32" }, "vcpus": { "type": "integer", "format": "int32" }, "vram_per_gpu_in_gb": { "type": "integer", "format": "int32" } } }, "Job": { "type": "object", "title": "Job", "description": "Entire json for the Kubernetes Job" }, "JobResponse": { "type": "object", "required": [ "id", "name", "namespace", "status", "age" ], "properties": { "age": { "type": "string" }, "completions": { "type": [ "integer", "null" ], "format": "int32" }, "duration": { "type": [ "string", "null" ] }, "id": { "type": "string", "format": "uuid" }, "name": { "type": "string" }, "namespace": { "type": "string" }, "start_time": { "type": [ "string", "null" ] }, "status": { "type": "string" } } }, "K8sResource": { "oneOf": [ { "type": "object", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "None" ] } } }, { "allOf": [ { "$ref": "#/components/schemas/Pod" }, { "type": "object", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "Pod" ] } } } ] }, { "allOf": [ { "$ref": "#/components/schemas/Node" }, { "type": "object", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "Node" ] } } } ] }, { "allOf": [ { "$ref": "#/components/schemas/Job" }, { "type": "object", "required": [ "type" ], "properties": { "type": { "type": "string", "enum": [ "Job" ] } } } ] } ] }, "LaunchConfiguration": { "type": "object", "required": [ "type" ], "properties": { "docker_configuration": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/DockerConfiguration" } ] }, "script_configuration": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/ScriptConfiguration" } ] }, "type": { "type": "string" } } }, "MemoryTimeSeries": { "type": "object", "required": [ "free_memory", "allocated_memory" ], "properties": { "allocated_memory": { "type": 
"array", "items": { "$ref": "#/components/schemas/TimeSeriesDataPoint" } }, "free_memory": { "type": "array", "items": { "$ref": "#/components/schemas/TimeSeriesDataPoint" } } } }, "NamespacesResponse": { "type": "object", "required": [ "namespaces" ], "properties": { "namespaces": { "type": "array", "items": { "type": "string" } } } }, "Node": { "type": "object", "title": "Node", "description": "Entire json for the Kubernetes Node" }, "NodeMetricItem": { "type": "object", "required": [ "metric_name", "collector", "value", "timestamp" ], "properties": { "collector": { "type": "string" }, "cpu_core": { "type": "string" }, "cpu_mode": { "type": "string" }, "filesystem": { "type": "string" }, "metric_name": { "type": "string" }, "mountpoint": { "type": "string" }, "resource": { "type": [ "string", "null" ] }, "timestamp": { "type": "integer", "minimum": 0 }, "value": { "type": "string" } } }, "NodeResourceMetricList": { "type": "object", "required": [ "name" ], "properties": { "details": { "type": "array", "items": { "$ref": "#/components/schemas/NodeMetricItem" } }, "name": { "type": "string" } } }, "NodeResponse": { "type": "object", "required": [ "id", "name", "status", "cluster_name", "compute_type", "instance_type", "region" ], "properties": { "cluster_name": { "type": "string" }, "compute_type": { "type": "string" }, "id": { "type": "string", "format": "uuid" }, "instance_type": { "type": "string" }, "name": { "type": "string" }, "region": { "type": "string" }, "status": { "type": "string" } } }, "PaginatedJobResponse": { "type": "object", "required": [ "jobs", "total_count" ], "properties": { "available_filters": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/AvailableFilters" } ] }, "jobs": { "type": "array", "items": { "$ref": "#/components/schemas/JobResponse" } }, "total_count": { "type": "integer", "format": "int64" } } }, "PaginatedPodResponse": { "type": "object", "required": [ "pods", "total_count" ], "properties": { 
"available_filters": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/AvailableFilters" } ] }, "pods": { "type": "array", "items": { "$ref": "#/components/schemas/PodResponse" } }, "total_count": { "type": "integer", "format": "int64" } } }, "PartitionStats": { "type": "object", "required": [ "partition_name", "timestamp" ], "properties": { "cpu_load": { "type": [ "number", "null" ], "format": "double" }, "free_memory": { "type": [ "number", "null" ], "format": "double" }, "idle_cpus": { "type": [ "number", "null" ], "format": "double" }, "idle_nodes": { "type": [ "number", "null" ], "format": "double" }, "partition_name": { "type": "string" }, "real_memory": { "type": [ "number", "null" ], "format": "double" }, "timestamp": { "type": "integer", "minimum": 0 }, "total_cpus": { "type": [ "number", "null" ], "format": "double" } } }, "PartitionTimeSeries": { "type": "object", "required": [ "partition_name", "cpu_load", "free_memory" ], "properties": { "cpu_load": { "type": "array", "items": { "$ref": "#/components/schemas/TimeSeriesDataPoint" } }, "free_memory": { "type": "array", "items": { "$ref": "#/components/schemas/TimeSeriesDataPoint" } }, "partition_name": { "type": "string" } } }, "Pod": { "type": "object", "title": "Pod", "description": "Entire json for the Kubernetes " }, "PodLogEntry": { "type": "object", "required": [ "container_name", "message", "timestamp" ], "properties": { "container_name": { "type": "string" }, "message": { "type": "string" }, "stream": { "type": [ "string", "null" ] }, "timestamp": { "type": "integer", "minimum": 0 } } }, "PodLogList": { "type": "object", "required": [ "pod_name", "namespace" ], "properties": { "logs": { "type": "array", "items": { "$ref": "#/components/schemas/PodLogEntry" } }, "namespace": { "type": "string" }, "pod_name": { "type": "string" } } }, "PodResponse": { "type": "object", "required": [ "id", "name", "namespace", "status", "age", "ready", "node", "restarts", "monitored_by_policies" ], 
"properties": { "age": { "type": "string" }, "id": { "type": "string", "format": "uuid" }, "monitored_by_policies": { "type": "array", "items": { "type": "string" } }, "name": { "type": "string" }, "namespace": { "type": "string" }, "node": { "type": "string" }, "ready": { "type": "string" }, "restarts": { "type": "integer", "format": "int32" }, "start_time": { "type": [ "string", "null" ] }, "status": { "type": "string" } } }, "PolicyDeleteRequest": { "type": "object", "required": [ "id" ], "properties": { "id": { "type": "string", "format": "uuid" } } }, "PolicyPodResponse": { "type": "object", "required": [ "id", "name", "namespace", "status" ], "properties": { "id": { "type": "string", "format": "uuid" }, "name": { "type": "string" }, "namespace": { "type": "string" }, "status": { "type": "string" } } }, "PolicyRequest": { "type": "object", "required": [ "heartbeat_time", "resource_ids", "cluster_id" ], "properties": { "cluster_id": { "type": "string" }, "heartbeat_time": { "type": "integer", "format": "int64", "minimum": 0 }, "id": { "type": [ "string", "null" ] }, "policy_type": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/PolicyType" } ] }, "resource": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/PolicyResource" } ] }, "resource_ids": { "type": "array", "items": { "type": "string", "format": "uuid" } }, "resource_name": { "type": "string" } } }, "PolicyResource": { "type": "string", "enum": [ "pods", "namespace" ] }, "PolicyResponse": { "type": "object", "required": [ "id", "status", "policy_type", "input", "resource", "resource_list", "pod_count" ], "properties": { "id": { "type": "string" }, "input": { "type": "integer", "format": "int64" }, "pod_count": { "type": "integer", "format": "int64" }, "policy_type": { "type": "string" }, "resource": { "type": "string" }, "resource_list": { "type": "array", "items": { "type": "string" } }, "status": { "type": "string" }, "timestamp": { "type": [ "string", "null" ], 
"format": "date-time" } } }, "PolicyType": { "type": "string", "enum": [ "heartbeat" ] }, "PortMapping": { "type": "object", "required": [ "host_port", "container_port" ], "properties": { "container_port": { "type": "integer", "format": "int32" }, "host_port": { "type": "integer", "format": "int32" } } }, "RegistryCredentials": { "type": "object", "required": [ "username", "password" ], "properties": { "password": { "type": "string" }, "username": { "type": "string" } } }, "RestoreOverrides": { "type": "object", "properties": { "criu_opts": { "type": [ "string", "null" ] } } }, "RestorePod": { "type": "object", "required": [ "action_id", "reason" ], "properties": { "action_id": { "type": "string" }, "cluster_id": { "type": "string" }, "overrides": { "oneOf": [ { "type": "null" }, { "$ref": "#/components/schemas/RestoreOverrides" } ] }, "reason": { "$ref": "#/components/schemas/RestoreReason" } } }, "RestoreReason": { "type": "string", "enum": [ "nodeTermination", "nodeUnschedulable", "manual" ] }, "RestoreSlurmJob": { "type": "object", "required": [ "action_id", "cluster_id", "reason" ], "properties": { "action_id": { "type": "string" }, "cluster_id": { "type": "string" }, "reason": { "$ref": "#/components/schemas/RestoreReason" } } }, "ScriptConfiguration": { "type": "object", "required": [ "base64_script" ], "properties": { "base64_script": { "type": "string" } } }, "SlurmCluster": { "type": "object", "required": [ "id", "name", "status" ], "properties": { "id": { "type": "string", "format": "uuid" }, "metadata": {}, "name": { "type": "string" }, "status": { "type": "string" } } }, "SlurmClusterOverview": { "type": "object", "required": [ "timestamp" ], "properties": { "cpu_load": { "type": [ "number", "null" ], "format": "double" }, "cpus_idle": { "type": [ "number", "null" ], "format": "double" }, "cpus_total": { "type": [ "number", "null" ], "format": "double" }, "mem_alloc": { "type": [ "number", "null" ], "format": "double" }, "mem_free": { "type": [ 
"number", "null" ], "format": "double" }, "mem_real": { "type": [ "number", "null" ], "format": "double" }, "timestamp": { "type": "integer", "minimum": 0 } } }, "SlurmJob": { "type": "object", "required": [ "priority" ], "properties": { "job_id": { "type": [ "string", "null" ] }, "job_name": { "type": [ "string", "null" ] }, "priority": { "type": "integer", "format": "int32" }, "state": { "type": [ "string", "null" ] } } }, "SlurmMetric": { "type": "object", "required": [ "id", "name", "timestamp" ], "properties": { "counter_value": { "type": [ "number", "null" ], "format": "double" }, "gauge_value": { "type": [ "number", "null" ], "format": "double" }, "id": { "type": "integer", "format": "int64" }, "kind": { "type": [ "string", "null" ] }, "name": { "type": "string" }, "tags": {}, "timestamp": { "type": "integer", "minimum": 0 } } }, "SlurmNode": { "type": "object", "required": [ "id" ], "properties": { "cpus": { "type": [ "integer", "null" ], "format": "int32" }, "id": { "type": "string", "format": "uuid" }, "memory": { "type": [ "integer", "null" ], "format": "int64" }, "name": { "type": [ "string", "null" ] }, "status": { "type": [ "string", "null" ] } } }, "SlurmStateBreakdown": { "type": "object", "required": [ "timestamp", "cpus_by_state", "nodes_by_state" ], "properties": { "cpus_by_state": { "type": "array", "items": { "$ref": "#/components/schemas/StateItem" } }, "nodes_by_state": { "type": "array", "items": { "$ref": "#/components/schemas/StateItem" } }, "timestamp": { "type": "integer", "minimum": 0 } } }, "StateItem": { "type": "object", "required": [ "state", "value" ], "properties": { "state": { "type": "string" }, "value": { "type": "number", "format": "double" } } }, "StatusResponse": { "type": "object", "required": [ "status" ], "properties": { "details": { "type": [ "string", "null" ] }, "status": { "$ref": "#/components/schemas/CheckpointOperationStatus" } } }, "StatusesResponse": { "type": "object", "required": [ "statuses" ], "properties": { 
"statuses": { "type": "array", "items": { "type": "string" } } } }, "TimeSeriesDataPoint": { "type": "object", "required": [ "timestamp", "value" ], "properties": { "timestamp": { "type": "integer", "minimum": 0 }, "value": { "type": "number", "format": "double" } } }, "TotalCountResponse": { "type": "object", "required": [ "total_count" ], "properties": { "total_count": { "type": "integer", "format": "int64" } } }, "VolumeMapping": { "type": "object", "required": [ "host_path", "container_path" ], "properties": { "container_path": { "type": "string" }, "host_path": { "type": "string" } } }, "VolumeMount": { "type": "object", "properties": { "auto": { "type": [ "boolean", "null" ] } } }, "WorkloadReq": { "type": "object", "required": [ "cluster_id" ], "properties": { "cluster_id": { "type": "string" }, "cluster_name": { "type": [ "string", "null" ], "deprecated": true }, "deployment": {}, "job": {}, "pod": {} } } }, "securitySchemes": { "Auth Token": { "type": "http", "scheme": "bearer" } } }, "tags": [ { "name": "User", "description": "Cedana API to get user information" }, { "name": "Service", "description": "Cedana API for service discovery" }, { "name": "Checkpoints", "description": "Cedana API for checkpoints" }, { "name": "Actions", "description": "Cedana API for actions" }, { "name": "Files", "description": "Cedana API for files" }, { "name": "Resources", "description": "Cedana API for resources" }, { "name": "Clusters", "description": "Cedana API for clusters" }, { "name": "Policy", "description": "Cedana API for policies" }, { "name": "Cloud Bursting", "description": "Cedana API for cloud bursting" }, { "name": "Jobs (Slurm)", "description": "Cedana API for jobs (Slurm)" }, { "name": "Nodes (Slurm)", "description": "Cedana API for nodes (Slurm)" }, { "name": "Actions (Slurm)", "description": "Cedana API for actions (Slurm)" }, { "name": "Clusters (Slurm)", "description": "Cedana API for clusters (Slurm)" }, { "name": "Events", "description": "Cedana API 
for events" }, { "name": "Metrics", "description": "Cedana API for metrics" }, { "name": "Logs", "description": "Cedana API for logs" } ] }