{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://huggingface.co/schemas/inference-endpoint.json",
  "title": "Hugging Face Inference Endpoint",
  "description": "Schema for a dedicated Inference Endpoint deployment on the Hugging Face platform, including model configuration, compute resources, scaling settings, and runtime status.",
  "type": "object",
  "required": [
    "name",
    "type",
    "provider",
    "compute",
    "model"
  ],
  "properties": {
    "name": {
      "type": "string",
      "description": "Unique name of the Inference Endpoint",
      "pattern": "^[a-z0-9][a-z0-9-]{0,30}[a-z0-9]$",
      "examples": [
        "my-text-gen-endpoint",
        "prod-llama-chat",
        "staging-bert-classifier"
      ]
    },
    "accountId": {
      "type": "string",
      "description": "Account or organization ID that owns the endpoint"
    },
    "type": {
      "type": "string",
      "description": "Security type controlling endpoint access",
      "enum": [
        "public",
        "protected",
        "private"
      ],
      "default": "protected"
    },
    "provider": {
      "type": "object",
      "description": "Cloud provider configuration",
      "required": [
        "vendor",
        "region"
      ],
      "properties": {
        "vendor": {
          "type": "string",
          "description": "Cloud provider vendor",
          "enum": [
            "aws",
            "azure",
            "gcp"
          ]
        },
        "region": {
          "type": "string",
          "description": "Cloud region for deployment",
          "examples": [
            "us-east-1",
            "eu-west-1",
            "us-west-2",
            "ap-southeast-1"
          ]
        }
      }
    },
    "compute": {
      "type": "object",
      "description": "Compute resources configuration",
      "required": [
        "accelerator",
        "instanceType",
        "instanceSize",
        "scaling"
      ],
      "properties": {
        "accelerator": {
          "type": "string",
          "description": "Type of compute accelerator",
          "enum": [
            "cpu",
            "gpu"
          ]
        },
        "instanceType": {
          "type": "string",
          "description": "GPU or instance type",
          "examples": [
            "nvidia-a10g",
            "nvidia-t4",
            "nvidia-a100",
            "nvidia-l4",
            "nvidia-h100",
            "intel-icl",
            "intel-spr",
            "aws-inf2"
          ]
        },
        "instanceSize": {
          "type": "string",
          "description": "Instance size determining memory and compute",
          "examples": [
            "x1",
            "x2",
            "x4",
            "x8"
          ]
        },
        "scaling": {
          "type": "object",
          "description": "Autoscaling configuration",
          "required": [
            "minReplica",
            "maxReplica"
          ],
          "properties": {
            "minReplica": {
              "type": "integer",
              "description": "Minimum number of replicas (0 enables scale-to-zero)",
              "minimum": 0,
              "default": 0
            },
            "maxReplica": {
              "type": "integer",
              "description": "Maximum number of replicas",
              "minimum": 1,
              "default": 1
            },
            "scaleToZeroTimeout": {
              "type": "integer",
              "description": "Minutes of inactivity before scaling to zero",
              "minimum": 1,
              "default": 15
            }
          }
        }
      }
    },
    "model": {
      "type": "object",
      "description": "Model deployment configuration",
      "required": [
        "repository",
        "task"
      ],
      "properties": {
        "repository": {
          "type": "string",
          "description": "Hugging Face model repository ID",
          "examples": [
            "meta-llama/Llama-3-70b-chat-hf",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "sentence-transformers/all-MiniLM-L6-v2"
          ]
        },
        "revision": {
          "type": "string",
          "description": "Git revision (branch, tag, or commit SHA)",
          "default": "main"
        },
        "task": {
          "type": "string",
          "description": "Inference task type",
          "enum": [
            "text-generation",
            "text-classification",
            "token-classification",
            "question-answering",
            "summarization",
            "translation",
            "fill-mask",
            "feature-extraction",
            "sentence-similarity",
            "image-classification",
            "object-detection",
            "automatic-speech-recognition",
            "text-to-image",
            "custom"
          ]
        },
        "framework": {
          "type": "string",
          "description": "Serving framework",
          "enum": [
            "pytorch",
            "custom"
          ],
          "default": "pytorch"
        },
        "image": {
          "type": "object",
          "description": "Container image configuration",
          "properties": {
            "huggingface": {
              "type": "object",
              "description": "Hugging Face optimized container settings"
            },
            "custom": {
              "type": "object",
              "description": "Custom container settings",
              "properties": {
                "url": {
                  "type": "string",
                  "format": "uri",
                  "description": "Custom container image URL"
                },
                "health_route": {
                  "type": "string",
                  "description": "Health check endpoint path"
                },
                "port": {
                  "type": "integer",
                  "description": "Container port"
                },
                "env": {
                  "type": "object",
                  "additionalProperties": {
                    "type": "string"
                  },
                  "description": "Environment variables for the container"
                }
              }
            }
          }
        }
      }
    },
    "status": {
      "type": "object",
      "description": "Current status of the endpoint",
      "properties": {
        "state": {
          "type": "string",
          "description": "Current operational state",
          "enum": [
            "pending",
            "initializing",
            "running",
            "updating",
            "paused",
            "scaledToZero",
            "failed"
          ]
        },
        "message": {
          "type": "string",
          "description": "Human-readable status message"
        },
        "createdAt": {
          "type": "string",
          "format": "date-time",
          "description": "When the endpoint was created"
        },
        "updatedAt": {
          "type": "string",
          "format": "date-time",
          "description": "When the endpoint was last updated"
        },
        "url": {
          "type": "string",
          "format": "uri",
          "description": "Inference URL when the endpoint is running"
        },
        "readyReplica": {
          "type": "integer",
          "description": "Number of replicas currently ready"
        },
        "targetReplica": {
          "type": "integer",
          "description": "Target number of replicas"
        },
        "errorMessage": {
          "type": "string",
          "description": "Error message if the endpoint is in a failed state"
        }
      }
    }
  }
}