{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/scalable-inference-serving/main/json-schema/kserve-inference-request-schema.json",
  "title": "Inference Request",
  "description": "Open Inference Protocol V2 inference request submitted to a model serving endpoint via HTTP POST. Compatible with KServe, NVIDIA Triton, BentoML, and other OIP-compliant servers.",
  "type": "object",
  "required": ["inputs"],
  "properties": {
    "id": {
      "type": "string",
      "description": "Optional request identifier that will be echoed back in the response for correlation.",
      "example": "req-a1b2c3d4-e5f6-7890-abcd-ef1234567890"
    },
    "parameters": {
      "type": "object",
      "description": "Optional key/value parameters passed to the model's pre/post-processing pipeline.",
      "additionalProperties": true
    },
    "inputs": {
      "type": "array",
      "description": "Input tensors for the inference request. Each tensor specifies its name, shape, datatype, and data.",
      "minItems": 1,
      "items": {
        "$ref": "#/$defs/RequestInput"
      }
    },
    "outputs": {
      "type": "array",
      "description": "Optional list of output tensors to return. If omitted, all model outputs are returned.",
      "items": {
        "$ref": "#/$defs/RequestOutput"
      }
    }
  },
  "$defs": {
    "RequestInput": {
      "type": "object",
      "title": "Request Input",
      "description": "A single named input tensor for an inference request.",
      "required": ["name", "shape", "datatype", "data"],
      "properties": {
        "name": {
          "type": "string",
          "description": "Tensor name as defined in the model's input specification."
        },
        "shape": {
          "type": "array",
          "description": "Shape of the tensor. Use -1 for variable-length or batch dimensions.",
          "items": {"type": "integer"},
          "example": [1, 128]
        },
        "datatype": {
          "$ref": "#/$defs/TensorDatatype"
        },
        "parameters": {
          "type": "object",
          "additionalProperties": true,
          "description": "Optional tensor-level parameters."
        },
        "data": {
          "description": "Tensor data in row-major order. Nested arrays or flat array acceptable.",
          "oneOf": [
            {"type": "array", "items": {}},
            {"type": "string", "description": "Base64-encoded binary data for the binary tensor data extension."}
          ]
        }
      }
    },
    "RequestOutput": {
      "type": "object",
      "title": "Request Output",
      "description": "Specifies which model output tensor to include in the response.",
      "required": ["name"],
      "properties": {
        "name": {
          "type": "string",
          "description": "Name of the output tensor to include in the response."
        },
        "parameters": {
          "type": "object",
          "additionalProperties": true
        }
      }
    },
    "TensorDatatype": {
      "type": "string",
      "title": "Tensor Datatype",
      "description": "Data type of a tensor per the Open Inference Protocol specification.",
      "enum": ["BOOL", "UINT8", "UINT16", "UINT32", "UINT64", "INT8", "INT16", "INT32", "INT64", "FP16", "FP32", "FP64", "BYTES", "STRING"]
    }
  }
}