naftiko: 1.0.0-alpha2
info:
  label: Scalable Inference Serving - Model Inference Operations
  description: >-
    Workflow capability for ML engineers and data scientists performing model
    inference operations, health monitoring, and metadata inspection against
    OIP-compliant inference servers. Imports the KServe Open Inference
    Protocol shared definition and exposes a unified workflow-oriented API
    and MCP server for AI-assisted inference workflows.
  tags:
    - AI
    - CNCF
    - Inference
    - Kubernetes
    - Machine Learning
    - Model Serving
    - MLOps
  created: '2026-05-02'
  modified: '2026-05-06'
binds:
  - namespace: env
    keys:
      KSERVE_BASE_URL: KSERVE_BASE_URL
capability:
  consumes:
    - type: http
      namespace: kserve
      baseUri: '{{env.KSERVE_BASE_URL}}'
      description: KServe Open Inference Protocol V2 REST API
      resources:
        - name: server-health
          path: /v2/health
          description: Server liveness and readiness health endpoints
          operations:
            - name: check-server-liveness
              method: GET
              description: Check if the inference server is live and able to receive requests
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
            - name: check-server-readiness
              method: GET
              description: Check if all models are loaded and the server is ready for inference
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: server-metadata
          path: /v2
          description: Server metadata and version information
          operations:
            - name: get-server-metadata
              method: GET
              description: Get server name, version, and supported protocol extensions
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: model-health
          path: /v2/models/{model_name}/ready
          description: Per-model readiness endpoint
          operations:
            - name: check-model-readiness
              method: GET
              description: Check if a specific model is ready for inference
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model to check
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: model-metadata
          path: /v2/models/{model_name}
          description: Model metadata including input/output tensor specifications
          operations:
            - name: get-model-metadata
              method: GET
              description: Retrieve model name, versions, platform, and input/output tensor specs
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: model-inference
          path: /v2/models/{model_name}/infer
          description: Model inference execution endpoint
          operations:
            - name: run-inference
              method: POST
              description: Submit input tensors and receive model predictions
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model to run inference against
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              body:
                type: json
                data:
                  id: '{{tools.request_id}}'
                  inputs: '{{tools.inputs}}'
                  outputs: '{{tools.outputs}}'
        - name: model-version-inference
          path: /v2/models/{model_name}/versions/{model_version}/infer
          description: Version-specific model inference
          operations:
            - name: run-model-version-inference
              method: POST
              description: Submit an inference request to a specific model version for A/B testing or version pinning
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
                - name: model_version
                  in: path
                  type: string
                  required: true
                  description: Specific model version
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
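              # The body template below renders to an OIP V2 inference
              # request. A minimal sketch of the resulting JSON, assuming a
              # model with a single FP32 input tensor (the tensor name,
              # shape, and data values are illustrative, not part of this
              # capability):
              #   {
              #     "id": "req-42",
              #     "inputs": [
              #       { "name": "input-0", "shape": [1, 4],
              #         "datatype": "FP32", "data": [5.1, 3.5, 1.4, 0.2] }
              #     ]
              #   }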
              body:
                type: json
                data:
                  id: '{{tools.request_id}}'
                  inputs: '{{tools.inputs}}'
        - name: model-version-metadata
          path: /v2/models/{model_name}/versions/{model_version}
          description: Version-specific model metadata
          operations:
            - name: get-model-version-metadata
              method: GET
              description: Retrieve metadata for a specific model version
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
                - name: model_version
                  in: path
                  type: string
                  required: true
                  description: Specific model version
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
  exposes:
    - type: rest
      port: 8080
      namespace: inference-ops-api
      description: >-
        Unified REST API for model inference operations, health monitoring,
        and metadata inspection.
      resources:
        - path: /v1/health/live
          name: server-liveness
          description: Check if the inference server is live
          operations:
            - method: GET
              name: check-server-liveness
              description: Check Server Liveness
              call: kserve.check-server-liveness
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/health/ready
          name: server-readiness
          description: Check if all models are ready for inference
          operations:
            - method: GET
              name: check-server-readiness
              description: Check Server Readiness
              call: kserve.check-server-readiness
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/server
          name: server-metadata
          description: Inference server metadata and capabilities
          operations:
            - method: GET
              name: get-server-metadata
              description: Get Server Metadata
              call: kserve.get-server-metadata
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}
          name: model-metadata
          description: Model tensor specifications and version information
          operations:
            - method: GET
              name: get-model-metadata
              description: Get Model Metadata
              call: kserve.get-model-metadata
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/ready
          name: model-readiness
          description: Model readiness status
          operations:
            - method: GET
              name: check-model-readiness
              description: Check Model Readiness
              call: kserve.check-model-readiness
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/infer
          name: model-inference
          description: Submit inference requests to a model
          operations:
            - method: POST
              name: run-inference
              description: Run Model Inference
              call: kserve.run-inference
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/versions/{model_version}/infer
          name: model-version-inference
          description: Submit inference requests to a specific model version
          operations:
            - method: POST
              name: run-model-version-inference
              description: Run Model Version Inference
              call: kserve.run-model-version-inference
              with:
                model_name: rest.model_name
                model_version: rest.model_version
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/versions/{model_version}/metadata
          name: model-version-metadata
          description: Model version-specific metadata
          operations:
            - method: GET
              name: get-model-version-metadata
              description: Get Model Version Metadata
              call: kserve.get-model-version-metadata
              with:
                model_name: rest.model_name
                model_version: rest.model_version
              outputParameters:
                - type: object
                  mapping: $.
    - type: mcp
      port: 9090
      namespace: inference-ops-mcp
      transport: http
      description: >-
        MCP server for AI-assisted model inference operations, enabling LLM
        agents to submit inference requests and inspect model health.
      tools:
        - name: check-server-liveness
          description: Check if the KServe inference server is live and able to receive requests
          hints:
            readOnly: true
            idempotent: true
          call: kserve.check-server-liveness
          outputParameters:
            - type: object
              mapping: $.
        - name: check-server-readiness
          description: Check if all models are loaded and the inference server is ready
          hints:
            readOnly: true
            idempotent: true
          call: kserve.check-server-readiness
          outputParameters:
            - type: object
              mapping: $.
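        # MCP clients invoke the tools in this list via standard tools/call
        # requests. A hedged example for the readiness tool above (JSON-RPC
        # framing per the MCP spec; the id value is illustrative):
        #   {
        #     "jsonrpc": "2.0",
        #     "id": 1,
        #     "method": "tools/call",
        #     "params": { "name": "check-server-readiness", "arguments": {} }
        #   }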
        - name: get-server-metadata
          description: Get inference server name, version, and supported protocol extensions
          hints:
            readOnly: true
            idempotent: true
          call: kserve.get-server-metadata
          outputParameters:
            - type: object
              mapping: $.
        - name: check-model-readiness
          description: Check if a specific model is ready for inference
          hints:
            readOnly: true
            idempotent: true
          call: kserve.check-model-readiness
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: get-model-metadata
          description: Get model input/output tensor specifications, available versions, and serving platform
          hints:
            readOnly: true
            idempotent: true
          call: kserve.get-model-metadata
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: run-inference
          description: >-
            Submit input tensors to a deployed model and receive inference
            output tensors. Use get-model-metadata first to discover the
            correct input names, shapes, and datatypes.
          hints:
            readOnly: false
            idempotent: false
          call: kserve.run-inference
          with:
            model_name: tools.model_name
            request_id: tools.request_id
            inputs: tools.inputs
            outputs: tools.outputs
          outputParameters:
            - type: object
              mapping: $.
        - name: run-model-version-inference
          description: >-
            Run inference against a pinned model version for A/B testing,
            canary evaluation, or version-specific integration
          hints:
            readOnly: false
            idempotent: false
          call: kserve.run-model-version-inference
          with:
            model_name: tools.model_name
            model_version: tools.model_version
            request_id: tools.request_id
            inputs: tools.inputs
          outputParameters:
            - type: object
              mapping: $.
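# Example workflow against the exposed REST API on port 8080. A hedged
# sketch only: the model name "iris", tensor names, shapes, and data values
# are illustrative, and the inference body assumes the exposed POST
# operation accepts OIP-style fields (request_id, inputs) that feed the
# consumed body template above.
#
#   curl http://localhost:8080/v1/health/ready
#   curl http://localhost:8080/v1/models/iris
#   curl -X POST http://localhost:8080/v1/models/iris/infer \
#     -H 'Content-Type: application/json' \
#     -d '{
#           "request_id": "req-42",
#           "inputs": [
#             { "name": "input-0", "shape": [1, 4], "datatype": "FP32",
#               "data": [5.1, 3.5, 1.4, 0.2] }
#           ]
#         }'
#
# A successful inference returns the OIP V2 response shape, e.g.:
#   { "model_name": "iris", "id": "req-42",
#     "outputs": [ { "name": "output-0", "shape": [1],
#                    "datatype": "INT64", "data": [0] } ] }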