---
# Naftiko capability: NVIDIA Triton Inference Server model management & inference.
# Consumes the Triton KServe V2 HTTP/REST API and exposes it as a unified REST
# API (port 8080) and an MCP server (port 9090).
naftiko: 1.0.0-alpha2
info:
  label: Triton Model Inference and Management
  description: >-
    Workflow capability for deploying, managing, and running inference against
    machine learning models on NVIDIA Triton Inference Server. Enables model
    lifecycle management including loading, health checks, inference execution,
    statistics monitoring, and observability configuration.
  tags:
    - AI
    - Deep Learning
    - Inference
    - Model Serving
    - Machine Learning
    - NVIDIA
    - KServe
  created: '2026-05-03'
  modified: '2026-05-06'
capability:
  # Upstream dependency: Triton's KServe V2 HTTP/REST endpoints.
  consumes:
    - type: http
      namespace: triton
      baseUri: http://localhost:8000
      description: NVIDIA Triton Inference Server HTTP/REST API (KServe V2 protocol)
      resources:
        # -- Server health and metadata --
        - name: health
          path: /v2/health
          description: Server and model health checks
          operations:
            - name: server-live
              method: GET
              description: Check if the Triton server is alive
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
            - name: server-ready
              method: GET
              description: Check if the Triton server is ready to accept inference requests
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: server-metadata
          path: /v2
          description: Server metadata
          operations:
            - name: server-metadata
              method: GET
              description: Retrieve server name, version, and supported extensions
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        # -- Per-model readiness, metadata, and configuration --
        - name: model-health
          path: /v2/models/{model_name}/ready
          description: Model readiness check
          operations:
            - name: model-ready
              method: GET
              description: Check if a model is ready for inference
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: model-metadata
          path: /v2/models/{model_name}
          description: Model metadata and configuration
          operations:
            - name: model-metadata
              method: GET
              description: Retrieve model name, versions, platform, and tensor metadata
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: model-config
          path: /v2/models/{model_name}/config
          description: Model configuration
          operations:
            - name: model-config
              method: GET
              description: Retrieve full model configuration from config.pbtxt
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        # -- Inference --
        - name: inference
          path: /v2/models/{model_name}/infer
          description: Model inference endpoint
          operations:
            - name: model-infer
              method: POST
              description: Submit an inference request to a model
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              body:
                type: json
                data:
                  id: '{{tools.request_id}}'
                  inputs: '{{tools.inputs}}'
                  outputs: '{{tools.outputs}}'
        # -- Model repository management --
        - name: repository-index
          path: /v2/repository/index
          description: Model repository index
          operations:
            - name: repository-index
              method: POST
              description: List all models in the repository
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              body:
                type: json
                data:
                  ready: '{{tools.ready_only}}'
        - name: model-load
          path: /v2/repository/models/{model_name}/load
          description: Load a model into Triton
          operations:
            - name: model-load
              method: POST
              description: Load or reload a model from the repository
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model to load
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: model-unload
          path: /v2/repository/models/{model_name}/unload
          description: Unload a model from Triton
          operations:
            - name: model-unload
              method: POST
              description: Unload a model from Triton, making it unavailable for inference
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model to unload
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        # -- Statistics --
        - name: model-statistics
          path: /v2/models/{model_name}/stats
          description: Model inference statistics
          operations:
            - name: model-statistics
              method: GET
              description: Retrieve inference statistics for a specific model
              inputParameters:
                - name: model_name
                  in: path
                  type: string
                  required: true
                  description: Name of the model
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        - name: all-model-statistics
          path: /v2/models/stats
          description: All model statistics
          operations:
            - name: all-model-statistics
              method: GET
              description: Retrieve inference statistics for all loaded models
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
        # -- Observability: tracing and logging --
        - name: trace-settings
          path: /v2/trace/setting
          description: Trace configuration
          operations:
            - name: get-trace-setting
              method: GET
              description: Get current global trace settings
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
            - name: update-trace-setting
              method: POST
              description: Update global trace settings
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              body:
                type: json
                data:
                  trace_level: '{{tools.trace_level}}'
                  trace_rate: '{{tools.trace_rate}}'
        - name: logging
          path: /v2/logging
          description: Logging configuration
          operations:
            - name: get-log-settings
              method: GET
              description: Get current logging settings
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
            - name: update-log-settings
              method: POST
              description: Update server logging settings
              outputRawFormat: json
              outputParameters:
                - name: result
                  type: object
                  value: $.
              body:
                type: json
                data:
                  log_info: '{{tools.log_info}}'
                  log_verbose_level: '{{tools.log_verbose_level}}'
  exposes:
    # Unified REST facade over the Triton consumer above.
    - type: rest
      port: 8080
      namespace: triton-inference-api
      description: Unified REST API for Triton model lifecycle management and inference.
      resources:
        - path: /v1/health
          name: health
          description: Server and model health status
          operations:
            - method: GET
              name: server-live
              description: Check if Triton server is alive
              call: triton.server-live
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/server
          name: server
          description: Server metadata and information
          operations:
            - method: GET
              name: server-metadata
              description: Get Triton server name, version, and extensions
              call: triton.server-metadata
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models
          name: models
          description: Model repository and management
          operations:
            - method: GET
              name: list-models
              description: List all models available in the repository
              call: triton.repository-index
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}
          name: model
          description: Individual model operations
          operations:
            - method: GET
              name: model-metadata
              description: Get model metadata including tensor definitions
              call: triton.model-metadata
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/config
          name: model-config
          description: Model configuration
          operations:
            - method: GET
              name: model-config
              description: Get full model configuration
              call: triton.model-config
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/infer
          name: model-inference
          description: Run model inference
          operations:
            - method: POST
              name: model-infer
              description: Submit an inference request to a model
              call: triton.model-infer
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/load
          name: model-load
          description: Load model into server
          operations:
            - method: POST
              name: model-load
              description: Load or reload a model from the repository
              call: triton.model-load
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/unload
          name: model-unload
          description: Unload model from server
          operations:
            - method: POST
              name: model-unload
              description: Unload a model from Triton
              call: triton.model-unload
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/models/{model_name}/stats
          name: model-stats
          description: Model inference statistics
          operations:
            - method: GET
              name: model-statistics
              description: Get inference statistics for a specific model
              call: triton.model-statistics
              with:
                model_name: rest.model_name
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/stats
          name: all-stats
          description: Statistics for all models
          operations:
            - method: GET
              name: all-model-statistics
              description: Get inference statistics for all loaded models
              call: triton.all-model-statistics
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/trace
          name: trace
          description: Trace configuration
          operations:
            - method: GET
              name: get-trace-settings
              description: Get current global trace settings
              call: triton.get-trace-setting
              outputParameters:
                - type: object
                  mapping: $.
            - method: POST
              name: update-trace-settings
              description: Update request tracing configuration
              call: triton.update-trace-setting
              outputParameters:
                - type: object
                  mapping: $.
        - path: /v1/logging
          name: logging
          description: Logging configuration
          operations:
            - method: GET
              name: get-log-settings
              description: Get current logging settings
              call: triton.get-log-settings
              outputParameters:
                - type: object
                  mapping: $.
            - method: POST
              name: update-log-settings
              description: Update server logging configuration
              call: triton.update-log-settings
              outputParameters:
                - type: object
                  mapping: $.
    # MCP server exposing the same operations as AI-assistant tools.
    - type: mcp
      port: 9090
      namespace: triton-inference-mcp
      transport: http
      description: MCP server for AI-assisted model deployment and inference management on Triton.
      tools:
        - name: server-live
          description: Check if Triton inference server is alive
          hints:
            readOnly: true
            openWorld: false
          call: triton.server-live
          outputParameters:
            - type: object
              mapping: $.
        - name: server-ready
          description: Check if Triton server is ready to accept inference requests
          hints:
            readOnly: true
            openWorld: false
          call: triton.server-ready
          outputParameters:
            - type: object
              mapping: $.
        - name: server-metadata
          description: Get Triton server name, version, and supported extensions
          hints:
            readOnly: true
            openWorld: false
          call: triton.server-metadata
          outputParameters:
            - type: object
              mapping: $.
        - name: list-models
          description: List all models available in the Triton model repository
          hints:
            readOnly: true
            openWorld: true
          call: triton.repository-index
          with:
            ready_only: tools.ready_only
          outputParameters:
            - type: object
              mapping: $.
        - name: model-metadata
          description: Get metadata for a specific model including input/output tensor shapes
          hints:
            readOnly: true
            openWorld: false
          call: triton.model-metadata
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: model-config
          description: Get the full configuration for a specific model
          hints:
            readOnly: true
            openWorld: false
          call: triton.model-config
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: model-ready
          description: Check if a specific model is ready to accept inference requests
          hints:
            readOnly: true
            openWorld: false
          call: triton.model-ready
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: model-infer
          description: Run inference against a loaded model with input tensors
          hints:
            readOnly: false
            destructive: false
            idempotent: false
          call: triton.model-infer
          with:
            model_name: tools.model_name
            inputs: tools.inputs
            outputs: tools.outputs
          outputParameters:
            - type: object
              mapping: $.
        - name: model-load
          description: Load or reload a model from the repository into Triton
          hints:
            readOnly: false
            destructive: false
            idempotent: true
          call: triton.model-load
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: model-unload
          description: Unload a model from Triton to free resources
          hints:
            readOnly: false
            destructive: true
            idempotent: true
          call: triton.model-unload
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: model-statistics
          description: Get inference statistics for a specific model
          hints:
            readOnly: true
            openWorld: false
          call: triton.model-statistics
          with:
            model_name: tools.model_name
          outputParameters:
            - type: object
              mapping: $.
        - name: all-model-statistics
          description: Get inference statistics for all loaded models
          hints:
            readOnly: true
            openWorld: true
          call: triton.all-model-statistics
          outputParameters:
            - type: object
              mapping: $.
        - name: get-trace-settings
          description: Get current global request tracing configuration
          hints:
            readOnly: true
            openWorld: false
          call: triton.get-trace-setting
          outputParameters:
            - type: object
              mapping: $.
        - name: update-trace-settings
          description: Update request tracing levels and sampling rate
          hints:
            readOnly: false
            destructive: false
            idempotent: true
          call: triton.update-trace-setting
          with:
            trace_level: tools.trace_level
            trace_rate: tools.trace_rate
          outputParameters:
            - type: object
              mapping: $.
        - name: get-log-settings
          description: Get current server logging configuration
          hints:
            readOnly: true
            openWorld: false
          call: triton.get-log-settings
          outputParameters:
            - type: object
              mapping: $.
        - name: update-log-settings
          description: Update server logging level and format
          hints:
            readOnly: false
            destructive: false
            idempotent: true
          call: triton.update-log-settings
          with:
            log_info: tools.log_info
            log_verbose_level: tools.log_verbose_level
          outputParameters:
            - type: object
              mapping: $.