# Capability descriptor for NVIDIA NIM Vision.
naftiko: 1.0.0-alpha2
info:
  label: NVIDIA NIM Vision
  # Folded scalar: the two lines below load as one single-line string.
  description: >-
    Vision-language model inference via /v1/chat/completions
    with image inputs.
  # Dates stay quoted so they load as strings, not YAML date values.
  created: '2026-05-25'
  modified: '2026-05-25'
  tags: [NVIDIA, NIM, Vision, Multimodal]
# One binding in the `env` namespace; the authentication template further
# down refers to it as `{{env.NVIDIA_API_KEY}}`.
binds:
- keys:
    NVIDIA_API_KEY: NVIDIA_API_KEY
  namespace: env
capability:
  # Upstream HTTP API this capability calls, addressed as namespace `vision`.
  consumes:
  - namespace: vision
    type: http
    baseUri: https://integrate.api.nvidia.com
    # Bearer credential in a header, filled from the env.NVIDIA_API_KEY
    # template.
    # NOTE(review): confirm `value`/`placement` are the keys this spec version
    # expects for `type: bearer`.
    authentication:
      type: bearer
      placement: header
      value: '{{env.NVIDIA_API_KEY}}'
    resources:
    - name: vision-chat
      path: /v1/chat/completions
      operations:
      - name: createvisionchat
        description: Create A Multimodal Chat Completion
        method: POST
        # A single required object parameter, sent in the request body.
        inputParameters:
        - name: body
          in: body
          required: true
          type: object
        # JSON response surfaced as one object parameter named `result`.
        # NOTE(review): `$.` presumably selects the whole response document;
        # confirm.
        outputRawFormat: json
        outputParameters:
        - name: result
          type: object
          value: $.
  # Exposed endpoints. First entry: REST on port 8080.
  exposes:
  - namespace: vision-rest
    type: rest
    port: 8080
    resources:
    - name: vision-chat
      path: /v1/vision/chat
      operations:
      - name: createvisionchat
        method: POST
        description: Create A Multimodal Chat Completion
        # Forwards to the consumed operation, passing `rest.body` as its
        # `body` input.
        call: vision.createvisionchat
        with:
          body: rest.body
        outputParameters:
        - mapping: $.
          type: object
  # MCP entry on port 9090 over the http transport, offering a single tool.
  - namespace: vision-mcp
    type: mcp
    transport: http
    port: 9090
    tools:
    - name: nvidia-nim-vision
      description: Create A Multimodal Chat Completion
      # Calls the same operation as the REST route, passing `tools.body` as
      # its `body` input.
      call: vision.createvisionchat
      with:
        body: tools.body
      # All three hints are explicitly declared false.
      hints:
        destructive: false
        idempotent: false
        readOnly: false
      outputParameters:
      - mapping: $.
        type: object