# Model registry manifest. The version is a plain scalar with two dots, so YAML
# loaders resolve it as a string, not a number.
registryVersion: 1.9.0
# Registry entries. This list uses flush "- " items, while the nested lists
# below are indented under their key.
models:
# Entry for the Llama 3.3 70B Instruct NIM (type NGC, category Text Generation).
- name: Llama 3.3 70B Instruct
  displayName: Llama 3.3 70B Instruct
  modelHubID: llama-3-3-70b-instruct
  category: Text Generation
  type: NGC
  description: The Llama 3.3 70B-Instruct NIM simplifies the deployment of the Llama 3.3 70B instruction tuned model which is optimized for language understanding, reasoning, and text generation use cases, and outperforms many of the available open source chat models on common industry benchmarks.
  # requireLicense is a real YAML boolean; the agreements listed are the
  # Llama 3.3 use policy and license pages.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.llama.com/llama3_3/use-policy/
    - label: License Agreement
      url: https://www.llama.com/llama3_3/license/
  # Each variant names a source URL and the optimization profiles listed for it.
  modelVariants:
    - variantId: Llama 3.3 70B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/meta/containers/llama-3.3-70b-instruct
      # One list item per GPU / GPU count / precision / profile combination.
      optimizationProfiles:
        # TensorRT-LLM profile for 4x A100: bf16 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:a100x4-throughput-bf16-sf8byh808a
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct A100x4 BF16 Throughput
          ngcMetadata:
            00e6f59e1003f038ecee8e9aa3ab2d40745bef214c476a381b21886dd8383952:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 140GB
        # TensorRT-LLM profile for 4x H200: fp8 precision, latency profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h200x4-latency-fp8-nju7sb1wcw
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H200x4 FP8 Latency
          ngcMetadata:
            13a9a5e5b372db6e92ecd2523a1a5d8b8f6ebd3fa8849608481e05a596a38d9e:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 8x H100_NVL: bf16 precision, latency profile, tp=8, pp=1, feat_lora 'false'.
        # NOTE(review): the tag `hf-5825c91-tool-calling-fix-checksum` is reused by several
        # H100_NVL / A100_SXM4_40GB entries in this file and, unlike sibling IDs, does not
        # encode GPU, count, profile or precision; only the ngcMetadata key tells these
        # entries apart. Presumably a deliberately shared artifact — confirm.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100_NVLx8 BF16 Latency
          ngcMetadata:
            144fcde387869e92dfec8597f477ad671ee4424269e3e25cd16037c721bf925d:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # TensorRT-LLM profile for 4x H100_NVL: bf16 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        # NOTE(review): the tag `hf-5825c91-tool-calling-fix-checksum` is reused by several
        # H100_NVL / A100_SXM4_40GB entries in this file and, unlike sibling IDs, does not
        # encode GPU, count, profile or precision; only the ngcMetadata key tells these
        # entries apart. Presumably a deliberately shared artifact — confirm.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100_NVLx4 BF16 Throughput
          ngcMetadata:
            14654290e66815c15ef45c507c483a4bcc3a22fcc11a479083bce0a14b743b71:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # TensorRT-LLM profile for 8x H100: fp8 precision, latency profile, tp=8, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h100x8-latency-fp8-z88enisl8a
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100x8 FP8 Latency
          ngcMetadata:
            233973ff86b33b1076b8d8dfbf1b1c292ad224ae2d9c8b18f28a44b6f6f42768:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 70GB
        # TensorRT-LLM profile for 4x L40S: fp8 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:l40sx4-throughput-fp8-daydbgtrgg
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct L40Sx4 FP8 Throughput
          ngcMetadata:
            3d0e5989f2fbc23e7d4504cd69269c9636deb61d0efc12225d3d59d54afea297:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 2x B200: fp8 precision, latency profile, tp=2, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:b200x2-latency-fp8-yoijdqa45a
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct B200x2 FP8 Latency
          ngcMetadata:
            4950d30811e1e426e97cda69e6c03a8a4819db8aa4abf34722ced4542a1f6b52:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 2  # plain integer here; number_of_gpus above is the quoted string '2'
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 2x H100_NVL: fp8 precision, throughput profile, tp=2, pp=1, feat_lora 'false'.
        # NOTE(review): the tag `hf-5825c91-tool-calling-fix-checksum` is reused by several
        # H100_NVL / A100_SXM4_40GB entries in this file and, unlike sibling IDs, does not
        # encode GPU, count, profile or precision; only the ngcMetadata key tells these
        # entries apart. Presumably a deliberately shared artifact — confirm.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100_NVLx2 FP8 Throughput
          ngcMetadata:
            4c538175eb36814513f5c95c115c8ed15273f0cffda9d2d355a17f0f311f2fbd:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 2  # plain integer here; number_of_gpus above is the quoted string '2'
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # TensorRT-LLM profile for 8x H100_NVL: fp8 precision, latency profile, tp=8, pp=1, feat_lora 'false'.
        # NOTE(review): the tag `hf-5825c91-tool-calling-fix-checksum` is reused by several
        # H100_NVL / A100_SXM4_40GB entries in this file and, unlike sibling IDs, does not
        # encode GPU, count, profile or precision; only the ngcMetadata key tells these
        # entries apart. Presumably a deliberately shared artifact — confirm.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100_NVLx8 FP8 Latency
          ngcMetadata:
            582fd7bfbe504eb5ee4ded5254cced1d83ea2682a91b6dd6610af842be947ecc:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # TensorRT-LLM profile for 8x L40S: bf16 precision, throughput profile, tp=8, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:l40sx8-throughput-bf16-essm4-kcrg
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct L40Sx8 BF16 Throughput
          ngcMetadata:
            60b95dfcc3a17bf00cabb2da1a264f5e8757763d0ebe2a3a073c5c0fc7c078ec:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 151GB
        # TensorRT-LLM profile for 8x A100: bf16 precision, latency profile, tp=8, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:a100x8-latency-bf16-i6bl589a3a
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct A100x8 BF16 Latency
          ngcMetadata:
            646e2eff5f305302c1cd5fe873ef7c8172021d9948157163761817c4e36352d7:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 150GB
        # TensorRT-LLM profile for 4x H100_NVL: fp8 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        # NOTE(review): the tag `hf-5825c91-tool-calling-fix-checksum` is reused by several
        # H100_NVL / A100_SXM4_40GB entries in this file and, unlike sibling IDs, does not
        # encode GPU, count, profile or precision; only the ngcMetadata key tells these
        # entries apart. Presumably a deliberately shared artifact — confirm.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100_NVLx4 FP8 Throughput
          ngcMetadata:
            6708ebad5077e24eaff0eabce1134feb16b2a35d2313567b94e3f27479a90544:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # TensorRT-LLM profile for 4x H100: fp8 precision, latency profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h100x4-latency-fp8-tkp3aadetg
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100x4 FP8 Latency
          ngcMetadata:
            6d6d2aebdecec52d7982746f98b00421cf53e10295a9ac7f993e4554fa164d10:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 4x H200: bf16 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h200x4-throughput-bf16-eltdntbjla
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H200x4 BF16 Throughput
          ngcMetadata:
            6dc00fc21eb6d8de62d35c96eed22174e205fdb3db816dbe547deeb37fbdd9a8:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            # NOTE(review): 69GB equals the size listed for fp8 profiles in this file, while
            # the A100 / H100 / L40S bf16 profiles list 138-151GB — confirm this value.
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 8x H100: bf16 precision, latency profile, tp=8, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h100x8-latency-bf16-kwqeyhgvua
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100x8 BF16 Latency
          ngcMetadata:
            758482618a1f166cc4e620228600410a6f05649a05c1838d5a93572d44289b95:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 147GB
        # TensorRT-LLM profile for 4x H100_NVL: fp8 precision, latency profile, tp=4, pp=1, feat_lora 'false'.
        # NOTE(review): the tag `hf-5825c91-tool-calling-fix-checksum` is reused by several
        # H100_NVL / A100_SXM4_40GB entries in this file and, unlike sibling IDs, does not
        # encode GPU, count, profile or precision; only the ngcMetadata key tells these
        # entries apart. Presumably a deliberately shared artifact — confirm.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100_NVLx4 FP8 Latency
          ngcMetadata:
            76fc388794dc368145a440d16d72c0ba70e4aecac09901fe4a2c06a767c7eb0d:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # TensorRT-LLM profile for 4x H100: fp8 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h100x4-throughput-fp8-cqigo1kenw
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100x4 FP8 Throughput
          ngcMetadata:
            7d8a02f47911fb7ddf1a6f6b09438f621b6057cb21098999484f09d5a5bb7b23:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 2x H100: fp8 precision, throughput profile, tp=2, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h100x2-throughput-fp8--pwiqokzsa
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100x2 FP8 Throughput
          ngcMetadata:
            7ee2258631ed9d51ebfe5ab44bd547ae5777217686d87cc89c15d06ccdca4047:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2  # plain integer here; number_of_gpus above is the quoted string '2'
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 4x H100: bf16 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h100x4-throughput-bf16-ygpeeau-0q
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100x4 BF16 Throughput
          ngcMetadata:
            7f99ed5107c79b938b0ef4fcf2dd21aac27281f71d41a0a7c46d649879d374f0:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 138GB
        # TensorRT-LLM profile for 1x B200: fp8 precision, throughput profile, tp=1, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:b200x1-throughput-fp8-sfrhca0ipw
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct B200x1 FP8 Throughput
          ngcMetadata:
            8b87146e39b0305ae1d73bc053564d1b4b4c565f81aa5abe3e84385544ca9b60:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 1  # plain integer here; number_of_gpus above is the quoted string '1'
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 68GB
        # TensorRT-LLM profile for 4x B200: fp8 precision, latency profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:b200x4-latency-fp8-jwv73nrwia
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct B200x4 FP8 Latency
          ngcMetadata:
            9527145a2d1316a1e55581d1f6b0a45e394fe37b853ec5172dea14c2c9767d96:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 4x H200: bf16 precision, latency profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h200x4-latency-bf16-bdxpl7wu-g
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H200x4 BF16 Latency
          ngcMetadata:
            99142c13a095af184ae20945a208a81fae8d650ac0fd91747b03148383f882cf:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            # NOTE(review): 69GB equals the size listed for fp8 profiles in this file, while
            # the A100 / H100 / L40S bf16 profiles list 138-151GB — confirm this value.
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 4x B200: bf16 precision, throughput profile, tp=4, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:b200x4-throughput-bf16-dnxvrdjuta
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct B200x4 BF16 Throughput
          ngcMetadata:
            9b4836e143f78d245cf161c16a225be11d3e8f9b2024b99dd76e5b2ac6cd7efd:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 4  # plain integer here; number_of_gpus above is the quoted string '4'
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            # NOTE(review): 69GB equals the size listed for fp8 profiles in this file, while
            # the A100 / H100 / L40S bf16 profiles list 138-151GB — confirm this value.
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 1x H200: fp8 precision, throughput profile, tp=1, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h200x1-throughput-fp8-9qirfnkola
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H200x1 FP8 Throughput
          ngcMetadata:
            af876a179190d1832143f8b4f4a71f640f3df07b0503259cedee3e3a8363aa96:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 1  # plain integer here; number_of_gpus above is the quoted string '1'
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 68GB
        # TensorRT-LLM profile for 2x H200: bf16 precision, throughput profile, tp=2, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h200x2-throughput-bf16-qxgo9ky1rq
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H200x2 BF16 Throughput
          ngcMetadata:
            b407d3df1db123ba8a4c98fb9f73790c01cd53a70fa0e0185814ad57a17cb72b:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 2  # plain integer here; number_of_gpus above is the quoted string '2'
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            # NOTE(review): 69GB equals the size listed for fp8 profiles in this file, while
            # the A100 / H100 / L40S bf16 profiles list 138-151GB — confirm this value.
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 2x H200: fp8 precision, throughput profile, tp=2, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h200x2-throughput-fp8-j5rwrqq4aa
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H200x2 FP8 Throughput
          ngcMetadata:
            c91a755246cb08dd9aa6905bc40b7db552071d141a850be5a791b06eb4fb2ef8:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 2  # plain integer here; number_of_gpus above is the quoted string '2'
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # TensorRT-LLM profile for 8x H100: bf16 precision, throughput profile, tp=8, pp=1, feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h100x8-throughput-bf16-2i0l24npsg
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100x8 BF16 Throughput
          ngcMetadata:
            d128c772583bd10da4f31bf8e961893eb2b62363f3cecb94b5ef67d8bbd54665:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 147GB
        # TensorRT-LLM profile for 8x H100_NVL: bf16 precision, throughput profile, tp=8, pp=1, feat_lora 'false'.
        # NOTE(review): the tag `hf-5825c91-tool-calling-fix-checksum` is reused by several
        # H100_NVL / A100_SXM4_40GB entries in this file and, unlike sibling IDs, does not
        # encode GPU, count, profile or precision; only the ngcMetadata key tells these
        # entries apart. Presumably a deliberately shared artifact — confirm.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H100_NVLx8 BF16 Throughput
          ngcMetadata:
            d14fa7bd1f4287e74b856fe3f0030312cc4d03b8fe35a8c8aaedf0140ac55067:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          # spec restates the tags and release above as upper-case key/value pairs
          # and adds DOWNLOAD SIZE; edit both copies together.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8  # plain integer here; number_of_gpus above is the quoted string '8'
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # Llama 3.3 70B Instruct on 2x B200: bf16, "throughput" profile, tp=2 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:b200x2-throughput-bf16-4qvbdeuv4a
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct B200x2 BF16 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            d33e8144476992a7d8d621d8e50cf66b89d254dc721aa2782e5a5a6f07b1af80:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            # NOTE(review): 69GB equals the size listed for this variant's FP8 H200x2 profile,
            # while its other BF16 profiles list 132GB or more — confirm against the NGC manifest.
            - key: DOWNLOAD SIZE
              value: 69GB
        # Llama 3.3 70B Instruct on 8x A100_SXM4_40GB: bf16, "throughput" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        # NOTE(review): the tag hf-5825c91-tool-calling-fix-checksum in this profileId is reused
        # by several other profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct A100_SXM4_40GBx8 BF16 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            dc0f5f87ca37f69af7f525ac293c599cd0cbdaf8130da4d9e2ad63d376b12039:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: A100_SXM4_40GB
                gpu_device: 20b0:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100_SXM4_40GB
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 20B0:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # Llama 3.3 70B Instruct on 2x H200: fp8, "latency" profile, tp=2 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:h200x2-latency-fp8-pgmrxe0j3g
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct H200x2 FP8 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            e4f217a5fb016b570e34b8a8eb06051ccfef9534ba43da973bb7f678242eaa5f:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        # Llama 3.3 70B Instruct on 4x B200: bf16, "latency" profile, tp=4 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/meta/llama-3.3-70b-instruct:b200x4-latency-bf16-mnjb4olhmw
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct B200x4 BF16 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            f17543bf1ee65e4a5c485385016927efe49cbc068a6021573d83eacb32537f76:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            # NOTE(review): 69GB equals the size listed for this variant's FP8 H200x2 profile,
            # while its other BF16 profiles list 132GB or more — confirm against the NGC manifest.
            - key: DOWNLOAD SIZE
              value: 69GB
        # Generic Llama 3.3 70B Instruct profile for 8 GPUs: bf16, tp=8 / pp=1, feat_lora 'false',
        # trtllm_buildable 'true'. Unlike the GPU-specific entries it carries no gpu, gpu_device,
        # number_of_gpus or profile tag.
        # NOTE(review): the tag hf-5825c91-tool-calling-fix-checksum in this profileId is reused
        # by several other profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct Generic NVIDIA GPUx8 BF16
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            1d7b604f835f74791e6bfd843047fc00a5aef0f72954ca48ce963811fb6f3f09:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '8'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          # Reduced spec: only PRECISION, COUNT, NIM VERSION and DOWNLOAD SIZE. COUNT equals the
          # tp tag, since there is no number_of_gpus tag here.
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 8
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # Generic Llama 3.3 70B Instruct profile for 2 GPUs: bf16, tp=2 / pp=1, feat_lora 'false',
        # trtllm_buildable 'true'. Unlike the GPU-specific entries it carries no gpu, gpu_device,
        # number_of_gpus or profile tag.
        # NOTE(review): the tag hf-5825c91-tool-calling-fix-checksum in this profileId is reused
        # by several other profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct Generic NVIDIA GPUx2 BF16
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            375dc0ff86133c2a423fbe9ef46d8fdf12d6403b3caa3b8e70d7851a89fc90dd:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '2'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          # Reduced spec: only PRECISION, COUNT, NIM VERSION and DOWNLOAD SIZE. COUNT equals the
          # tp tag, since there is no number_of_gpus tag here.
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        # Generic Llama 3.3 70B Instruct profile for 4 GPUs: bf16, tp=4 / pp=1, feat_lora 'false',
        # trtllm_buildable 'true'. Unlike the GPU-specific entries it carries no gpu, gpu_device,
        # number_of_gpus or profile tag.
        # NOTE(review): the tag hf-5825c91-tool-calling-fix-checksum in this profileId is reused
        # by several other profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/meta/llama-3.3-70b-instruct:hf-5825c91-tool-calling-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.3 70B Instruct Generic NVIDIA GPUx4 BF16
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            54946b08b79ecf9e7f2d5c000234bf2cce19c8fee21b243c1a084b03897e8c95:
              model: meta/llama-3.3-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '4'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          # Reduced spec: only PRECISION, COUNT, NIM VERSION and DOWNLOAD SIZE. COUNT equals the
          # tp tag, since there is no number_of_gpus tag here.
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 4
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
  # Model-level labels for the Llama 3.3 70B Instruct entry (siblings of modelVariants).
  labels:
    - Llama
    - Meta
    - Chat
    - Text Generation
    - Large Language Model
    - NVIDIA Validated
  # Model-level config: a single architecture value ("Other") and the model type.
  config:
    architectures:
      - Other
    modelType: llama
  # NOTE(review): licenseAgreements for this model link to llama.com, while this field names an
  # NVIDIA license — confirm both are intended.
  license: NVIDIA AI Foundation Models Community License
# Registry entry for Llama 3.3 Nemotron Super 49B V1 (type NGC). It has one variant whose
# optimizationProfiles list follows.
- name: Llama 3.3 Nemotron Super 49B V1
  displayName: Llama 3.3 Nemotron Super 49B V1
  # Unlike the Llama 3.3 70B Instruct entry, this modelHubID carries an org prefix ("nvidia/").
  modelHubID: nvidia/llama-3.3-nemotron-super-49b-v1
  category: Chatbots
  type: NGC
  description: Llama-3.3-Nemotron-Super-49B-v1 is a language model that can follow instructions, complete requests, and generate creative text formats. The Llama-3.3-Nemotron-Super-49B-v1 Large Language Model (LLM) is an instruct fine-tuned version of the Llama-Nemotron.
  requireLicense: true
  # Two agreements (use policy and license), both hosted on nvidia.com.
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: Llama 3.3 Nemotron Super 49B V1
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/llama-3.3-nemotron-super-49b-v1
      optimizationProfiles:
        # Llama 3.3 Nemotron Super 49B V1 on 4x A100: bf16, "throughput" profile, tp=4 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:a100x4-throughput-bf16--d40eserlg
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 A100x4 BF16 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm). The same key appears on the Llama 3.3 70B Instruct
            # A100x4 BF16 Throughput profile, whose tags are identical.
            00e6f59e1003f038ecee8e9aa3ab2d40745bef214c476a381b21886dd8383952:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 101GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x H100_NVL: bf16, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        # NOTE(review): the tag hf-1a2cb80-nim-tool-use in this profileId is reused by several
        # other H100_NVL profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100_NVLx8 BF16 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            144fcde387869e92dfec8597f477ad671ee4424269e3e25cd16037c721bf925d:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 93GB
        # Llama 3.3 Nemotron Super 49B V1 on 4x H100_NVL: bf16, "throughput" profile, tp=4 / pp=1,
        # feat_lora 'false'.
        # NOTE(review): the tag hf-1a2cb80-nim-tool-use in this profileId is reused by several
        # other H100_NVL profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100_NVLx4 BF16 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            14654290e66815c15ef45c507c483a4bcc3a22fcc11a479083bce0a14b743b71:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 93GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x H100: fp8, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h100x8-latency-fp8-sfw5xn1oba
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100x8 FP8 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            233973ff86b33b1076b8d8dfbf1b1c292ad224ae2d9c8b18f28a44b6f6f42768:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 51GB
        # Llama 3.3 Nemotron Super 49B V1 on 2x B200: fp8, "throughput" profile, tp=2 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:b200x2-throughput-fp8-kiq2efz-dq
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 B200x2 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            26bd84b107a99415b474267bec4cbcf932fbb28e45d7fb4e4db2971506825888:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 49GB
        # Llama 3.3 Nemotron Super 49B V1 on 4x L40S: fp8, "throughput" profile, tp=4 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:l40sx4-throughput-fp8-dtuojeeekw
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 L40Sx4 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            3d0e5989f2fbc23e7d4504cd69269c9636deb61d0efc12225d3d59d54afea297:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 50GB
        # Llama 3.3 Nemotron Super 49B V1 on 2x H100_NVL: fp8, "throughput" profile, tp=2 / pp=1,
        # feat_lora 'false'.
        # NOTE(review): the tag hf-1a2cb80-nim-tool-use in this profileId is reused by several
        # other H100_NVL profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100_NVLx2 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            4c538175eb36814513f5c95c115c8ed15273f0cffda9d2d355a17f0f311f2fbd:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            # NOTE(review): 93GB matches the BF16 profiles that share this profileId, not the
            # 49-51GB listed by this variant's other FP8 profiles — confirm this is intended.
            - key: DOWNLOAD SIZE
              value: 93GB
        # Llama 3.3 Nemotron Super 49B V1 on 1x H100_NVL: fp8, "throughput" profile, tp=1 / pp=1,
        # feat_lora 'false'.
        # NOTE(review): the tag hf-1a2cb80-nim-tool-use in this profileId is reused by several
        # other H100_NVL profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100_NVLx1 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            5811750e70b7e9f340f4d670c72fcbd5282e254aeb31f62fd4f937cfb9361007:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            # NOTE(review): 93GB matches the BF16 profiles that share this profileId, not the
            # 49-51GB listed by this variant's other FP8 profiles — confirm this is intended.
            - key: DOWNLOAD SIZE
              value: 93GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x H100_NVL: fp8, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        # NOTE(review): the tag hf-1a2cb80-nim-tool-use in this profileId is reused by several
        # other H100_NVL profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100_NVLx8 FP8 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            582fd7bfbe504eb5ee4ded5254cced1d83ea2682a91b6dd6610af842be947ecc:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            # NOTE(review): 93GB matches the BF16 profiles that share this profileId, not the
            # 49-51GB listed by this variant's other FP8 profiles — confirm this is intended.
            - key: DOWNLOAD SIZE
              value: 93GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x A100: bf16, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:a100x8-latency-bf16-96llyrpauw
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 A100x8 BF16 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            646e2eff5f305302c1cd5fe873ef7c8172021d9948157163761817c4e36352d7:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 111GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x L40S: bf16, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:l40sx8-latency-bf16-9kyxnmiu9w
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 L40Sx8 BF16 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            66341208a7bba7fdde341dcad4a654eecb27681d2e322ec10c4fde9970030c26:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 111GB
        # Llama 3.3 Nemotron Super 49B V1 on 4x H100_NVL: fp8, "throughput" profile, tp=4 / pp=1,
        # feat_lora 'false'.
        # NOTE(review): the tag hf-1a2cb80-nim-tool-use in this profileId is reused by several
        # other H100_NVL profiles of this variant, so profileId is not unique on its own —
        # confirm consumers key on the ngcMetadata entry instead.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100_NVLx4 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            6708ebad5077e24eaff0eabce1134feb16b2a35d2313567b94e3f27479a90544:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            # NOTE(review): 93GB matches the BF16 profiles that share this profileId, not the
            # 49-51GB listed by this variant's other FP8 profiles — confirm this is intended.
            - key: DOWNLOAD SIZE
              value: 93GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x H100: bf16, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h100x8-latency-bf16-prto0dmpjw
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100x8 BF16 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            758482618a1f166cc4e620228600410a6f05649a05c1838d5a93572d44289b95:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 109GB
        # Llama 3.3 Nemotron Super 49B V1 on 1x H100: fp8, "throughput" profile, tp=1 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h100x1-throughput-fp8-kjzavt-3zq
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100x1 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            7b508014e846234db3cabe5c9f38568b4ee96694b60600a0b71c621dc70cacf3:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 49GB
        # Llama 3.3 Nemotron Super 49B V1 on 4x H100: fp8, "throughput" profile, tp=4 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h100x4-throughput-fp8-slqbwxm0vq
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100x4 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            7d8a02f47911fb7ddf1a6f6b09438f621b6057cb21098999484f09d5a5bb7b23:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 50GB
        # Llama 3.3 Nemotron Super 49B V1 on 2x H100: fp8, "throughput" profile, tp=2 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h100x2-throughput-fp8-4ocry3irow
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100x2 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            7ee2258631ed9d51ebfe5ab44bd547ae5777217686d87cc89c15d06ccdca4047:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 49GB
        # Llama 3.3 Nemotron Super 49B V1 on 4x H100: bf16, "throughput" profile, tp=4 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h100x4-throughput-bf16-fzhqywxh-a
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H100x4 BF16 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            7f99ed5107c79b938b0ef4fcf2dd21aac27281f71d41a0a7c46d649879d374f0:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 100GB
        # Llama 3.3 Nemotron Super 49B V1 on 1x B200: fp8, "throughput" profile, tp=1 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:b200x1-throughput-fp8-cpviqqa47q
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 B200x1 FP8 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            8b87146e39b0305ae1d73bc053564d1b4b4c565f81aa5abe3e84385544ca9b60:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 49GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x B200: fp8, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:b200x8-latency-fp8-jbthzwoarq
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 B200x8 FP8 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            8f9f165fc2a52b860b8eca20856e3bf5f6dc411ff3e2d1e617b1e4408a1d0191:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 50GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x B200: bf16, "latency" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:b200x8-latency-bf16-glb4omvl8q
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 B200x8 BF16 Latency
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            91df8db9fbe818a6a9c3cb1779f151ac7bc70d4806924abdd591c7cf1bfee2f6:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 108GB
        # Llama 3.3 Nemotron Super 49B V1 on 8x A10G: bf16, "throughput" profile, tp=8 / pp=1,
        # feat_lora 'false'.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:a10gx8-throughput-bf16-ea3czux3aq
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 A10Gx8 BF16 Throughput
          ngcMetadata:
            # 64-hex-character key for this profile's metadata record (presumably the NGC
            # profile hash — TODO confirm).
            935ec3ac922bf54106311dfc6b3214a1651a26033b4f5007b6351fffb4058b7a:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          # spec repeats the tag values above in upper case, the release as NIM VERSION, and
          # adds DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 111GB
        # Pre-built TensorRT-LLM profile: 4x H200, BF16, latency-tuned (tp=4, pp=1), NIM release 1.8.6.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h200x4-latency-bf16-2v7ziveceg
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H200x4 BF16 Latency
          ngcMetadata:
            99142c13a095af184ae20945a208a81fae8d650ac0fd91747b03148383f882cf:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 100GB
        # Pre-built TensorRT-LLM profile: 4x B200, BF16, throughput-tuned (tp=4, pp=1), NIM release 1.8.6.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:b200x4-throughput-bf16-wudyjwpk6w
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 B200x4 BF16 Throughput
          ngcMetadata:
            9b4836e143f78d245cf161c16a225be11d3e8f9b2024b99dd76e5b2ac6cd7efd:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 100GB
        # Pre-built TensorRT-LLM profile: 4x B200, FP8, throughput-tuned (tp=4, pp=1), NIM release 1.8.6.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:b200x4-throughput-fp8-mvpvygyr-g
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 B200x4 FP8 Throughput
          ngcMetadata:
            a9b23031714881187b3beddb0eaa526006c799def8fca0e7975721724296a9d2:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 50GB
        # Pre-built TensorRT-LLM profile: 4x L40S, BF16, throughput-tuned (tp=4, pp=1), NIM release 1.8.6.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:l40sx4-throughput-bf16-gt01zn8w7a
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 L40Sx4 BF16 Throughput
          ngcMetadata:
            ab8f2faec3bcafc32efaf05acada4df4d8a171a759b4fb5c44d2d9d43a348764:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 101GB
        # Pre-built TensorRT-LLM profile: 1x H200, FP8, throughput-tuned (tp=1, pp=1), NIM release 1.8.6.
        # NOTE(review): the ngcMetadata hash below also appears under the H200x1 FP8 throughput profile of
        # llama-3.1-nemotron-nano-8b-v1 in this file; confirm the hash is expected to be shared across models.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h200x1-throughput-fp8-bj-uzcumnq
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H200x1 FP8 Throughput
          ngcMetadata:
            af876a179190d1832143f8b4f4a71f640f3df07b0503259cedee3e3a8363aa96:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 49GB
        # Pre-built TensorRT-LLM profile: 2x H200, BF16, throughput-tuned (tp=2, pp=1), NIM release 1.8.6.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h200x2-throughput-bf16-cqdwimpbbw
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H200x2 BF16 Throughput
          ngcMetadata:
            b407d3df1db123ba8a4c98fb9f73790c01cd53a70fa0e0185814ad57a17cb72b:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 96GB
        # TensorRT-LLM profile: 8x A100_SXM4_40GB, BF16, throughput-tuned (tp=8, pp=1), NIM release 1.8.6.
        # NOTE(review): the profileId tag `hf-1a2cb80-nim-tool-use` does not follow the
        # `<gpu>x<count>-<profile>-<precision>-<suffix>` pattern of sibling entries and is reused by several
        # other profiles of this variant, so profileId alone does not identify this entry — confirm intended.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 A100_SXM4_40GBx8 BF16 Throughput
          ngcMetadata:
            dc0f5f87ca37f69af7f525ac293c599cd0cbdaf8130da4d9e2ad63d376b12039:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100_SXM4_40GB
                gpu_device: 20b0:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100_SXM4_40GB
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 20B0:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 93GB
        # Pre-built TensorRT-LLM profile: 8x L40S, FP8, latency-tuned (tp=8, pp=1), NIM release 1.8.6.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:l40sx8-latency-fp8-ataopkp21a
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 L40Sx8 FP8 Latency
          ngcMetadata:
            e19c01f4cfb3b39ba19830f23fde73783d9c3044a5864bdee29e13c867a5382c:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 51GB
        # Pre-built TensorRT-LLM profile: 2x H200, FP8, latency-tuned (tp=2, pp=1), NIM release 1.8.6.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:h200x2-latency-fp8-gljasu2ggw
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 H200x2 FP8 Latency
          ngcMetadata:
            e4f217a5fb016b570e34b8a8eb06051ccfef9534ba43da973bb7f678242eaa5f:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 49GB
        # TensorRT-LLM profile: 1x GH200_480GB, FP8, throughput-tuned (tp=1, pp=1), NIM release 1.8.6.
        # NOTE(review): the profileId tag `hf-1a2cb80-nim-tool-use` is reused by several other profiles of
        # this variant and does not encode GPU/profile/precision like sibling entries — confirm intended.
        # NOTE(review): DOWNLOAD SIZE is 93GB, the same as the generic BF16 entries, whereas the other FP8
        # profiles of this model are listed at 49-51GB — verify the size.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 GH200_480GBx1 FP8 Throughput
          ngcMetadata:
            f49b49f3d90159a594def51efd8595f1d618e288bca2721fe08e786a1ac67d04:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: GH200_480GB
                gpu_device: 2342:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: GH200_480GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2342:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 93GB
        # Generic profile (no gpu / gpu_device / profile tags): BF16, tp=8, pp=1, tagged trtllm_buildable.
        # The `spec` list therefore carries only PRECISION, COUNT, NIM VERSION and DOWNLOAD SIZE.
        # NOTE(review): profileId is identical to the other generic entries of this variant; only the
        # ngcMetadata hash and tp/COUNT distinguish them — confirm consumers do not key on profileId.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 Generic NVIDIA GPUx8 BF16
          ngcMetadata:
            1d7b604f835f74791e6bfd843047fc00a5aef0f72954ca48ce963811fb6f3f09:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '8'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 8
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 93GB
        # Generic profile (no gpu / gpu_device / profile tags): BF16, tp=2, pp=1, tagged trtllm_buildable.
        # NOTE(review): profileId is identical to the other generic entries of this variant; only the
        # ngcMetadata hash and tp/COUNT distinguish them — confirm consumers do not key on profileId.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 Generic NVIDIA GPUx2 BF16
          ngcMetadata:
            375dc0ff86133c2a423fbe9ef46d8fdf12d6403b3caa3b8e70d7851a89fc90dd:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '2'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 93GB
        # Generic profile (no gpu / gpu_device / profile tags): BF16, tp=4, pp=1, tagged trtllm_buildable.
        # NOTE(review): profileId is identical to the other generic entries of this variant; only the
        # ngcMetadata hash and tp/COUNT distinguish them — confirm consumers do not key on profileId.
        - profileId: nim/nvidia/llama-3.3-nemotron-super-49b-v1:hf-1a2cb80-nim-tool-use
          framework: TensorRT-LLM
          displayName: Llama 3.3 Nemotron Super 49B V1 Generic NVIDIA GPUx4 BF16
          ngcMetadata:
            54946b08b79ecf9e7f2d5c000234bf2cce19c8fee21b243c1a084b03897e8c95:
              model: nvidia/llama-3.3-nemotron-super-49b-v1
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '4'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 4
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 93GB
  # Model-level metadata that closes the model entry whose optimization profiles precede this point:
  # free-form labels, an architecture/model-type config block, and the license name.
  labels:
    - Llama
    - Chatbots
    - Virtual Assistants
    - Large Language Model
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# Registry entry for the NGC-hosted Llama 3.1 Nemotron Nano 8B V1 model: identity fields, license
# acceptance links, and a single variant whose optimization profiles follow.
# NOTE(review): the description mentions "8B or 4B" although this entry is named 8b — confirm wording.
# NOTE(review): the license URLs use the `llama3` path while the model is named Llama 3.1 — confirm
# these are the intended agreements.
- name: Llama 3.1 Nemotron Nano 8b V1
  displayName: Llama 3.1 Nemotron Nano 8b V1
  modelHubID: llama-3.1-nemotron-nano-8b-v1
  category: Chatbots
  type: NGC
  description: Llama 3.1 Nemotron Nano 8B or 4B is a language model that can follow instructions, complete requests, and generate creative text formats.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://llama.meta.com/llama3/use-policy/
    - label: License Agreement
      url: https://llama.meta.com/llama3/license/
  modelVariants:
    - variantId: Llama 3.1 Nemotron Nano 8b V1
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/llama-3.1-nemotron-nano-8b-v1
      optimizationProfiles:
        # Pre-built TensorRT-LLM profile: 2x A100, BF16, latency-tuned (tp=2, pp=1), NIM release 1.8.4.
        # The `spec` list repeats the ngcMetadata tags in upper-case display form and adds the download size.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:a100x2-latency-bf16-zxsnn7zu2g
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 A100x2 BF16 Latency
          ngcMetadata:
            2146fcf18ea0412d564c6ed21d2f727281b95361fd78ccfa3d0570ec1716e8db:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 17GB
        # Pre-built TensorRT-LLM profile: 1x A100, BF16, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:a100x1-throughput-bf16-jfn07bk9ua
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 A100x1 BF16 Throughput
          ngcMetadata:
            222d1729a785201e8a021b226d74d227d01418c41b556283ee1bdbf0a818bd94:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 16GB
        # TensorRT-LLM profile: 1x H100_NVL, BF16, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        # NOTE(review): the profileId tag `hf-25.03.17-0508-tool-use-v2` does not follow the
        # `<gpu>x<count>-<profile>-<precision>-<suffix>` pattern of sibling entries and is reused by the
        # other H100_NVL profiles of this variant — confirm consumers do not key on profileId.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:hf-25.03.17-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100_NVLx1 BF16 Throughput
          ngcMetadata:
            25b5e251d366671a4011eaada9872ad1d02b48acc33aa0637853a3e3c3caa516:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 15GB
        # Pre-built TensorRT-LLM profile: 1x H200, BF16, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h200x1-throughput-bf16-hqyhv2wimw
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H200x1 BF16 Throughput
          ngcMetadata:
            434e8d336fa23cbe151748d32b71e196d69f20d319ee8b59852a1ca31a48d311:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 16GB
        # TensorRT-LLM profile: 1x H100_NVL, FP8, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        # NOTE(review): the profileId tag `hf-25.03.17-0508-tool-use-v2` is reused by the other H100_NVL
        # profiles of this variant and does not encode GPU/profile/precision — confirm intended.
        # NOTE(review): DOWNLOAD SIZE is 15GB while the other FP8 profiles of this model list 9GB — verify.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:hf-25.03.17-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100_NVLx1 FP8 Throughput
          ngcMetadata:
            5811750e70b7e9f340f4d670c72fcbd5282e254aeb31f62fd4f937cfb9361007:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 15GB
        # Pre-built TensorRT-LLM profile: 2x H200, BF16, latency-tuned (tp=2, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h200x2-latency-bf16-q6opgs6yja
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H200x2 BF16 Latency
          ngcMetadata:
            6832a9395f54086162fd7b1c6cfaae17c7d1e535a60e2b7675504c9fc7b57689:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 17GB
        # Pre-built TensorRT-LLM profile: 2x H100, FP8, latency-tuned (tp=2, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h100x2-latency-fp8-zsiywmloya
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100x2 FP8 Latency
          ngcMetadata:
            6c3f01dd2b2a56e3e83f70522e4195d3f2add70b28680082204bbb9d6150eb04:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
        # Pre-built TensorRT-LLM profile: 1x H100, FP8, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h100x1-throughput-fp8-5tn9pkgdbq
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100x1 FP8 Throughput
          ngcMetadata:
            7b508014e846234db3cabe5c9f38568b4ee96694b60600a0b71c621dc70cacf3:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
        # Pre-built TensorRT-LLM profile: 4x L40S, BF16, latency-tuned (tp=4, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:l40sx4-latency-bf16-k3y094rsxq
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 L40Sx4 BF16 Latency
          ngcMetadata:
            844ebe2b42df8de8ce66cbb6ecf43f90858ea7efc14ddf020cf1ae7450ae0c33:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 19GB
        # Pre-built TensorRT-LLM profile: 2x A10G, BF16, throughput-tuned (tp=2, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:a10gx2-throughput-bf16-htgj9vhmiw
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 A10Gx2 BF16 Throughput
          ngcMetadata:
            8a62b002be0b7f82c407e5ed45c50dabe654deca052b521a920682f918323d0d:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 17GB
        # Pre-built TensorRT-LLM profile: 2x L40S, BF16, throughput-tuned (tp=2, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:l40sx2-throughput-bf16-qivaletdla
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 L40Sx2 BF16 Throughput
          ngcMetadata:
            973a6bfbfc5d13fc5eb18f5011fab777a5bd257d5807e97f842a3364e82160dc:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 17GB
        # TensorRT-LLM profile: 2x H100_NVL, FP8, latency-tuned (tp=2, pp=1), NIM release 1.8.4.
        # NOTE(review): the profileId tag `hf-25.03.17-0508-tool-use-v2` is reused by the other H100_NVL
        # profiles of this variant and does not encode GPU/profile/precision — confirm intended.
        # NOTE(review): DOWNLOAD SIZE is 15GB while the other FP8 profiles of this model list 9GB — verify.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:hf-25.03.17-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100_NVLx2 FP8 Latency
          ngcMetadata:
            a00ce1e782317cd19ed192dcb0ce26ab8b0c1da8928c33de8893897888ff7580:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 15GB
        # Pre-built TensorRT-LLM profile: 1x L40S, BF16, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:l40sx1-throughput-bf16-anodjae0ya
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 L40Sx1 BF16 Throughput
          ngcMetadata:
            ac5071bbd91efcc71dc486fcd5210779570868b3b8328b4abf7a408a58b5e57c:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 16GB
        # Pre-built TensorRT-LLM profile: 1x L40S, FP8, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:l40sx1-throughput-fp8-dbamkqep8q
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 L40Sx1 FP8 Throughput
          ngcMetadata:
            ad17776f4619854fccd50354f31132a558a1ca619930698fd184d6ccf5fe3c99:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
        # Pre-built TensorRT-LLM profile: 1x H200, FP8, throughput-tuned (tp=1, pp=1), NIM release 1.8.4.
        # NOTE(review): the ngcMetadata hash below also appears under the H200x1 FP8 throughput profile of
        # llama-3.3-nemotron-super-49b-v1 in this file; confirm the hash is expected to be shared across models.
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h200x1-throughput-fp8-mafkx9-zmq
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H200x1 FP8 Throughput
          ngcMetadata:
            af876a179190d1832143f8b4f4a71f640f3df07b0503259cedee3e3a8363aa96:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h100x2-latency-bf16-iq2eo5lxgw
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100x2 BF16 Latency
          ngcMetadata:
            b3d535c0a7eaaea089b087ae645417c0b32fd01e7e9d638217cc032e51e74fd0:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 17GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:hf-25.03.17-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100_NVLx2 BF16 Latency
          ngcMetadata:
            b7fad3b35b07d623fac6549078305b71d0e6e1d228a86fa0f7cfe4dbeca9151a:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 15GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:l40sx2-latency-fp8-hkd8uidneq
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 L40Sx2 FP8 Latency
          ngcMetadata:
            c4ff823a8202af4b523274fb8c6cdd73fa8ee5af16391a6d36b17f714a3c71a0:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h200x2-latency-fp8-a3-t7tca3g
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H200x2 FP8 Latency
          ngcMetadata:
            e4f217a5fb016b570e34b8a8eb06051ccfef9534ba43da973bb7f678242eaa5f:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:h100x1-throughput-bf16-iugafozvdq
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 H100x1 BF16 Throughput
          ngcMetadata:
            e7dbd9a8ce6270d2ec649a0fecbcae9b5336566113525f20aee3809ba5e63856:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 16GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:l40sx2-latency-bf16-z1ujefobmq
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 L40Sx2 BF16 Latency
          ngcMetadata:
            fa36c3502e92c50f78a1906242f929864955e702b7dbfbdb19758fb7ee9aa811:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 17GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:hf-25.03.17-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 Generic NVIDIA GPUx2 BF16
          ngcMetadata:
            375dc0ff86133c2a423fbe9ef46d8fdf12d6403b3caa3b8e70d7851a89fc90dd:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '2'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 15GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:hf-25.03.17-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 Generic NVIDIA GPUx4 BF16
          ngcMetadata:
            54946b08b79ecf9e7f2d5c000234bf2cce19c8fee21b243c1a084b03897e8c95:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '4'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 4
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 15GB
        - profileId: nim/nvidia/llama-3.1-nemotron-nano-8b-v1:hf-25.03.17-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 8B V1 Generic NVIDIA GPUx1 BF16
          ngcMetadata:
            ac34857f8dcbd174ad524974248f2faf271bd2a0355643b2cf1490d0fe7787c2:
              model: nvidia/llama-3.1-nemotron-nano-8b-v1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '1'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 15GB
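    # NOTE(review): this variantId reads "4b V1", but the source URL, the
    # ngcMetadata model name, and the profile displayNames below all say
    # 4B v1.1. Confirm whether the ID should be
    # "Llama 3.1 Nemotron Nano 4B V1.1" before renaming; consumers may key on it.
    - variantId: Llama 3.1 Nemotron Nano 4b V1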
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/llama3.1-nemotron-nano-4b-v1.1
      optimizationProfiles:
        - profileId: nim/nvidia/llama3.1-nemotron-nano-4b-v1.1:hf-9f834a8-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 4B V1.1 Generic NVIDIA GPUx2 BF16
          ngcMetadata:
            375dc0ff86133c2a423fbe9ef46d8fdf12d6403b3caa3b8e70d7851a89fc90dd:
              model: nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '2'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
        - profileId: nim/nvidia/llama3.1-nemotron-nano-4b-v1.1:hf-9f834a8-fix-checksum
          framework: TensorRT-LLM
          displayName: Llama 3.1 Nemotron Nano 4B V1.1 Generic NVIDIA GPUx1 BF16
          ngcMetadata:
            ac34857f8dcbd174ad524974248f2faf271bd2a0355643b2cf1490d0fe7787c2:
              model: nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1
              release: 1.8.4
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '1'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.4
            - key: DOWNLOAD SIZE
              value: 9GB
  labels:
    - Llama
    - Meta
    - Text Generation
    - Large Language Model
    - NVIDIA Validated
    - Nemo
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
- name: Llama 3.1 Instruct
  displayName: Llama 3.1 Instruct
  modelHubID: llama-3.1-instruct
  category: Text Generation
  type: NGC
  description: The Llama 3.1 70B-Instruct, 8B-Instruct, and 8B base NIMs simplify the deployment of the Llama 3.1 70B-Instruct, 8B-Instruct, and 8B base models, which are optimized for language understanding, reasoning, and text generation use cases, and outperform many of the available open source chat models on common industry benchmarks.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.llama.com/llama3_1/use-policy/
    - label: License Agreement
      url: https://www.llama.com/llama3_1/license/
  modelVariants:
    - variantId: Llama 3.1 70B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/llama-3_1-70b-instruct-nemo
      optimizationProfiles:
        - profileId: nim/meta/llama-3_1-70b-instruct:a100x4-throughput-bf16-w0uzw1gkbg
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct A100x4 BF16 Throughput
          ngcMetadata:
            00e6f59e1003f038ecee8e9aa3ab2d40745bef214c476a381b21886dd8383952:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 140GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h200x4-latency-fp8-ihwbqzj9ow
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H200x4 FP8 Latency
          ngcMetadata:
            13a9a5e5b372db6e92ecd2523a1a5d8b8f6ebd3fa8849608481e05a596a38d9e:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100_NVLx8 BF16 Latency
          ngcMetadata:
            144fcde387869e92dfec8597f477ad671ee4424269e3e25cd16037c721bf925d:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100_NVLx4 BF16 Throughput
          ngcMetadata:
            14654290e66815c15ef45c507c483a4bcc3a22fcc11a479083bce0a14b743b71:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h100x8-latency-fp8-aecgnfbvhg
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100x8 FP8 Latency
          ngcMetadata:
            233973ff86b33b1076b8d8dfbf1b1c292ad224ae2d9c8b18f28a44b6f6f42768:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 70GB
        - profileId: nim/meta/llama-3_1-70b-instruct:b200x2-throughput-fp8-xykupukdga
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct B200x2 FP8 Throughput
          ngcMetadata:
            26bd84b107a99415b474267bec4cbcf932fbb28e45d7fb4e4db2971506825888:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 68GB
        - profileId: nim/meta/llama-3_1-70b-instruct:l40sx4-throughput-fp8-uw2s64w-qg
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct L40Sx4 FP8 Throughput
          ngcMetadata:
            3d0e5989f2fbc23e7d4504cd69269c9636deb61d0efc12225d3d59d54afea297:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:b200x2-latency-fp8-mkjcj1u-4g
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct B200x2 FP8 Latency
          ngcMetadata:
            4950d30811e1e426e97cda69e6c03a8a4819db8aa4abf34722ced4542a1f6b52:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100_NVLx2 FP8 Throughput
          ngcMetadata:
            4c538175eb36814513f5c95c115c8ed15273f0cffda9d2d355a17f0f311f2fbd:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100_NVLx8 FP8 Latency
          ngcMetadata:
            582fd7bfbe504eb5ee4ded5254cced1d83ea2682a91b6dd6610af842be947ecc:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:l40sx8-throughput-bf16-p1mhasfmgw
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct L40Sx8 BF16 Throughput
          ngcMetadata:
            60b95dfcc3a17bf00cabb2da1a264f5e8757763d0ebe2a3a073c5c0fc7c078ec:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 151GB
        - profileId: nim/meta/llama-3_1-70b-instruct:a100x8-latency-bf16-b0vhtvjbxa
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct A100x8 BF16 Latency
          ngcMetadata:
            646e2eff5f305302c1cd5fe873ef7c8172021d9948157163761817c4e36352d7:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 150GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100_NVLx4 FP8 Throughput
          ngcMetadata:
            6708ebad5077e24eaff0eabce1134feb16b2a35d2313567b94e3f27479a90544:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h100x4-latency-fp8-a-vzebrdia
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100x4 FP8 Latency
          ngcMetadata:
            6d6d2aebdecec52d7982746f98b00421cf53e10295a9ac7f993e4554fa164d10:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h200x4-throughput-bf16-srhrzg3ziw
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H200x4 BF16 Throughput
          ngcMetadata:
            6dc00fc21eb6d8de62d35c96eed22174e205fdb3db816dbe547deeb37fbdd9a8:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
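            # NOTE(review): 69GB matches the FP8 profiles of this model; the
            # other BF16 profiles here list 140-151GB. Confirm this size.
            - key: DOWNLOAD SIZE
              value: 69GB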
        - profileId: nim/meta/llama-3_1-70b-instruct:h100x8-latency-bf16-z0pmfdyj0g
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100x8 BF16 Latency
          ngcMetadata:
            758482618a1f166cc4e620228600410a6f05649a05c1838d5a93572d44289b95:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 147GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100_NVLx4 FP8 Latency
          ngcMetadata:
            76fc388794dc368145a440d16d72c0ba70e4aecac09901fe4a2c06a767c7eb0d:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h100x4-throughput-fp8-xuwerjgoba
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100x4 FP8 Throughput
          ngcMetadata:
            7d8a02f47911fb7ddf1a6f6b09438f621b6057cb21098999484f09d5a5bb7b23:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h100x2-throughput-fp8-bgcmw4su3w
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100x2 FP8 Throughput
          ngcMetadata:
            7ee2258631ed9d51ebfe5ab44bd547ae5777217686d87cc89c15d06ccdca4047:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h100x4-throughput-bf16-mimvphw4mg
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100x4 BF16 Throughput
          ngcMetadata:
            7f99ed5107c79b938b0ef4fcf2dd21aac27281f71d41a0a7c46d649879d374f0:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 138GB
        - profileId: nim/meta/llama-3_1-70b-instruct:b200x1-throughput-fp8-pqzaqotuoq
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct B200x1 FP8 Throughput
          ngcMetadata:
            8b87146e39b0305ae1d73bc053564d1b4b4c565f81aa5abe3e84385544ca9b60:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 68GB
        - profileId: nim/meta/llama-3_1-70b-instruct:a10gx8-throughput-bf16-iklvwtod4w
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct A10Gx8 BF16 Throughput
          ngcMetadata:
            935ec3ac922bf54106311dfc6b3214a1651a26033b4f5007b6351fffb4058b7a:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 150GB
        - profileId: nim/meta/llama-3_1-70b-instruct:b200x4-latency-fp8-j5gt1gjpha
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct B200x4 FP8 Latency
          ngcMetadata:
            9527145a2d1316a1e55581d1f6b0a45e394fe37b853ec5172dea14c2c9767d96:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h200x4-latency-bf16-xsddatmm2w
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H200x4 BF16 Latency
          ngcMetadata:
            99142c13a095af184ae20945a208a81fae8d650ac0fd91747b03148383f882cf:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:b200x4-throughput-bf16-jpnclv9i-w
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct B200x4 BF16 Throughput
          ngcMetadata:
            9b4836e143f78d245cf161c16a225be11d3e8f9b2024b99dd76e5b2ac6cd7efd:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h200x1-throughput-fp8-e2wepn6pma
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H200x1 FP8 Throughput
          ngcMetadata:
            af876a179190d1832143f8b4f4a71f640f3df07b0503259cedee3e3a8363aa96:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 68GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h200x2-throughput-bf16-qe3ldz912g
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H200x2 BF16 Throughput
          ngcMetadata:
            b407d3df1db123ba8a4c98fb9f73790c01cd53a70fa0e0185814ad57a17cb72b:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h200x2-throughput-fp8-qaimoqhvoq
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H200x2 FP8 Throughput
          ngcMetadata:
            c91a755246cb08dd9aa6905bc40b7db552071d141a850be5a791b06eb4fb2ef8:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h100x8-throughput-bf16-o1xmgf-zsg
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100x8 BF16 Throughput
          ngcMetadata:
            d128c772583bd10da4f31bf8e961893eb2b62363f3cecb94b5ef67d8bbd54665:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 147GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H100_NVLx8 BF16 Throughput
          ngcMetadata:
            d14fa7bd1f4287e74b856fe3f0030312cc4d03b8fe35a8c8aaedf0140ac55067:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:b200x2-throughput-bf16-mcpnmtluwq
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct B200x2 BF16 Throughput
          ngcMetadata:
            d33e8144476992a7d8d621d8e50cf66b89d254dc721aa2782e5a5a6f07b1af80:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct A100_SXM4_40GBx8 BF16 Throughput
          ngcMetadata:
            dc0f5f87ca37f69af7f525ac293c599cd0cbdaf8130da4d9e2ad63d376b12039:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: A100_SXM4_40GB
                gpu_device: 20b0:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100_SXM4_40GB
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 20B0:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:h200x2-latency-fp8-gi3mfprtxq
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct H200x2 FP8 Latency
          ngcMetadata:
            e4f217a5fb016b570e34b8a8eb06051ccfef9534ba43da973bb7f678242eaa5f:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:b200x4-latency-bf16-la2mlox8dg
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct B200x4 BF16 Latency
          ngcMetadata:
            f17543bf1ee65e4a5c485385016927efe49cbc068a6021573d83eacb32537f76:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 69GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct Generic NVIDIA GPUx8 BF16
          ngcMetadata:
            1d7b604f835f74791e6bfd843047fc00a5aef0f72954ca48ce963811fb6f3f09:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '8'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 8
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct Generic NVIDIA GPUx2 BF16
          ngcMetadata:
            375dc0ff86133c2a423fbe9ef46d8fdf12d6403b3caa3b8e70d7851a89fc90dd:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '2'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
        - profileId: nim/meta/llama-3_1-70b-instruct:hf-1d54af3-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 70B Instruct Generic NVIDIA GPUx4 BF16
          ngcMetadata:
            54946b08b79ecf9e7f2d5c000234bf2cce19c8fee21b243c1a084b03897e8c95:
              model: meta/llama-3.1-70b-instruct
              release: 1.8.5
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '4'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 4
            - key: NIM VERSION
              value: 1.8.5
            - key: DOWNLOAD SIZE
              value: 132GB
    - variantId: Llama 3.1 8B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/llama-3_1-8b-instruct-nemo
      optimizationProfiles:
        - profileId: nim/meta/llama-3.1-8b-instruct:a100x2-latency-bf16-dxn2qkwphq
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct A100x2 BF16 Latency
          ngcMetadata:
            2146fcf18ea0412d564c6ed21d2f727281b95361fd78ccfa3d0570ec1716e8db:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 17GB
        - profileId: nim/meta/llama-3.1-8b-instruct:a100x1-throughput-bf16-wlgvs1umtg
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct A100x1 BF16 Throughput
          ngcMetadata:
            222d1729a785201e8a021b226d74d227d01418c41b556283ee1bdbf0a818bd94:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 16GB
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100_NVLx1 BF16 Throughput
          ngcMetadata:
            25b5e251d366671a4011eaada9872ad1d02b48acc33aa0637853a3e3c3caa516:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        - profileId: nim/meta/llama-3.1-8b-instruct:h200x1-throughput-bf16-o8q-ystghg
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H200x1 BF16 Throughput
          ngcMetadata:
            434e8d336fa23cbe151748d32b71e196d69f20d319ee8b59852a1ca31a48d311:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 16GB
        - profileId: nim/meta/llama-3.1-8b-instruct:b200x2-latency-fp8-jg84ho12tg
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct B200x2 FP8 Latency
          ngcMetadata:
            4950d30811e1e426e97cda69e6c03a8a4819db8aa4abf34722ced4542a1f6b52:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100_NVLx1 FP8 Throughput
          ngcMetadata:
            5811750e70b7e9f340f4d670c72fcbd5282e254aeb31f62fd4f937cfb9361007:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        - profileId: nim/meta/llama-3.1-8b-instruct:h200x2-latency-bf16-1to0kzerqq
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H200x2 BF16 Latency
          ngcMetadata:
            6832a9395f54086162fd7b1c6cfaae17c7d1e535a60e2b7675504c9fc7b57689:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 17GB
        - profileId: nim/meta/llama-3.1-8b-instruct:h100x2-latency-fp8-ebdyccaccw
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100x2 FP8 Latency
          ngcMetadata:
            6c3f01dd2b2a56e3e83f70522e4195d3f2add70b28680082204bbb9d6150eb04:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        - profileId: nim/meta/llama-3.1-8b-instruct:h100x1-throughput-fp8-qaruykck6q
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100x1 FP8 Throughput
          ngcMetadata:
            7b508014e846234db3cabe5c9f38568b4ee96694b60600a0b71c621dc70cacf3:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        # Optimization profile: Llama 3.1 8B Instruct on 4x L40S, BF16, latency (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:l40sx4-latency-bf16-zffhxt5r8w
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct L40Sx4 BF16 Latency
          ngcMetadata:
            844ebe2b42df8de8ce66cbb6ecf43f90858ea7efc14ddf020cf1ae7450ae0c33:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 19GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x B200, FP8, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:b200x1-throughput-fp8-y2ykzepf-w
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct B200x1 FP8 Throughput
          ngcMetadata:
            8b87146e39b0305ae1d73bc053564d1b4b4c565f81aa5abe3e84385544ca9b60:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x L40S, BF16, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:l40sx2-throughput-bf16-bvf3cu1zyg
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct L40Sx2 BF16 Throughput
          ngcMetadata:
            973a6bfbfc5d13fc5eb18f5011fab777a5bd257d5807e97f842a3364e82160dc:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 17GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x H100_NVL, FP8, latency (NIM 1.8.6).
        # NOTE(review): profileId is the `hf-8c22764-nim1.3b` tag that several other
        # profiles of this variant also use, not a GPU-specific tag like its
        # neighbours. Confirm consumers do not require profileId to be unique.
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100_NVLx2 FP8 Latency
          ngcMetadata:
            a00ce1e782317cd19ed192dcb0ce26ab8b0c1da8928c33de8893897888ff7580:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x B200, BF16, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:b200x1-throughput-bf16-hirlxvtedg
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct B200x1 BF16 Throughput
          ngcMetadata:
            a4c63a91bccf635b570ddb6d14eeb6e7d0acb2389712892b08d21fad2ceaee38:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 16GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x L40S, BF16, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:l40sx1-throughput-bf16-twgrayi1-g
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct L40Sx1 BF16 Throughput
          ngcMetadata:
            ac5071bbd91efcc71dc486fcd5210779570868b3b8328b4abf7a408a58b5e57c:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 16GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x L40S, FP8, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:l40sx1-throughput-fp8-ao4io-s5ow
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct L40Sx1 FP8 Throughput
          ngcMetadata:
            ad17776f4619854fccd50354f31132a558a1ca619930698fd184d6ccf5fe3c99:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x H200, FP8, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:h200x1-throughput-fp8-rauvqtnsoq
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H200x1 FP8 Throughput
          ngcMetadata:
            af876a179190d1832143f8b4f4a71f640f3df07b0503259cedee3e3a8363aa96:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x H100, BF16, latency (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:h100x2-latency-bf16-lsdwkmc2tg
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100x2 BF16 Latency
          ngcMetadata:
            b3d535c0a7eaaea089b087ae645417c0b32fd01e7e9d638217cc032e51e74fd0:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 17GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x H100_NVL, BF16, latency (NIM 1.8.6).
        # NOTE(review): profileId is the `hf-8c22764-nim1.3b` tag that several other
        # profiles of this variant also use, not a GPU-specific tag like its
        # neighbours. Confirm consumers do not require profileId to be unique.
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100_NVLx2 BF16 Latency
          ngcMetadata:
            b7fad3b35b07d623fac6549078305b71d0e6e1d228a86fa0f7cfe4dbeca9151a:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x L40S, FP8, latency (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:l40sx2-latency-fp8-tu6g7vu05a
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct L40Sx2 FP8 Latency
          ngcMetadata:
            c4ff823a8202af4b523274fb8c6cdd73fa8ee5af16391a6d36b17f714a3c71a0:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x H200, FP8, latency (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:h200x2-latency-fp8-wlbch9i5qg
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H200x2 FP8 Latency
          ngcMetadata:
            e4f217a5fb016b570e34b8a8eb06051ccfef9534ba43da973bb7f678242eaa5f:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 9GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x H100, BF16, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:h100x1-throughput-bf16-jiqjalwe7w
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct H100x1 BF16 Throughput
          ngcMetadata:
            e7dbd9a8ce6270d2ec649a0fecbcae9b5336566113525f20aee3809ba5e63856:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 16GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x B200, BF16, latency (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:b200x2-latency-bf16-vunbkvteua
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct B200x2 BF16 Latency
          ngcMetadata:
            f44768c625db71a327cf17e750d5e1a8e60171a8d8ef6b4c1c4b57fe74c9bf46:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 17GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x L40S, BF16, latency (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:l40sx2-latency-bf16-jrvo7gca9g
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct L40Sx2 BF16 Latency
          ngcMetadata:
            fa36c3502e92c50f78a1906242f929864955e702b7dbfbdb19758fb7ee9aa811:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 17GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x GH200_480GB, FP8, throughput (NIM 1.8.6).
        # NOTE(review): profileId is the `hf-8c22764-nim1.3b` tag that several other
        # profiles of this variant also use, not a GPU-specific tag like its
        # neighbours. Confirm consumers do not require profileId to be unique.
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct GH200_480GBx1 FP8 Throughput
          ngcMetadata:
            f49b49f3d90159a594def51efd8595f1d618e288bca2721fe08e786a1ac67d04:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: GH200_480GB
                gpu_device: 2342:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: GH200_480GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2342:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        # Optimization profile: Llama 3.1 8B Instruct on 1x GH200_480GB, BF16, throughput (NIM 1.8.6).
        # NOTE(review): profileId is the `hf-8c22764-nim1.3b` tag that several other
        # profiles of this variant also use, not a GPU-specific tag like its
        # neighbours. Confirm consumers do not require profileId to be unique.
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct GH200_480GBx1 BF16 Throughput
          ngcMetadata:
            f7f74ecd523cd63065a50016a8786a893b9b1efe0d313bc5bcc54682f56e55fe:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: GH200_480GB
                gpu_device: 2342:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: GH200_480GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2342:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        # Optimization profile: Llama 3.1 8B Instruct on 2x A10G, BF16, throughput (NIM 1.8.6).
        # `spec` restates the ngcMetadata tags in upper case and adds the download size.
        - profileId: nim/meta/llama-3.1-8b-instruct:a10gx2-throughput-bf16-7c0u0kiiqw
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct A10Gx2 BF16 Throughput
          ngcMetadata:
            8a62b002be0b7f82c407e5ed45c50dabe654deca052b521a920682f918323d0d:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 17GB
        # Generic (no gpu / gpu_device tag) BF16 profile for Llama 3.1 8B Instruct with
        # tp=2, tagged trtllm_buildable (NIM 1.8.6). The profileId is the
        # `hf-8c22764-nim1.3b` tag that the other generic profiles also use.
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct Generic NVIDIA GPUx2 BF16
          ngcMetadata:
            375dc0ff86133c2a423fbe9ef46d8fdf12d6403b3caa3b8e70d7851a89fc90dd:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '2'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        # Generic (no gpu / gpu_device tag) BF16 profile for Llama 3.1 8B Instruct with
        # tp=4, tagged trtllm_buildable (NIM 1.8.6). The profileId is the
        # `hf-8c22764-nim1.3b` tag that the other generic profiles also use.
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          # Single space before the precision, matching the GPUx1 / GPUx2 entries;
          # plain scalars keep interior runs of spaces verbatim.
          displayName: Llama 3.1 8B Instruct Generic NVIDIA GPUx4 BF16
          ngcMetadata:
            54946b08b79ecf9e7f2d5c000234bf2cce19c8fee21b243c1a084b03897e8c95:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '4'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 4
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
        # Generic (no gpu / gpu_device tag) BF16 profile for Llama 3.1 8B Instruct with
        # tp=1, tagged trtllm_buildable (NIM 1.8.6). The profileId is the
        # `hf-8c22764-nim1.3b` tag that the other generic profiles also use.
        - profileId: nim/meta/llama-3.1-8b-instruct:hf-8c22764-nim1.3b
          framework: TensorRT-LLM
          displayName: Llama 3.1 8B Instruct Generic NVIDIA GPUx1 BF16
          ngcMetadata:
            ac34857f8dcbd174ad524974248f2faf271bd2a0355643b2cf1490d0fe7787c2:
              model: meta/llama-3.1-8b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '1'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 15GB
  # Model-level metadata for the enclosing model entry (the one whose
  # optimization profiles end just above): labels, config, and license name.
  labels:
    - Llama
    - Meta
    - Text Generation
    - Large Language Model
    - TensorRT-LLM
    - Language Generation
    - NeMo
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# StarCoder2-7B model entry: identity, license links, and a single variant
# sourced from the NGC bigcode/starcoder2-7b container.
- name: StarCoder2-7B
  displayName: StarCoder2-7B
  modelHubID: starcoder2-7b
  category: Language Model
  type: NGC
  description: StarCoder2-7B is a language model that can follow instructions, complete requests, and generate creative text formats.
  requireLicense: true
  # NOTE(review): both URLs below point at Llama 3 policy/license pages although
  # this entry is a BigCode StarCoder2 model — looks copied from a Llama entry;
  # confirm the correct license links.
  licenseAgreements:
    - label: Use Policy
      url: https://llama.meta.com/llama3/use-policy/
    - label: License Agreement
      url: https://llama.meta.com/llama3/license/
  modelVariants:
    - variantId: StarCoder2-7B
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/bigcode/containers/starcoder2-7b
      optimizationProfiles:
        # Optimization profile: StarCoder2-7B on 2x A10G, BF16, tagged
        # trtllm_buildable (NIM 1.8.1). Unlike the H100 profiles in this list, the
        # tags carry no `profile` or `number_of_gpus`; COUNT matches tp.
        - profileId: nim/bigcode/starcoder2-7b:hf-bb9afde
          framework: TensorRT-LLM
          displayName: StarCoder2-7B A10Gx2 BF16
          ngcMetadata:
            375dc0ff86133c2a423fbe9ef46d8fdf12d6403b3caa3b8e70d7851a89fc90dd:
              model: bigcode/starcoder2-7b
              release: 1.8.1
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                gpu: A10G
                gpu_device: 2237:10de
                pp: '1'
                precision: bf16
                tp: '2'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: GPU DEVICE
              value: 2237:10de
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.8.1
            - key: DOWNLOAD SIZE
              value: 14GB
        # Optimization profile: StarCoder2-7B on 2x H100, FP8, latency (NIM 1.8.1).
        # `spec` restates the ngcMetadata tags and adds the download size.
        - profileId: nim/bigcode/starcoder2-7b:h100x2-latency-fp8-zxtdqz4nva
          framework: TensorRT-LLM
          displayName: StarCoder2-7B H100x2 FP8 Latency
          ngcMetadata:
            6c3f01dd2b2a56e3e83f70522e4195d3f2add70b28680082204bbb9d6150eb04:
              model: bigcode/starcoder2-7b
              release: 1.8.1
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.8.1
            - key: DOWNLOAD SIZE
              value: 8GB
        # Optimization profile: StarCoder2-7B on 1x H100, FP8, throughput (NIM 1.8.1).
        # `spec` restates the ngcMetadata tags and adds the download size.
        - profileId: nim/bigcode/starcoder2-7b:h100x1-throughput-fp8-gxzrmbzlca
          framework: TensorRT-LLM
          displayName: StarCoder2-7B H100 FP8 Throughput
          ngcMetadata:
            7b508014e846234db3cabe5c9f38568b4ee96694b60600a0b71c621dc70cacf3:
              model: bigcode/starcoder2-7b
              release: 1.8.1
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            # One GPU: must agree with number_of_gpus / tp above and the
            # `h100x1` profileId (was 2).
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.8.1
            - key: DOWNLOAD SIZE
              value: 8GB
        # Optimization profile: StarCoder2-7B on 1x L40S, BF16, tagged
        # trtllm_buildable (NIM 1.8.1). Unlike the H100 profiles in this list, the
        # tags carry no `profile` or `number_of_gpus`; COUNT matches tp.
        - profileId: nim/bigcode/starcoder2-7b:hf-bb9afde
          framework: TensorRT-LLM
          displayName: StarCoder2-7B L40S BF16
          ngcMetadata:
            ac34857f8dcbd174ad524974248f2faf271bd2a0355643b2cf1490d0fe7787c2:
              model: bigcode/starcoder2-7b
              release: 1.8.1
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                gpu: L40S
                gpu_device: 26b9:10de
                pp: '1'
                precision: bf16
                tp: '1'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: GPU DEVICE
              value: 26b9:10de
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.1
            - key: DOWNLOAD SIZE
              value: 14GB
        # Optimization profile: StarCoder2-7B on 2x H100, BF16, latency (NIM 1.8.1).
        # `spec` restates the ngcMetadata tags and adds the download size.
        - profileId: nim/bigcode/starcoder2-7b:h100x2-latency-bf16-tqld74axpq
          framework: TensorRT-LLM
          displayName: StarCoder2-7B H100x2 BF16 Latency
          ngcMetadata:
            b3d535c0a7eaaea089b087ae645417c0b32fd01e7e9d638217cc032e51e74fd0:
              model: bigcode/starcoder2-7b
              release: 1.8.1
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.8.1
            - key: DOWNLOAD SIZE
              value: 15GB
        # Optimization profile: StarCoder2-7B on 1x H100, BF16, throughput (NIM 1.8.1).
        # `spec` restates the ngcMetadata tags and adds the download size.
        - profileId: nim/bigcode/starcoder2-7b:h100x1-throughput-bf16-bouv9kemrw
          framework: TensorRT-LLM
          displayName: StarCoder2-7B H100 BF16 Throughput
          ngcMetadata:
            e7dbd9a8ce6270d2ec649a0fecbcae9b5336566113525f20aee3809ba5e63856:
              model: bigcode/starcoder2-7b
              release: 1.8.1
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.8.1
            - key: DOWNLOAD SIZE
              value: 15GB
  # Model-level metadata for StarCoder2-7B: labels, config, and license name.
  # Labels are all written as plain scalars; none of them needs quoting.
  labels:
    - bigCode
    - StarCoder
    - Code Generation
    - Text Generation
    - Multilingual support
    - Large Language Model
    - NVIDIA Validated
  config:
    architectures:
      - Other
    # NOTE(review): modelType is `llama` although this entry is StarCoder2 —
    # confirm this is what the consumer expects.
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
- name: Mistral Instruct
  displayName: Mistral Instruct
  modelHubID: mistral-instruct
  category: Text Generation
  type: NGC
  description: Mistral Instruct is a language model that can follow instructions, complete requests, and generate creative text formats. The Mistral Instruct Large Language Model (LLM) is an instruct fine-tuned version of the Mistral model.
  modelVariants:
    - variantId: Mistral 7B Instruct
      displayName: Mistral 7B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/mistralai/containers/mistral-7b-instruct-v0.3
      optimizationProfiles:
        # Optimization profile: Mistral 7B Instruct v0.3 on 1x H100, BF16, throughput (release 1.3.0).
        # NOTE(review): container_url is tagged 1.1.2 while release / NIM VERSION say
        # 1.3.0 — confirm which is intended.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-h100x1-bf16-throughput.1.3.655593
          displayName: Mistral 7B Instruct H100 BF16 Throughput
          framework: TensorRT-LLM
          ngcMetadata:
            ed4af8b6563348d37f72bfd013be44573a1c88f384ef8fb3eaf0c69e4f235c20:
              container_url: nvcr.io/nim/mistralai/mistral-7b-instruct-v03:1.1.2
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 14GB
        # Optimization profile: Mistral 7B Instruct v0.3 on 1x L40S, BF16, throughput (release 1.3.0).
        # NOTE(review): container_url is tagged 1.1.2 while release / NIM VERSION say
        # 1.3.0 — confirm which is intended.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-l40sx1-bf16-throughput.1.3.655593
          displayName: Mistral 7B Instruct L40S BF16 Throughput
          framework: TensorRT-LLM
          ngcMetadata:
            8af967d80ae8f30f4635a59b2140fdc2b38d3004e16e66c9667fa032e56497fd:
              container_url: nvcr.io/nim/mistralai/mistral-7b-instruct-v03:1.1.2
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26b5:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 14GB
        # Mistral 7B Instruct v0.3 on 2x A10G: BF16, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-a10gx2-bf16-throughput.1.3.127462
          displayName: Mistral 7B Instruct A10Gx2 BF16 Throughput
          framework: TensorRT-LLM
          ngcMetadata:
            57beb7b4f94f72519842de3e1b4cda5ae0774271cf433ff56180551e0f15d0c8:
              container_url: nvcr.io/nim/mistralai/mistral-7b-instruct-v03:1.1.2
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              # Agrees with tags.profile, the profileId and the displayName, which all say throughput.
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2237:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 14GB
        # Mistral 7B Instruct v0.3 on 2x L40S: FP8, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-l40sx2-fp8-latency.1.3.885640
          displayName: Mistral 7B Instruct L40Sx2 FP8 Latency
          framework: TensorRT-LLM
          ngcMetadata:
            138437d95405e4dad69a8cd4dc6126a2b8fc9254a274af83b1fd0b1b01658b55:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              # Mirrors tags.gpu_device, as the single-GPU L40S profiles in this list do.
              # NOTE(review): confirm 26b5:10de is the intended device id for L40S.
              value: 26b5:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 8GB
        # Mistral 7B Instruct v0.3 on 2x L40S: BF16, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-l40sx2-bf16-latency.1.3.655593
          displayName: Mistral 7B Instruct L40Sx2 BF16 Latency
          framework: TensorRT-LLM
          ngcMetadata:
            4c50d586aaa9b9a484d5090213be8ff5db7f5b775aa94b66651eac515108f16c:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              # Mirrors tags.gpu_device, as the single-GPU L40S profiles in this list do.
              # NOTE(review): confirm 26b5:10de is the intended device id for L40S.
              value: 26b5:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 15GB
        # Mistral 7B Instruct v0.3 on 2x H100: BF16, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-h100x2-bf16-latency.1.3.655593
          framework: TensorRT-LLM
          displayName: Mistral 7B Instruct H100x2 BF16 Latency
          modelFormat: trt-llm
          ngcMetadata:
            8c27f77dab1986e76b524c755fa5a809f8882517b503e76bfcf8d42b991adc89:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Latency}
            - {key: PRECISION, value: BF16}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 2}
            - {key: GPU DEVICE, value: '2330:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 15GB}
        # Mistral 7B Instruct v0.3 on a single A100: BF16, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-a100x1-bf16-throughput.1.3.127462
          framework: TensorRT-LLM
          displayName: Mistral 7B Instruct A100 BF16 Throughput
          modelFormat: trt-llm
          ngcMetadata:
            9189d008806a9638d4206e6ff94c0b0d9acc2a8861f6de5a49b9d0a5acdcf049:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Throughput}
            - {key: PRECISION, value: BF16}
            - {key: GPU, value: A100}
            - {key: COUNT, value: 1}
            - {key: GPU DEVICE, value: '20b2:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 15GB}
        # Mistral 7B Instruct v0.3 on 4x A10G: BF16, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-a10gx4-bf16-latency.1.3.127462
          framework: TensorRT-LLM
          displayName: Mistral 7B Instruct A10Gx4 BF16 Latency
          modelFormat: trt-llm
          ngcMetadata:
            9bccc20c28c1728b59cdbad4b2c1607d3b57388ff266da4477ea8a413ae0fb7d:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Latency}
            - {key: PRECISION, value: BF16}
            - {key: GPU, value: A10G}
            - {key: COUNT, value: 4}
            - {key: GPU DEVICE, value: '2237:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 16GB}
        # Mistral 7B Instruct v0.3 on a single L40S: FP8, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-l40sx1-fp8-throughput.1.3.885640
          framework: TensorRT-LLM
          displayName: Mistral 7B Instruct L40S FP8 Throughput
          modelFormat: trt-llm
          ngcMetadata:
            f34180a7eb689e915c741cda5ea015ac54b134a73b13b0b2865a5a4e44291a85:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Throughput}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: L40S}
            - {key: COUNT, value: 1}
            - {key: GPU DEVICE, value: '26b5:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 8GB}
        # Mistral 7B Instruct v0.3 on a single H100: FP8, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-h100x1-fp8-throughput.1.3.885640
          framework: TensorRT-LLM
          displayName: Mistral 7B Instruct H100 FP8 Throughput
          modelFormat: trt-llm
          ngcMetadata:
            f8b5f71dd66c36c70deac7927cbd98b1c4f78caf1abf01f768be7118e1daa278:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Throughput}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 1}
            - {key: GPU DEVICE, value: '2330:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 8GB}
        # Mistral 7B Instruct v0.3 on 2x H100: FP8, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mistral-7b-instruct-v0-3:0.12+2333135a3-h100x2-fp8-latency.1.3.885640
          framework: TensorRT-LLM
          displayName: Mistral 7B Instruct H100x2 FP8 Latency
          modelFormat: trt-llm
          ngcMetadata:
            fa55c825306dfc09c9d0e7ef423e897d91fe8334a3da87d284f45f45cbd4c1b0:
              model: mistralai/mistral-7b-instruct-v0.3
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Latency}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 2}
            - {key: GPU DEVICE, value: '2330:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 8GB}
  # Free-form labels attached to this model entry.
  labels:
    - Mistral
    - Instruct
    - Large Language Model
    - TensorRT-LLM
    - Language Generation
    - NeMo
    - NVIDIA Validated
  # Architecture and model-type metadata for this model entry.
  config:
    architectures:
      - Other
    # NOTE(review): the Mixtral Instruct entry in this file spells this value
    # lowercase ("mistral"); confirm which casing consumers expect.
    modelType: Mistral
  # Name of the license this model entry is published under.
  license: NVIDIA AI Foundation Models Community License
# Registry entry for the Mixtral Instruct family; its variants and their optimization profiles follow.
- name: Mixtral Instruct
  displayName: Mixtral Instruct
  modelHubID: mixtral-instruct
  category: Text Generation
  type: NGC
  description: The Mixtral Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts model. Mixtral Instruct is a language model that can follow instructions, complete requests, and generate creative text formats. The Mixtral Instruct Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral.
  modelVariants:
    # First variant: Mixtral 8x7B Instruct, sourced from the NGC catalog URL below.
    - variantId: Mixtral 8x7B Instruct
      displayName: Mixtral 8x7B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/mistralai/containers/mixtral-8x7b-instruct-v01
      # One list item per GPU / precision / profile combination.
      optimizationProfiles:
        # Mixtral 8x7B Instruct v0.1 on 8x A10G: FP16, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-a10gx8-fp16-throughput.1.3.18301798
          displayName: Mixtral 8x7B Instruct A10Gx8 FP16 Throughput
          framework: TensorRT-LLM
          ngcMetadata:
            03501a01c138dcfc63fc672c20053e3fca8d7bdae1f448165d7bed3f241973cf:
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                # Quoted so the tag stays a string like pp/tp below; an unquoted
                # false is parsed as a boolean.
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '8'
          modelFormat: trt-llm
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2237:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 89GB
        # Mixtral 8x7B Instruct v0.1 on 2x H100: int8wo precision, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-h100x2-int8wo-throughput.1.3.18301798
          framework: TensorRT-LLM
          displayName: Mixtral 8x7B Instruct H100x2 int8wo Throughput
          modelFormat: trt-llm
          ngcMetadata:
            208d53be878cb4d31c9019a80637c54e441e4a4edbee17754d1fc1b0b31b1cc1:
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: int8wo
                profile: throughput
                tp: '2'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Throughput}
            - {key: PRECISION, value: INT8WO}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 2}
            - {key: GPU DEVICE, value: '2330:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 48GB}
        # Mixtral 8x7B Instruct v0.1 on 2x H100: FP16, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-h100x2-fp16-throughput.1.3.18301798
          displayName: Mixtral 8x7B Instruct H100x2 FP16 Throughput
          framework: TensorRT-LLM
          ngcMetadata:
            bbaccf5c5f059943db905cfcb4e9f2e4e83f0da3617abd244b693103d13005f4:
              container_url: nvcr.io/nim/mistralai/mixtral-8x7b-instruct-v01:1.2.1
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                # Quoted so the tag stays a string like pp/tp below; an unquoted
                # false is parsed as a boolean.
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 94GB
        # Mixtral 8x7B Instruct v0.1 on 4x L40S: FP8, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-l40sx4-fp8-throughput.1.3.18301798
          framework: TensorRT-LLM
          displayName: Mixtral 8x7B Instruct L40Sx4 FP8 Throughput
          modelFormat: trt-llm
          ngcMetadata:
            4a7fcddcd723f52264e0a9b90b3a17674d1ceb11000aa6dfa50e8a9f1d7c4c8e:
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Throughput}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: L40S}
            - {key: COUNT, value: 4}
            - {key: GPU DEVICE, value: '26b5:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 94GB}
        # Mixtral 8x7B Instruct v0.1 on 4x L40S: FP16, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-l40sx4-fp16-throughput.1.3.18301798
          framework: TensorRT-LLM
          displayName: Mixtral 8x7B Instruct L40Sx4 FP16 Throughput
          modelFormat: trt-llm
          ngcMetadata:
            536502b5ba23293b7a9bd6dfabd9b93d2d82c8436d0788cc748b28aefd4adf79:
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '4'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Throughput}
            - {key: PRECISION, value: FP16}
            - {key: GPU, value: L40S}
            - {key: COUNT, value: 4}
            - {key: GPU DEVICE, value: '26b5:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 95GB}
        # Mixtral 8x7B Instruct v0.1 on 4x H100: int8wo precision, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-h100x4-int8wo-latency.1.3.18301798
          # Carries the x4 GPU count, matching the profileId, tp and COUNT below.
          displayName: Mixtral 8x7B Instruct H100x4 INT8WO Latency
          framework: TensorRT-LLM
          ngcMetadata:
            5cf31967505bc7d4e792563c5521545703cee2be36714b6944e0e33adb70409a:
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: int8wo
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: INT8WO
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 48GB
        # Mixtral 8x7B Instruct v0.1 on 4x H100: FP16, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-h100x4-fp16-latency.1.3.18301798
          framework: TensorRT-LLM
          displayName: Mixtral 8x7B Instruct H100x4 FP16 Latency
          modelFormat: trt-llm
          ngcMetadata:
            ed45c32307812aa9b45ef8b3f73d635a4ed8af4ee46ffa09253fc529fbfd55db:
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp16
                profile: latency
                tp: '4'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Latency}
            - {key: PRECISION, value: FP16}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 4}
            - {key: GPU DEVICE, value: '2330:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 95GB}
        # Mixtral 8x7B Instruct v0.1 on 4x H100: FP8, latency-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x7b-instruct-v01:0.12.0+2333135a3-h100x4-fp8-latency.1.3.18301798
          framework: TensorRT-LLM
          displayName: Mixtral 8x7B Instruct H100x4 FP8 Latency
          modelFormat: trt-llm
          ngcMetadata:
            f255f2c7d6787f8b436aa1a74280ebb1a736fa21ae39fd56aeef92f10f7c9c81:
              model: mistralai/mixtral-8x7b-instruct-v0.1
              release: 1.3.0
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - {key: PROFILE, value: Latency}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 4}
            - {key: GPU DEVICE, value: '2330:10de'}
            - {key: NIM VERSION, value: 1.3.0}
            - {key: DOWNLOAD SIZE, value: 48GB}
    # Second variant: Mixtral 8x22B Instruct, sourced from the NGC catalog URL below.
    - variantId: Mixtral 8x22B Instruct
      displayName: Mixtral 8x22B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/mistralai/containers/mixtral-8x22b-instruct-v01
      # One list item per GPU / precision / profile combination.
      optimizationProfiles:
        # Mixtral 8x22B Instruct v0.1 on 8x H100: int8wo precision, throughput-tuned TensorRT-LLM engine.
        # NOTE(review): the profileId tag and the NIM VERSION spec say 1.2.2, while
        # release, container_url and the trtllm_engine repo_id below say 1.0.0.
        # Confirm against NGC which version this profile actually belongs to.
        - profileId: nim/mistralai/mixtral-8x22b-instruct-v01:0.10.1+79a76176-h100x8-int8wo-throughput.1.2.2.16140417
          # Carries the x8 GPU count, matching the profileId, tp and COUNT below.
          displayName: Mixtral 8x22B Instruct H100x8 int8wo Throughput
          framework: TensorRT-LLM
          ngcMetadata:
            4ad9a208ce0f8ec41cd6b8681cd0ddf6fbeb406efb3d9baf6847a3fb8bac5863:
              container_url: nvcr.io/nim/mistralai/mixtral-8x22b-instruct-v01:1.0.0
              model: mistralai/mixtral-8x22b-instruct-v0.1
              model_type: text_generation
              release: 1.0.0
              tags:
                # Quoted so the tag stays a string like pp/tp below; an unquoted
                # false is parsed as a boolean.
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: int8wo
                profile: throughput
                tp: '8'
              # File manifest: each component lists files from one NGC repo_id and a
              # dst path ('' for the first component, trtllm_engine for the engine ranks).
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'README.md'
                        - !name 'checksums.blake3'
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'model.safetensors.index.json'
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer.model'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/mistralai/mixtral-8x22b-instruct-v01:hf-52572b2
                  - dst: trtllm_engine
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'config.json'
                        - !name 'metadata.json'
                        - !name 'rank0.engine'
                        - !name 'rank1.engine'
                        - !name 'rank2.engine'
                        - !name 'rank3.engine'
                        - !name 'rank4.engine'
                        - !name 'rank5.engine'
                        - !name 'rank6.engine'
                        - !name 'rank7.engine'
                        - !name 'trt_llm_config.yaml'
                      repo_id: ngc://nim/mistralai/mixtral-8x22b-instruct-v01:0.10.1+79a76176-h100x8-int8wo-throughput.1.0.0.16140417
          # Same value as the ngcMetadata key above.
          sha: 4ad9a208ce0f8ec41cd6b8681cd0ddf6fbeb406efb3d9baf6847a3fb8bac5863
          modelFormat: trt-llm
          latestVersionSizeInBytes: 144762798586
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: int8wo
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.2.2
            - key: DOWNLOAD SIZE
              value: 144GB
        # Mixtral 8x22B Instruct v0.1 on 8x H100: FP16, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/mistralai/mixtral-8x22b-instruct-v01:0.11.1+14957bf8-h100x8-fp16-throughput.1.1.2.17572569
          # Carries the x8 GPU count, matching the profileId, tp and COUNT below.
          displayName: Mixtral 8x22B Instruct H100x8 FP16 Throughput
          framework: TensorRT-LLM
          ngcMetadata:
            e44c755ef6628cccb74ccf58af4a6efa039f7e49e07a9dd7a27eb17f6500964e:
              container_url: nvcr.io/nim/mistralai/mixtral-8x22b-instruct-v01:1.2.2
              model: mistralai/mixtral-8x22b-instruct-v0.1
              release: 1.2.2
              tags:
                # Quoted so the tag stays a string like pp/tp below; an unquoted
                # false is parsed as a boolean.
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '8'
              # File manifest: each component lists files from one NGC repo_id and a
              # dst path ('' for the first component, trtllm_engine for the engine ranks).
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'README.md'
                        - !name 'checksums.blake3'
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'model.safetensors.index.json'
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer.model'
                        - !name 'tokenizer_config.json'
                        - !name 'tool_use_config.json'
                      repo_id: ngc://nim/mistralai/mixtral-8x22b-instruct-v01:hf-1702b01-tool-calling
                  - dst: trtllm_engine
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'config.json'
                        - !name 'metadata.json'
                        - !name 'rank0.engine'
                        - !name 'rank1.engine'
                        - !name 'rank2.engine'
                        - !name 'rank3.engine'
                        - !name 'rank4.engine'
                        - !name 'rank5.engine'
                        - !name 'rank6.engine'
                        - !name 'rank7.engine'
                      repo_id: ngc://nim/mistralai/mixtral-8x22b-instruct-v01:0.11.1+14957bf8-h100x8-fp16-throughput.1.1.2.17572569
          # Same value as the ngcMetadata key above.
          sha: e44c755ef6628cccb74ccf58af4a6efa039f7e49e07a9dd7a27eb17f6500964e
          modelFormat: trt-llm
          latestVersionSizeInBytes: 285170977174
          # Key/value summary; profile, precision, GPU and device id repeat the tag values above.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.2.2
            - key: DOWNLOAD SIZE
              value: 285GB
  # Free-form labels attached to the Mixtral Instruct entry.
  labels:
    - Mistral
    - Instruct
    - Large Language Model
    - TensorRT-LLM
    - Language Generation
    - NeMo
    - NVIDIA Validated
  # Architecture and model-type metadata for the Mixtral Instruct entry.
  config:
    architectures:
      - Other
    # NOTE(review): the preceding Mistral entry in this file spells this value
    # capitalized ("Mistral"); confirm which casing consumers expect.
    modelType: mistral
  # Name of the license this model entry is published under.
  license: NVIDIA AI Foundation Models Community License
# Registry entry for Deepseek R1 Distill Llama; its variant and optimization profiles follow.
- name: Deepseek R1 Distill Llama
  displayName: Deepseek R1 Distill Llama
  modelHubID: deepseek-r1-distill-llama
  category: Chat Assistant
  type: NGC
  description: The DeepSeek-R1-Distill-Llama-70B NIM simplifies the deployment of a distilled version of the DeepSeek-R1 series, built upon the Llama3.3-70B-Instruct architecture. This model is designed to deliver efficient performance for reasoning, math, and code tasks while maintaining high accuracy. By distilling knowledge from the larger DeepSeek-R1 model, it provides state-of-the-art performance with reduced computational requirements.
  requireLicense: true
  # The description states the model is built on Llama3.3-70B-Instruct, so the
  # agreements link to the Llama 3.3 documents (the same URLs the Llama 3.3 70B
  # Instruct entry in this file uses) rather than the Llama 3 ones.
  # NOTE(review): confirm the applicable license terms with the model card.
  licenseAgreements:
    - label: Use Policy
      url: https://www.llama.com/llama3_3/use-policy/
    - label: License Agreement
      url: https://www.llama.com/llama3_3/license/
  modelVariants:
    - variantId: Deepseek R1 Distill Llama 70b
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/deepseek-ai/containers/deepseek-r1-distill-llama-70b
      # One list item per GPU / precision / profile combination.
      optimizationProfiles:
        # Deepseek R1 Distill Llama 70B on 4x L40S: FP8, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-70b:l40sx4-throughput-fp8-46u3lvp6ja
          displayName: Deepseek R1 Distill Llama 70B L40Sx4 FP8 Throughput
          framework: TensorRT-LLM
          modelFormat: trt-llm
          ngcMetadata:
            23c28e4a1ad4d963c1504f1a33b45afb65bf61b64b20be1a8ea2c8816ea0fc36:
              model: deepseek-r1-distill-llama-70b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '4'
          # Key/value summary; repeats the tag values above in upper case.
          spec:
            - {key: PROFILE, value: THROUGHPUT}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: L40S}
            - {key: COUNT, value: 4}
            - {key: GPU DEVICE, value: '26B9:10DE'}
            - {key: NIM VERSION, value: 1.5.2}
            - {key: DOWNLOAD SIZE, value: 69GB}
        # Deepseek R1 Distill Llama 70B on 4x H100: FP8, latency-tuned TensorRT-LLM engine.
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-70b:h100x4-latency-fp8-k5tlofelyw
          displayName: Deepseek R1 Distill Llama 70B H100x4 FP8 Latency
          framework: TensorRT-LLM
          modelFormat: trt-llm
          ngcMetadata:
            4696d5c5b44b13bb5e864affcdcfa30ad229390285476315d9921fd0828bda5b:
              model: deepseek-r1-distill-llama-70b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
          # Key/value summary; repeats the tag values above in upper case.
          spec:
            - {key: PROFILE, value: LATENCY}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 4}
            - {key: GPU DEVICE, value: '2330:10DE'}
            - {key: NIM VERSION, value: 1.5.2}
            - {key: DOWNLOAD SIZE, value: 69GB}
        # Deepseek R1 Distill Llama 70B on 8x H100: FP8, latency-tuned TensorRT-LLM engine.
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-70b:h100x8-latency-fp8-xz3eymtuzq
          displayName: Deepseek R1 Distill Llama 70B H100x8 FP8 Latency
          framework: TensorRT-LLM
          modelFormat: trt-llm
          ngcMetadata:
            91f2b7c9e719c0c380ba6c1d6c3e5cad61aaf807730de88fa3b6233a39edeeaa:
              model: deepseek-r1-distill-llama-70b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '8'
          # Key/value summary; repeats the tag values above in upper case.
          spec:
            - {key: PROFILE, value: LATENCY}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 8}
            - {key: GPU DEVICE, value: '2330:10DE'}
            - {key: NIM VERSION, value: 1.5.2}
            - {key: DOWNLOAD SIZE, value: 70GB}
        # Deepseek R1 Distill Llama 70B on 2x H100: FP8, throughput-tuned TensorRT-LLM engine.
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-70b:h100x2-throughput-fp8-8cx2penaia
          displayName: Deepseek R1 Distill Llama 70B H100x2 FP8 Throughput
          framework: TensorRT-LLM
          modelFormat: trt-llm
          ngcMetadata:
            da94a5c34cf665e85813fa49f321f1e87ca12317722b5e65628cf3ed0371897b:
              model: deepseek-r1-distill-llama-70b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
          # Key/value summary; repeats the tag values above in upper case.
          spec:
            - {key: PROFILE, value: THROUGHPUT}
            - {key: PRECISION, value: FP8}
            - {key: GPU, value: H100}
            - {key: COUNT, value: 2}
            - {key: GPU DEVICE, value: '2330:10DE'}
            - {key: NIM VERSION, value: 1.5.2}
            - {key: DOWNLOAD SIZE, value: 69GB}
        # Profile: 4x H100, BF16, throughput (TensorRT-LLM, NIM 1.5.2, 138GB download).
        # NOTE(review): `model` below has no `deepseek-ai/` org prefix, unlike the
        # 8B variant's profiles in this file; confirm it matches the upstream metadata.
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-70b:h100x4-throughput-bf16-g31fj2uvrw
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 70B H100x4 BF16 Throughput
          ngcMetadata:
            e6b8fb8c4c76343b05b9051974593e5bd9110a868770d52e8eb0fe5a3b46dd67:
              model: deepseek-r1-distill-llama-70b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 138GB
        # Profile: 8x H100, BF16, latency (TensorRT-LLM, NIM 1.5.2, 147GB download).
        # NOTE(review): `model` below has no `deepseek-ai/` org prefix, unlike the
        # 8B variant's profiles in this file; confirm it matches the upstream metadata.
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-70b:h100x8-latency-bf16-v8q6jmcd9g
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 70B H100x8 BF16 Latency
          ngcMetadata:
            f87605b6d8cfc0ca39fad21b4ec580219f3a3be42884d2c7caad9b8ae4b3c1c7:
              model: deepseek-r1-distill-llama-70b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '8'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 147GB
    # Variant: DeepSeek R1 Distill Llama 8B; its optimization profiles follow.
    # NOTE(review): variantId spells the size `8b`, while the profile displayNames
    # under this variant use `8B`; confirm how consumers match variantId before
    # normalizing the casing.
    - variantId: Deepseek R1 Distill Llama 8b
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/deepseek-ai/containers/deepseek-r1-distill-llama-8b
      optimizationProfiles:
        # Profile: 1x L40S, FP8, throughput (TensorRT-LLM, NIM 1.5.2, 9GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:l40sx1-throughput-fp8-vbqc0btoqg
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B L40Sx1 FP8 Throughput
          ngcMetadata:
            d968c663c710e56275088096bc0dcf823560aaf7dca910bfcb41f5056063ab02:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 9GB
        # Profile: 1x H100, FP8, throughput (TensorRT-LLM, NIM 1.5.2, 9GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:h100x1-throughput-fp8-d9grrq-lka
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B H100x1 FP8 Throughput
          ngcMetadata:
            0bdec027404c16d6ca96e159079082f9630a24a277ff519d0c8fea71007222ec:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 9GB
        # Profile: 2x H100, BF16, latency (TensorRT-LLM, NIM 1.5.2, 17GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:h100x2-latency-bf16-7ztok5r0dg
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B H100x2 BF16 Latency
          ngcMetadata:
            0ce355335e6c3aec54e49ab53822e628fa1227091d0326da962bcc4f95b5f602:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 17GB
        # Profile: 4x A10G, BF16, latency (TensorRT-LLM, NIM 1.5.2, 19GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:a10gx4-latency-bf16-aiejrysrlw
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B A10Gx4 BF16 Latency
          ngcMetadata:
            1dfac8e12042573dc93536a393902478e1a6a46d1cd742cf0a4251c11f77e253:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '4'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 19GB
        # Profile: 2x L40S, FP8, latency (TensorRT-LLM, NIM 1.5.2, 9GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:l40sx2-latency-fp8-fmuoxfbb0q
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B L40Sx2 FP8 Latency
          ngcMetadata:
            c2d4efce2d553c3aa78109b6d5dff0fd34b86bbb3b765aa8afdf12e9d13e8e83:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 9GB
        # Profile: 1x H100, BF16, throughput (TensorRT-LLM, NIM 1.5.2, 16GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:h100x1-throughput-bf16-4jcstzx27q
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B H100x1 BF16 Throughput
          ngcMetadata:
            4f6dba657c08280bdb419cbc1c60d265e82731b807ee2ae3c111cb9a91571aa1:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 16GB
        # Profile: 2x H100, FP8, latency (TensorRT-LLM, NIM 1.5.2, 9GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:h100x2-latency-fp8-q8xwzp22aa
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B H100x2 FP8 Latency
          ngcMetadata:
            518edac01f731b63676743a1860fe21861d1399b19cb2e584de3d9a6a3ea6d8e:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 9GB
        # Profile: 1x L40S, BF16, throughput (TensorRT-LLM, NIM 1.5.2, 16GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:l40sx1-throughput-bf16-yvbnwvfzew
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B L40Sx1 BF16 Throughput
          ngcMetadata:
            9bc8e8aa12847674fa2840b9c03cbdb0246d7f144a5257510fd53eacc2a9d62f:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 16GB
        # Profile: 1x A100, BF16, throughput (TensorRT-LLM, NIM 1.5.2, 16GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:a100x1-throughput-bf16-iq9maz9nkw
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B A100x1 BF16 Throughput
          ngcMetadata:
            c959aa89b69ad9295ccc99a34546819d16bb0e2566a6cfed0985eecf37bcc14b:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 16GB
        # Profile: 2x L40S, BF16, latency (TensorRT-LLM, NIM 1.5.2, 17GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:l40sx2-latency-bf16-tlmx3sgrdw
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B L40Sx2 BF16 Latency
          ngcMetadata:
            20d6bb61a1ee5160c0baed3721f8b580525a0aaaaa3b1333e9a882d4c61b1ed7:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: LATENCY
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 17GB
        # Profile: 2x A10G, BF16, throughput (TensorRT-LLM, NIM 1.5.2, 17GB download).
        - profileId: nim/deepseek-ai/deepseek-r1-distill-llama-8b:a10gx2-throughput-bf16-uv8ptkf8-g
          framework: TensorRT-LLM
          displayName: Deepseek R1 Distill Llama 8B A10Gx2 BF16 Throughput
          ngcMetadata:
            edbb37d3ef94a5cc38919ab86694b835307c0668ca6d41ea746796b34ced78f1:
              model: deepseek-ai/deepseek-r1-distill-llama-8b
              release: 1.5.2
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '2'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.5.2
            - key: DOWNLOAD SIZE
              value: 17GB
  labels:
    - Deepseek
    - Distill
    - Llama
    - Meta
    - Chat
    - Large Language Model
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# Registry entry for the Llama 3.2 Instruct family (1B and 3B variants below).
# NOTE(review): `category` here is `Commercial and Research`, whereas the
# Llama 3.3 70B Instruct entry at the top of this file uses `Text Generation`;
# confirm this is an accepted category value.
# NOTE(review): `modelHubID` carries a `meta/` prefix and dots, while the
# Llama 3.3 entry uses a plain dashed ID (`llama-3-3-70b-instruct`); confirm
# the expected format.
- name: Llama 3.2 Instruct
  displayName: Llama 3.2 Instruct
  modelHubID: meta/llama-3.2-instruct
  category: Commercial and Research
  type: NGC
  description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pre-trained and instruction-tuned generative models in 1B and 3B sizes (text in/text out).
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://llama.meta.com/llama3_2/use-policy/
    - label: License Agreement
      url: https://llama.meta.com/llama3_2/license/
  modelVariants:
    # Variant: Llama 3.2 1B Instruct; its optimization profiles follow.
    # NOTE(review): this source URL points at `orgs/nvidia/teams/nemo/models/...`,
    # whereas other variants in this file link to
    # `orgs/nim/teams/<org>/containers/...`; confirm the link is intentional.
    - variantId: Llama 3.2 1B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/llama-3_2-1b-instruct
      optimizationProfiles:
        # Profile: 1x GH200_480GB, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        # NOTE(review): this profileId (`...:hf-e9f8eff-nim1.5+`) is not unique; several
        # other profiles of this variant, including the generic buildable BF16 one, use
        # the same value and differ only in their ngcMetadata key. Confirm that consumers
        # do not key on profileId alone.
        - profileId: nim/meta/llama-3.2-1b-instruct:hf-e9f8eff-nim1.5+
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct GH200_480GBx1 BF16 Throughput
          ngcMetadata:
            f7f74ecd523cd63065a50016a8786a893b9b1efe0d313bc5bcc54682f56e55fe:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: GH200_480GB
                gpu_device: 2342:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: GH200_480GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2342:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x GH200_480GB, FP8, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        # NOTE(review): this profileId (`...:hf-e9f8eff-nim1.5+`) is not unique; several
        # other profiles of this variant, including the generic buildable BF16 one, use
        # the same value and differ only in their ngcMetadata key. Confirm that consumers
        # do not key on profileId alone.
        - profileId: nim/meta/llama-3.2-1b-instruct:hf-e9f8eff-nim1.5+
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct GH200_480GBx1 FP8 Throughput
          ngcMetadata:
            f49b49f3d90159a594def51efd8595f1d618e288bca2721fe08e786a1ac67d04:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: GH200_480GB
                gpu_device: 2342:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: GH200_480GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2342:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x A100, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:a100x1-throughput-bf16-a2zlotpozq
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct A100x1 BF16 Throughput
          ngcMetadata:
            222d1729a785201e8a021b226d74d227d01418c41b556283ee1bdbf0a818bd94:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x H100_NVL, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        # NOTE(review): this profileId (`...:hf-e9f8eff-nim1.5+`) is not unique; several
        # other profiles of this variant, including the generic buildable BF16 one, use
        # the same value and differ only in their ngcMetadata key. Confirm that consumers
        # do not key on profileId alone.
        - profileId: nim/meta/llama-3.2-1b-instruct:hf-e9f8eff-nim1.5+
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct H100_NVLx1 BF16 Throughput
          ngcMetadata:
            25b5e251d366671a4011eaada9872ad1d02b48acc33aa0637853a3e3c3caa516:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x H200, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:h200x1-throughput-bf16-pkk6mlb47w
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct H200x1 BF16 Throughput
          ngcMetadata:
            434e8d336fa23cbe151748d32b71e196d69f20d319ee8b59852a1ca31a48d311:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x H100_NVL, FP8, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        # NOTE(review): this profileId (`...:hf-e9f8eff-nim1.5+`) is not unique; several
        # other profiles of this variant, including the generic buildable BF16 one, use
        # the same value and differ only in their ngcMetadata key. Confirm that consumers
        # do not key on profileId alone.
        - profileId: nim/meta/llama-3.2-1b-instruct:hf-e9f8eff-nim1.5+
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct H100_NVLx1 FP8 Throughput
          ngcMetadata:
            5811750e70b7e9f340f4d670c72fcbd5282e254aeb31f62fd4f937cfb9361007:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x A10G, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 4GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:a10gx1-throughput-bf16-orfmmsdx5a
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct A10Gx1 BF16 Throughput
          ngcMetadata:
            74bfd8b2df5eafe452a9887637eef4820779fb4e1edb72a4a7a2a1a2d1e6480b:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 4GB
        # Profile: 1x H100, FP8, throughput (TensorRT-LLM, NIM 1.8.6, 2GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:h100x1-throughput-fp8-u882rrmxeg
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct H100x1 FP8 Throughput
          ngcMetadata:
            7b508014e846234db3cabe5c9f38568b4ee96694b60600a0b71c621dc70cacf3:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 2GB
        # Profile: 1x B200, FP8, throughput (TensorRT-LLM, NIM 1.8.6, 2GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:b200x1-throughput-fp8-qhbtqh0mwg
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct B200x1 FP8 Throughput
          ngcMetadata:
            8b87146e39b0305ae1d73bc053564d1b4b4c565f81aa5abe3e84385544ca9b60:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 2GB
        # Profile: 1x B200, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:b200x1-throughput-bf16-mficrdoqfw
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct B200x1 BF16 Throughput
          ngcMetadata:
            a4c63a91bccf635b570ddb6d14eeb6e7d0acb2389712892b08d21fad2ceaee38:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x L40S, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:l40sx1-throughput-bf16-vpk2vn8hrg
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct L40Sx1 BF16 Throughput
          ngcMetadata:
            ac5071bbd91efcc71dc486fcd5210779570868b3b8328b4abf7a408a58b5e57c:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x L40S, FP8, throughput (TensorRT-LLM, NIM 1.8.6, 2GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:l40sx1-throughput-fp8-dngq15ocbg
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct L40Sx1 FP8 Throughput
          ngcMetadata:
            ad17776f4619854fccd50354f31132a558a1ca619930698fd184d6ccf5fe3c99:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 2GB
        # Profile: 1x H200, FP8, throughput (TensorRT-LLM, NIM 1.8.6, 2GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:h200x1-throughput-fp8-ajzq-idegq
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct H200x1 FP8 Throughput
          ngcMetadata:
            af876a179190d1832143f8b4f4a71f640f3df07b0503259cedee3e3a8363aa96:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 2GB
        # Profile: 1x A100_SXM4_40GB, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        # NOTE(review): this profileId (`...:hf-e9f8eff-nim1.5+`) is not unique; several
        # other profiles of this variant, including the generic buildable BF16 one, use
        # the same value and differ only in their ngcMetadata key. Confirm that consumers
        # do not key on profileId alone.
        - profileId: nim/meta/llama-3.2-1b-instruct:hf-e9f8eff-nim1.5+
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct A100_SXM4_40GBx1 BF16 Throughput
          ngcMetadata:
            c6821c013c559912c37e61d7b954c5ca8fe07dda76d8bea0f4a52320e0a54427:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100_SXM4_40GB
                gpu_device: 20b0:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100_SXM4_40GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B0:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # Profile: 1x H100, BF16, throughput (TensorRT-LLM, NIM 1.8.6, 3GB download).
        - profileId: nim/meta/llama-3.2-1b-instruct:h100x1-throughput-bf16-ns5-dcqtwq
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct H100x1 BF16 Throughput
          ngcMetadata:
            e7dbd9a8ce6270d2ec649a0fecbcae9b5336566113525f20aee3809ba5e63856:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
        # GPU-agnostic Llama 3.2 1B Instruct BF16 profile: it carries no gpu,
        # gpu_device, number_of_gpus or profile tag and is marked
        # trtllm_buildable, so spec has no PROFILE/GPU/GPU DEVICE rows.
        # NOTE(review): this profileId is also used by the A100_SXM4_40GB entry
        # of this variant; confirm the shared tag is intentional.
        - profileId: nim/meta/llama-3.2-1b-instruct:hf-e9f8eff-nim1.5+
          framework: TensorRT-LLM
          displayName: Llama 3.2 1B Instruct BF16
          ngcMetadata:
            ac34857f8dcbd174ad524974248f2faf271bd2a0355643b2cf1490d0fe7787c2:
              model: meta/llama-3.2-1b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '1'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 3GB
    # Variant header for Llama 3.2 3B Instruct; the entries under
    # optimizationProfiles are its per-GPU / per-precision profiles.
    # NOTE(review): the source URL points at orgs/nvidia/teams/nemo/models,
    # whereas the profiles below are published under nim/meta; confirm the
    # catalog link is the intended one.
    - variantId: Llama 3.2 3B Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/llama-3_2-3b-instruct
      optimizationProfiles:
        # Llama 3.2 3B Instruct throughput profile for 1x A100 at FP16
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:a100x1-throughput-fp16-6m5dnxceua
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct A100x1 FP16 Throughput
          ngcMetadata:
            34e8e5d0b7ac366e5247473de1ac4a6620fa6000e13f1a256735129d0db23761:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 7GB
        # Llama 3.2 3B Instruct throughput profile for 1x H100_NVL at FP16
        # (tp=1, pp=1), NIM release 1.8.6.
        # NOTE(review): profileId hf-392a143-0508-tool-use-v2 is reused by
        # several entries of this variant; they differ only by ngcMetadata hash.
        # Confirm the shared tag is intentional.
        - profileId: nim/meta/llama-3.2-3b-instruct:hf-392a143-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct H100_NVLx1 FP16 Throughput
          ngcMetadata:
            3c8257b0990b7e2a5c6a2ba21a4e8e4dac6bf6e43320f41281a0b5f6d6c0228d:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 6GB
        # Llama 3.2 3B Instruct throughput profile for 1x H100_NVL at FP8
        # (tp=1, pp=1), NIM release 1.8.6.
        # NOTE(review): profileId hf-392a143-0508-tool-use-v2 is reused by
        # several entries of this variant; they differ only by ngcMetadata hash.
        # Confirm the shared tag is intentional.
        - profileId: nim/meta/llama-3.2-3b-instruct:hf-392a143-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct H100_NVLx1 FP8 Throughput
          ngcMetadata:
            5811750e70b7e9f340f4d670c72fcbd5282e254aeb31f62fd4f937cfb9361007:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100_NVL
                gpu_device: 2321:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100_NVL
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2321:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 6GB
        # Llama 3.2 3B Instruct throughput profile for 1x H200 at FP16
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:h200x1-throughput-fp16-ylknhtupda
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct H200x1 FP16 Throughput
          ngcMetadata:
            5fbbb278b341858164183716ee8e04eb41e0d5283d6faedfba8aed4180535b53:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 7GB
        # Llama 3.2 3B Instruct throughput profile for 1x H100 at FP8
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:h100x1-throughput-fp8-cehb8f0o2q
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct H100x1 FP8 Throughput
          ngcMetadata:
            7b508014e846234db3cabe5c9f38568b4ee96694b60600a0b71c621dc70cacf3:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 5GB
        # Llama 3.2 3B Instruct throughput profile for 1x B200 at FP8
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:b200x1-throughput-fp8-ecjbunkvug
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct B200x1 FP8 Throughput
          ngcMetadata:
            8b87146e39b0305ae1d73bc053564d1b4b4c565f81aa5abe3e84385544ca9b60:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 5GB
        # Llama 3.2 3B Instruct throughput profile for 1x A100_SXM4_40GB at FP16
        # (tp=1, pp=1), NIM release 1.8.6.
        # NOTE(review): profileId hf-392a143-0508-tool-use-v2 is reused by
        # several entries of this variant; they differ only by ngcMetadata hash.
        # Confirm the shared tag is intentional.
        - profileId: nim/meta/llama-3.2-3b-instruct:hf-392a143-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct A100_SXM4_40GBx1 FP16 Throughput
          ngcMetadata:
            8d4a2ffc83d859ba5a1c31912cb3e555f7c994111987b3e1101baae915371bf1:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A100_SXM4_40GB
                gpu_device: 20b0:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100_SXM4_40GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B0:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 6GB
        # Llama 3.2 3B Instruct throughput profile for 1x A10G at FP16
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:a10gx1-throughput-fp16-ygf1h6yobw
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct A10Gx1 FP16 Throughput
          ngcMetadata:
            8ffb5cb4d82407de65b02eb9749fd1fa84084137e05593706f33466259df9f6b:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2237:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 7GB
        # Llama 3.2 3B Instruct throughput profile for 1x B200 at BF16
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:b200x1-throughput-bf16-qk5bstzzsa
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct B200x1 BF16 Throughput
          ngcMetadata:
            a4c63a91bccf635b570ddb6d14eeb6e7d0acb2389712892b08d21fad2ceaee38:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: B200
                gpu_device: 2901:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2901:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 7GB
        # Llama 3.2 3B Instruct throughput profile for 1x L40S at FP8
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:l40sx1-throughput-fp8-8ujyylyqww
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct L40Sx1 FP8 Throughput
          ngcMetadata:
            ad17776f4619854fccd50354f31132a558a1ca619930698fd184d6ccf5fe3c99:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 5GB
        # Llama 3.2 3B Instruct throughput profile for 1x H200 at FP8
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:h200x1-throughput-fp8-nj2npsfa-w
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct H200x1 FP8 Throughput
          ngcMetadata:
            af876a179190d1832143f8b4f4a71f640f3df07b0503259cedee3e3a8363aa96:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H200
                gpu_device: 2335:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H200
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2335:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 5GB
        # Llama 3.2 3B Instruct throughput profile for 1x H100 at FP16
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:h100x1-throughput-fp16-m2qe4ioxxq
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct H100x1 FP16 Throughput
          ngcMetadata:
            d6e3e406c0b0eaeb76f21ee5c5a545edfe8031d3cfa302030f690f38126b0ab8:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 7GB
        # Llama 3.2 3B Instruct throughput profile for 1x L40S at FP16
        # (tp=1, pp=1), NIM release 1.8.6.
        - profileId: nim/meta/llama-3.2-3b-instruct:l40sx1-throughput-fp16-utfz33y-fg
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct L40Sx1 FP16 Throughput
          ngcMetadata:
            e9674f56ec90347526f8edf1cf407cce1441972ef277dee7a15f5c768112d2bd:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: L40S
                gpu_device: 26b9:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B9:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 7GB
        # Llama 3.2 3B Instruct throughput profile for 1x GH200_480GB at FP8
        # (tp=1, pp=1), NIM release 1.8.6.
        # NOTE(review): profileId hf-392a143-0508-tool-use-v2 is reused by
        # several entries of this variant; they differ only by ngcMetadata hash.
        # Confirm the shared tag is intentional.
        - profileId: nim/meta/llama-3.2-3b-instruct:hf-392a143-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct GH200_480GBx1 FP8 Throughput
          ngcMetadata:
            f49b49f3d90159a594def51efd8595f1d618e288bca2721fe08e786a1ac67d04:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: GH200_480GB
                gpu_device: 2342:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: FP8
            - key: GPU
              value: GH200_480GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2342:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 6GB
        # Llama 3.2 3B Instruct throughput profile for 1x GH200_480GB at BF16
        # (tp=1, pp=1), NIM release 1.8.6.
        # NOTE(review): profileId hf-392a143-0508-tool-use-v2 is reused by
        # several entries of this variant; they differ only by ngcMetadata hash.
        # Confirm the shared tag is intentional.
        - profileId: nim/meta/llama-3.2-3b-instruct:hf-392a143-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct GH200_480GBx1 BF16 Throughput
          ngcMetadata:
            f7f74ecd523cd63065a50016a8786a893b9b1efe0d313bc5bcc54682f56e55fe:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                gpu: GH200_480GB
                gpu_device: 2342:10de
                llm_engine: tensorrt_llm
                number_of_gpus: '1'
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
          modelFormat: trt-llm
          # spec repeats the tags above as upper-cased key/value pairs.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: GH200_480GB
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2342:10DE
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 6GB
        # GPU-agnostic Llama 3.2 3B Instruct BF16 profile: it carries no gpu,
        # gpu_device, number_of_gpus or profile tag and is marked
        # trtllm_buildable, so spec has no PROFILE/GPU/GPU DEVICE rows.
        # NOTE(review): profileId hf-392a143-0508-tool-use-v2 is also used by
        # GPU-specific entries of this variant; confirm the shared tag is intended.
        - profileId: nim/meta/llama-3.2-3b-instruct:hf-392a143-0508-tool-use-v2
          framework: TensorRT-LLM
          displayName: Llama 3.2 3B Instruct BF16
          ngcMetadata:
            ac34857f8dcbd174ad524974248f2faf271bd2a0355643b2cf1490d0fe7787c2:
              model: meta/llama-3.2-3b-instruct
              release: 1.8.6
              tags:
                feat_lora: 'false'
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                tp: '1'
                trtllm_buildable: 'true'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: BF16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.6
            - key: DOWNLOAD SIZE
              value: 6GB
  labels:
    - Llama
    - Meta
    - Multilingual Large Language Model
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# Registry entry for the NeMo Retriever-Parse text-extraction model (NGC type);
# it has a single variant, nemoretriever-parse:1.2.0.
- name: NeMo Retriever-Parse
  displayName: NeMo Retriever-Parse
  modelHubID: nemoretriever-parse
  category: Text Extraction
  type: NGC
  description: Nemoretriever-parse is a general purpose text-extraction model, specifically designed to handle documents. Given an image, nemoretriever-parse is able to extract formatted-text, with bounding-boxes and the corresponding semantic class. This has downstream benefits for several tasks such as increasing the availability of training-data for Large Language Models (LLMs), improving the accuracy of retriever systems, and enhancing document understanding pipelines.
  requireLicense: true
  # NOTE(review): both agreement URLs point to Meta Llama 3 pages although the
  # profiles below are published under nvidia/nemoretriever-parse; confirm
  # these are the intended license links.
  licenseAgreements:
    - label: Use Policy
      url: https://llama.meta.com/llama3/use-policy/
    - label: License Agreement
      url: https://llama.meta.com/llama3/license/
  modelVariants:
    - variantId: nemoretriever-parse:1.2.0
      source:
        URL: https://build.nvidia.com/nvidia/nemoretriever-parse
      optimizationProfiles:
        # nemoretriever-parse throughput profile for A100 at BF16 (tp=1, pp=1),
        # release 1.2.0.
        - profileId: nim/nvidia/nemoretriever-parse:a100x1-throughput-bf16-e9wjao-enw
          framework: TensorRT-LLM
          displayName: nemoretriever-parse A100 BF16 Throughput
          ngcMetadata:
            19c68819d9428cfa494e977f4d2be6378215a8f610cce9bdfc0aa3cdd7d66aa9:
              model: nvidia/nemoretriever-parse
              release: 1.2.0
              tags:
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                pp: '1'
                profile: throughput
                precision: bf16
                tp: '1'
          modelFormat: trt-llm
          # spec values are upper-cased to match every other profile in this
          # registry (PROFILE: THROUGHPUT, GPU DEVICE: 20B2:10DE); the
          # ngcMetadata tags above are left exactly as published.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20B2:10DE
            - key: NIM VERSION
              value: 1.2.0
            - key: DOWNLOAD SIZE
              value: 600MB
        # nemoretriever-parse throughput profile for H100 at BF16 (tp=1, pp=1),
        # release 1.2.0.
        - profileId: nim/nvidia/nemoretriever-parse:h100x1-throughput-bf16-2apiazbpma
          framework: TensorRT-LLM
          displayName: nemoretriever-parse H100 BF16 Throughput
          ngcMetadata:
            8db6dcd816ca1ce8d07e72d8b9c4682120b3c50799422361e35b4ab87820efd6:
              model: nvidia/nemoretriever-parse
              release: 1.2.0
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                profile: throughput
                precision: bf16
                tp: '1'
          modelFormat: trt-llm
          # spec values are upper-cased to match every other profile in this
          # registry (PROFILE: THROUGHPUT, GPU DEVICE: 2330:10DE); the
          # ngcMetadata tags above are left exactly as published.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10DE
            - key: NIM VERSION
              value: 1.2.0
            - key: DOWNLOAD SIZE
              value: 600MB
        # nemoretriever-parse throughput profile for L40S at BF16 (tp=1, pp=1),
        # release 1.2.0.
        # NOTE(review): the other L40S profiles in this registry use device ID
        # 26b9:10de, while this entry uses 26b5:10de. The value is kept because
        # it mirrors the ngcMetadata tag; confirm it against the NGC manifest.
        - profileId: nim/nvidia/nemoretriever-parse:l40sx1-throughput-bf16-r98ogb1a1a
          framework: TensorRT-LLM
          displayName: nemoretriever-parse L40S BF16 Throughput
          ngcMetadata:
            00c8a43783e7acf3d59a0d773cd78d3d29eaa71fa4412af7af2fbaf20e196a8b:
              model: nvidia/nemoretriever-parse
              release: 1.2.0
              tags:
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                profile: throughput
                precision: bf16
                tp: '1'
          modelFormat: trt-llm
          # spec values are upper-cased to match every other profile in this
          # registry (PROFILE: THROUGHPUT, upper-case GPU DEVICE); the
          # ngcMetadata tags above are left exactly as published.
          spec:
            - key: PROFILE
              value: THROUGHPUT
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 26B5:10DE
            - key: NIM VERSION
              value: 1.2.0
            - key: DOWNLOAD SIZE
              value: 600MB
  # Catalog labels, model config and license for the NeMo Retriever-Parse entry.
  labels:
    - NeMo
    - Text Extraction
    - Large Language Model
    - NVIDIA Validated
  config:
    architectures:
      - Other
    # NOTE(review): modelType is `llama` although the description presents
    # this as an image-to-text extraction model; confirm the value.
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
- name: Nemoretriever Graphic Elements V1
  displayName: Nemoretriever Graphic Elements V1
  modelHubID: nemoretriever-graphic-elements-v1
  category: Object Detection
  type: NGC
  description: NVIDIA NeMo Retriever NIM for graphic elements v1 is a fine-tuned object detection model, trained specifically for detecting the elements of charts and tables in documents
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-community-models-license/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: Nemoretriever Graphic Elements V1
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/nemoretriever-graphic-elements-v1
      optimizationProfiles:
        # Graphic Elements V1 FP16 profile for 1x A100-SXM4-80GB: Triton
        # backend, TensorRT model type, batch size 32, release 1.3.0.
        # NOTE(review): framework/modelFormat say TensorRT-LLM / trt-llm while
        # the tags say model_type: tensorrt; confirm that is what the schema
        # expects. This profileId is also used by the A100-SXM4-40GB entry of
        # this variant; confirm the shared tag is intentional.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:a100x1-trt-fp16-7lfmwoem-q
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 A100-SXM4-80GBx1 FP16
          ngcMetadata:
            26c97e9919dae5405145446f00e0189f615ce682526ec9b6da88b5138ff8097d:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # Graphic Elements V1 FP16 profile for 1x H100-NVL: Triton backend,
        # TensorRT model type, batch size 32, release 1.3.0.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:h100-nvlx1-trt-fp16-pqwy1dxfxg
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 H100-NVLx1 FP16
          ngcMetadata:
            2e7da4417b1c44978e00f67823ae7ec885edb310e34c2c18464fbd6cad345065:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-NVL
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-NVL
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # Graphic Elements V1 FP16 profile for 1x H100-HBM3-80GB: Triton
        # backend, TensorRT model type, batch size 32, release 1.3.0.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:h100x1-trt-fp16-tcgzpfpbwq
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 H100-HBM3-80GBx1 FP16
          ngcMetadata:
            6097695b532c9abe549de9918de6b4702eda625f27b508acd7b7dcc04f38ebe1:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-HBM3-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-HBM3-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # Graphic Elements V1 FP16 profile for 1x A10G: Triton backend,
        # TensorRT model type, batch size 32, release 1.3.0.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:a10gx1-trt-fp16-ge6bpqew8g
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 A10Gx1 FP16
          ngcMetadata:
            859ace730c899fb7b8362fe773639da57544f87584f9ed138089e85665653972:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A10G
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # Graphic Elements V1 FP16 profile for 1x L40S: Triton backend,
        # TensorRT model type, batch size 32, release 1.3.0.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:l40sx1-trt-fp16-ltjamioamw
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 L40Sx1 FP16
          ngcMetadata:
            90699b066c264c9533628aeb4f1814ef51e0f2f021540e3ae77181f2ef9ce9ed:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L40S
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # Graphic Elements V1 FP16 profile for 1x B200: Triton backend,
        # TensorRT model type, batch size 32, release 1.3.0.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:b200x1-trt-fp16-4kosyy1fig
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 B200x1 FP16
          ngcMetadata:
            a7d7cc7f7236b793a7722a3f0777b0cf3a989cc3c2c34d3e66a392329e1530e7:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: B200
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # Graphic Elements V1 FP16 profile for 1x L4: Triton backend,
        # TensorRT model type, batch size 32, release 1.3.0.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:l4x1-trt-fp16-lhpdyawoug
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 L4x1 FP16
          ngcMetadata:
            ed0b8106aedfc536be363b6f1f0901bd4cd371ef22e640ac03fa7f4e3ed71647:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L4
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # Graphic Elements V1 ONNX FP16 profile: Triton backend, ONNX model
        # type, release 1.3.0. It has no gpu or batch_size tag, so spec has no
        # GPU row.
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:2_ONNX_FP16_1024
          framework: ONNX
          displayName: Nemoretriever Graphic Elements V1 ONNX FP16
          ngcMetadata:
            edc693c6fccd68d266622eace04225421e353d7ce31e3b207afc5ff35124127b:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                model_type: onnx
                precision: fp16
                tp: '1'
          modelFormat: onnx
          spec:
            - key: PRECISION
              value: FP16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: ONNX
        - profileId: nim/nvidia/nemoretriever-graphic-elements-v1:a100x1-trt-fp16-7lfmwoem-q
          framework: TensorRT-LLM
          displayName: Nemoretriever Graphic Elements V1 A100-SXM4-40GBx1 FP16
          ngcMetadata:
            f93ae043aafc696a85fc58461c074397d39ec747651ca996ae470222f93b4e62:
              model: nvidia/nemoretriever-graphic-elements-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-40GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-40GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
  labels:
    - signed images
    - NVIDIA AI Enterprise Supported
    - NVIDIA NIM
    - NSPECT-7OBP-T77C
  config:
    architectures:
      - Other
    modelType: NGC
  license: NVIDIA AI Foundation Models Community License
# Registry entry: Nemoretriever Page Elements V2 (object detection, NGC-sourced).
# One variant with nine optimization profiles, all at release 1.3.0: eight
# TensorRT profiles (one per GPU tag) and one ONNX profile.
- name: Nemoretriever Page Elements V2
  displayName: Nemoretriever Page Elements V2
  modelHubID: nemoretriever-page-elements-v2
  category: Object Detection
  type: NGC
  description: NVIDIA NeMo Retriever NIM for page elements v2 is a fine-tuned object detection model, trained specifically for detecting charts, tables, infographics, and titles on a document page.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-community-models-license/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: Nemoretriever Page Elements V2
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/nemoretriever-page-elements-v2
      # Each profile carries one ngcMetadata entry keyed by a 64-hex-character
      # string; the `spec` list repeats the tag values in upper case.
      # NOTE(review): the TensorRT profiles declare `framework: TensorRT-LLM` /
      # `modelFormat: trt-llm` while their tags say `model_type: tensorrt` —
      # presumably dictated by the consumer's schema; confirm.
      optimizationProfiles:
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:a100x1-trt-fp16-vwtgi2gdbg
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 A100-SXM4-80GBx1 FP16
          ngcMetadata:
            26c97e9919dae5405145446f00e0189f615ce682526ec9b6da88b5138ff8097d:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:h100x1-trt-fp16-gu9grcql-w
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 H100-NVLx1 FP16
          ngcMetadata:
            2e7da4417b1c44978e00f67823ae7ec885edb310e34c2c18464fbd6cad345065:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-NVL
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-NVL
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # NOTE(review): same profileId as the H100-NVL profile directly above;
        # only the ngcMetadata key, gpu tag and displayName differ. Confirm the
        # consumer does not require profileId to be unique within a variant.
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:h100x1-trt-fp16-gu9grcql-w
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 H100-HBM3-80GBx1 FP16
          ngcMetadata:
            6097695b532c9abe549de9918de6b4702eda625f27b508acd7b7dcc04f38ebe1:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-HBM3-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-HBM3-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:a10gx1-trt-fp16-xrpnjisxaw
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 A10Gx1 FP16
          ngcMetadata:
            859ace730c899fb7b8362fe773639da57544f87584f9ed138089e85665653972:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A10G
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:l40sx1-trt-fp16-ckx59ghtpa
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 L40Sx1 FP16
          ngcMetadata:
            90699b066c264c9533628aeb4f1814ef51e0f2f021540e3ae77181f2ef9ce9ed:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L40S
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:b200x1-trt-fp16-b0owuhri1a
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 B200x1 FP16
          ngcMetadata:
            a7d7cc7f7236b793a7722a3f0777b0cf3a989cc3c2c34d3e66a392329e1530e7:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: B200
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:l4x1-trt-fp16-z-e8x-oorg
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 L4x1 FP16
          ngcMetadata:
            ed0b8106aedfc536be363b6f1f0901bd4cd371ef22e640ac03fa7f4e3ed71647:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L4
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # ONNX profile: unlike the TensorRT profiles it has no `gpu` or
        # `batch_size` tag and no GPU entry in `spec`.
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:a100x1-onnx-fp16-wagmq6-x1q
          framework: ONNX
          displayName: Nemoretriever Page Elements V2 ONNX FP16
          ngcMetadata:
            edc693c6fccd68d266622eace04225421e353d7ce31e3b207afc5ff35124127b:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                model_type: onnx
                precision: fp16
                tp: '1'
          modelFormat: onnx
          spec:
            - key: PRECISION
              value: FP16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: ONNX
        # NOTE(review): same profileId as the A100-SXM4-80GB profile at the top
        # of this list; only the ngcMetadata key, gpu tag and displayName
        # differ. Confirm this reuse is intentional.
        - profileId: nim/nvidia/nemoretriever-page-elements-v2:a100x1-trt-fp16-vwtgi2gdbg
          framework: TensorRT-LLM
          displayName: Nemoretriever Page Elements V2 A100-SXM4-40GBx1 FP16
          ngcMetadata:
            f93ae043aafc696a85fc58461c074397d39ec747651ca996ae470222f93b4e62:
              model: nvidia/nemoretriever-page-elements-v2
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-40GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-40GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
  labels:
    - signed images
    - NSPECT-7OBP-T77C
    - NVIDIA AI Enterprise Supported
    - NVIDIA NIM
  config:
    architectures:
      - Other
    modelType: NIM
  license: NVIDIA AI Foundation Models Community License
# Registry entry: Nemoretriever Table Structure V1 (object detection, NGC-sourced).
# One variant with nine optimization profiles, all at release 1.3.0: eight
# TensorRT profiles (one per GPU tag) and one ONNX profile.
- name: Nemoretriever Table Structure V1
  displayName: Nemoretriever Table Structure V1
  modelHubID: nemoretriever-table-structure-v1
  category: Object Detection
  type: NGC
  description: NVIDIA NeMo Retriever NIM for table structure v1 is a fine-tuned object detection model, trained specifically for detecting the structure of complex tables.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-community-models-license/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: Nemoretriever Table Structure V1
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/nemoretriever-table-structure-v1
      # Each profile carries one ngcMetadata entry keyed by a 64-hex-character
      # string; the `spec` list repeats the tag values in upper case.
      optimizationProfiles:
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:a100x1-trt-fp16-sdugvfnmmg
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 A100-SXM4-80GBx1 FP16
          ngcMetadata:
            26c97e9919dae5405145446f00e0189f615ce682526ec9b6da88b5138ff8097d:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:h100x1-trt-fp16-ujymwllaaq
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 H100-NVLx1 FP16
          ngcMetadata:
            2e7da4417b1c44978e00f67823ae7ec885edb310e34c2c18464fbd6cad345065:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-NVL
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-NVL
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # NOTE(review): same profileId as the H100-NVL profile directly above;
        # only the ngcMetadata key, gpu tag and displayName differ. Confirm the
        # consumer does not require profileId to be unique within a variant.
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:h100x1-trt-fp16-ujymwllaaq
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 H100-HBM3-80GBx1 FP16
          ngcMetadata:
            6097695b532c9abe549de9918de6b4702eda625f27b508acd7b7dcc04f38ebe1:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-HBM3-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-HBM3-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:a10gx1-trt-fp16-rxf-vkobqa
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 A10Gx1 FP16
          ngcMetadata:
            859ace730c899fb7b8362fe773639da57544f87584f9ed138089e85665653972:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A10G
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:l40sx1-trt-fp16-s31zox26qg
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 L40Sx1 FP16
          ngcMetadata:
            90699b066c264c9533628aeb4f1814ef51e0f2f021540e3ae77181f2ef9ce9ed:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L40S
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:b200x1-trt-fp16-gqzy1y0hua
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 B200x1 FP16
          ngcMetadata:
            a7d7cc7f7236b793a7722a3f0777b0cf3a989cc3c2c34d3e66a392329e1530e7:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: B200
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:l4x1-trt-fp16-5m0higxepq
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 L4x1 FP16
          ngcMetadata:
            ed0b8106aedfc536be363b6f1f0901bd4cd371ef22e640ac03fa7f4e3ed71647:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L4
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # ONNX profile: unlike the TensorRT profiles it has no `gpu` or
        # `batch_size` tag and no GPU entry in `spec`.
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:a100x1-onnx-fp16-l8hnwsbr3g
          framework: ONNX
          displayName: Nemoretriever Table Structure V1 ONNX FP16
          ngcMetadata:
            edc693c6fccd68d266622eace04225421e353d7ce31e3b207afc5ff35124127b:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                model_type: onnx
                precision: fp16
                tp: '1'
          modelFormat: onnx
          spec:
            - key: PRECISION
              value: FP16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: ONNX
        # NOTE(review): same profileId as the A100-SXM4-80GB profile at the top
        # of this list; only the ngcMetadata key, gpu tag and displayName
        # differ. Confirm this reuse is intentional.
        - profileId: nim/nvidia/nemoretriever-table-structure-v1:a100x1-trt-fp16-sdugvfnmmg
          framework: TensorRT-LLM
          displayName: Nemoretriever Table Structure V1 A100-SXM4-40GBx1 FP16
          ngcMetadata:
            f93ae043aafc696a85fc58461c074397d39ec747651ca996ae470222f93b4e62:
              model: nvidia/nemoretriever-table-structure-v1
              release: 1.3.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-40GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-40GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
  labels:
    - signed images
    - NSPECT-7OBP-T77C
    - NVIDIA AI Enterprise Supported
    - NVIDIA NIM
  config:
    architectures:
      - Other
    modelType: NIM
  license: NVIDIA AI Foundation Models Community License
# Registry entry: PaddleOCR (optical character recognition, NGC-sourced).
# One variant with nine optimization profiles, all at release 1.4.0: eight
# TensorRT profiles (one per GPU tag) and one ONNX profile.
- name: PaddleOCR
  displayName: PaddleOCR
  modelHubID: paddleocr
  category: Optical Character Recognition
  type: NGC
  description: PaddleOCR is an ultra lightweight Optical Character Recognition (OCR) system by Baidu. PaddleOCR supports a variety of cutting-edge algorithms related to OCR.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-community-models-license/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: PaddleOCR
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/baidu/containers/paddleocr
      # NOTE(review): four profiles in this list (A100-SXM4-80GB, H100-NVL,
      # H100-HBM3-80GB and A100-SXM4-40GB) share the profileId
      # `nim/baidu/paddleocr:2_TRT_python_2`, while the A10G, L40S, B200 and L4
      # profiles each have a distinct GPU-prefixed id. Confirm the shared id is
      # intentional and that the consumer does not require unique profileIds.
      optimizationProfiles:
        - profileId: nim/baidu/paddleocr:2_TRT_python_2
          framework: TensorRT-LLM
          displayName: Paddleocr A100-SXM4-80GBx1 FP16
          ngcMetadata:
            26c97e9919dae5405145446f00e0189f615ce682526ec9b6da88b5138ff8097d:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/baidu/paddleocr:2_TRT_python_2
          framework: TensorRT-LLM
          displayName: Paddleocr H100-NVLx1 FP16
          ngcMetadata:
            2e7da4417b1c44978e00f67823ae7ec885edb310e34c2c18464fbd6cad345065:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-NVL
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-NVL
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/baidu/paddleocr:2_TRT_python_2
          framework: TensorRT-LLM
          displayName: Paddleocr H100-HBM3-80GBx1 FP16
          ngcMetadata:
            6097695b532c9abe549de9918de6b4702eda625f27b508acd7b7dcc04f38ebe1:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: H100-HBM3-80GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-HBM3-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/baidu/paddleocr:a10gx1-trt-fp16-ijpjeptpna
          framework: TensorRT-LLM
          displayName: Paddleocr A10Gx1 FP16
          ngcMetadata:
            859ace730c899fb7b8362fe773639da57544f87584f9ed138089e85665653972:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A10G
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/baidu/paddleocr:l40sx1-trt-fp16-evboykuf0g
          framework: TensorRT-LLM
          displayName: Paddleocr L40Sx1 FP16
          ngcMetadata:
            90699b066c264c9533628aeb4f1814ef51e0f2f021540e3ae77181f2ef9ce9ed:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L40S
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/baidu/paddleocr:b200x1-trt-fp16-itomrwaucq
          framework: TensorRT-LLM
          displayName: Paddleocr B200x1 FP16
          ngcMetadata:
            a7d7cc7f7236b793a7722a3f0777b0cf3a989cc3c2c34d3e66a392329e1530e7:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: B200
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        - profileId: nim/baidu/paddleocr:l4x1-trt-fp16-fswetrkejq
          framework: TensorRT-LLM
          displayName: Paddleocr L4x1 FP16
          ngcMetadata:
            ed0b8106aedfc536be363b6f1f0901bd4cd371ef22e640ac03fa7f4e3ed71647:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: L4
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
        # ONNX profile: unlike the TensorRT profiles it has no `gpu` or
        # `batch_size` tag and no GPU entry in `spec`.
        - profileId: nim/baidu/paddleocr:4_ONNX_python_2
          framework: ONNX
          displayName: Paddleocr ONNX FP16
          ngcMetadata:
            edc693c6fccd68d266622eace04225421e353d7ce31e3b207afc5ff35124127b:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                model_type: onnx
                precision: fp16
                tp: '1'
          modelFormat: onnx
          spec:
            - key: PRECISION
              value: FP16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: ONNX
        - profileId: nim/baidu/paddleocr:2_TRT_python_2
          framework: TensorRT-LLM
          displayName: Paddleocr A100-SXM4-40GBx1 FP16
          ngcMetadata:
            f93ae043aafc696a85fc58461c074397d39ec747651ca996ae470222f93b4e62:
              model: baidu/paddleocr
              release: 1.4.0
              tags:
                backend: triton
                batch_size: '32'
                gpu: A100-SXM4-40GB
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-40GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.4.0
            - key: DOWNLOAD SIZE
              value: 1GB
            - key: BACKEND
              value: TRITON
            - key: MODEL TYPE
              value: TENSORRT
  labels:
    - signed images
    - NSPECT-LDAL-INWI
    - NVIDIA AI Enterprise Supported
    - NVIDIA NIM
  config:
    architectures:
      - Other
    modelType: NIM
  license: NVIDIA AI Foundation Models Community License
# Catalog entry for the NVIDIA Llama 3.2 NV EmbedQA 1B v2 text-embedding NIM.
- name: Llama 3.2 NV EmbedQA 1b V2
  displayName: Llama 3.2 NV EmbedQA 1b V2
  modelHubID: nvidia/llama-3.2-nv-embedqa-1b-v2
  category: Text Embedding
  type: NGC
  description: The NVIDIA Retrieval QA Llama3.2 1b Embedding NIM is an embedding NIM optimized for multilingual and crosslingual text question-answering retrieval.
  requireLicense: true
  licenseAgreements:
    # The model is built on Llama 3.2, so link the Llama 3.2 policy and license
    # pages rather than the Llama 3 ones.
    - label: Use Policy
      url: https://www.llama.com/llama3_2/use-policy/
    - label: License Agreement
      url: https://www.llama.com/llama3_2/license/
  modelVariants:
    - variantId: Llama 3.2 NV EmbedQA 1b V2
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/llama-3.2-nv-embedqa-1b-v2
      optimizationProfiles:
        # ONNX FP16 profile (tp 1); its tags name no GPU.
        - profileId: nim/nvidia/llama-3.2-nv-embedqa-1b-v2:onnx-precision.fp16-7c7a1c17
          framework: ONNX
          displayName: Llama 3.2 NV Embedqa 1B V2 ONNX FP16
          ngcMetadata:
            # NOTE(review): the ONNX FP16 profile of Llama 3.2 NV RerankQA 1b V2
            # uses this same key with identical tags; presumably the key is
            # derived from the tags -- confirm it is not a copy/paste slip.
            f7391ddbcb95b2406853526b8e489fedf20083a2420563ca3e65358ff417b10f:
              model: nvidia/llama-3.2-nv-embedqa-1b-v2
              release: 1.10.0
              tags:
                backend: onnx
                model_type: onnx
                precision: fp16
                tp: '1'
          modelFormat: onnx
          spec:
            - key: PRECISION
              value: FP16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.10.0
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: BACKEND
              value: ONNX
            - key: MODEL TYPE
              value: ONNX
            - key: MAX TOKENS
              value: 8192
            # NOTE(review): unit is not stated; presumably millions of
            # parameters -- confirm.
            - key: TOTAL PARAMETERS
              value: 1236
            # NOTE(review): this key is mixed-case while the other spec keys in
            # this list are upper-case; confirm whether consumers match the key
            # verbatim before normalizing.
            - key: Embedding Dimension
              value: 2048
  # Model-level labels, architecture config and license for this entry.
  # NOTE(review): "Chat" and "Large Language Model" are attached to a model
  # whose category is Text Embedding -- confirm these labels are intended.
  labels:
    - Llama
    - Meta
    - Chat
    - Large Language Model
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# Catalog entry for the NVIDIA Llama 3.2 NV RerankQA 1B v2 reranking NIM.
- name: Llama 3.2 NV RerankQA 1b V2
  displayName: Llama 3.2 NV RerankQA 1b V2
  modelHubID: nvidia/llama-3.2-nv-rerankqa-1b-v2
  category: Text Embedding
  type: NGC
  description: The NVIDIA Retrieval QA Llama 1B Reranking NIM is a NIM optimized for providing a logit score that represents how relevant a document(s) is to a given query, fine-tuned for multilingual and cross-lingual text question-answering retrieval.
  requireLicense: true
  licenseAgreements:
    # The model is built on Llama 3.2, so link the Llama 3.2 policy and license
    # pages rather than the Llama 3 ones.
    - label: Use Policy
      url: https://www.llama.com/llama3_2/use-policy/
    - label: License Agreement
      url: https://www.llama.com/llama3_2/license/
  modelVariants:
    - variantId: Llama 3.2 NV RerankQA 1b V2
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/llama-3.2-nv-rerankqa-1b-v2
      optimizationProfiles:
        # TensorRT FP16 profile tagged for one NVIDIA H100 NVL (device 2321:10de).
        # NOTE(review): the H100 80GB HBM3 FP16 profile of this variant carries
        # the same profileId; presumably both GPUs share one artifact -- confirm
        # that consumers do not require profileId to be unique.
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:h100x1-trt-fp16--ckqlv3j2g
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA H100 NVLx1 FP16
          ngcMetadata:
            3b1e767e41d02ed0ffa5aa6b46a2edfdd1540edaec2eeda4c00278c838bba38b:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 2321:10de
                gpu: NVIDIA H100 NVL
                gpu_key: h100-nvl
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-NVL
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 5GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP16 profile tagged for one NVIDIA A100-SXM4-40GB
        # (device 20b0:10de).
        # NOTE(review): the A100-SXM4-80GB FP16 profile of this variant carries
        # the same profileId; presumably both GPUs share one artifact -- confirm
        # that consumers do not require profileId to be unique.
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:a100x1-trt-fp16-dxtbz8wstg
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA A100-SXM4-40GBx1 FP16
          ngcMetadata:
            477500a740ea33ea1419289866bbfd598ce51a806fe034b48dc176db32155f59:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 20b0:10de
                gpu: NVIDIA A100-SXM4-40GB
                gpu_key: a100-sxm4-40gb
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-40GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP16 profile tagged for one NVIDIA L40S (device 26b9:10de).
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:l40sx1-trt-fp16-20qsn53gag
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA L40Sx1 FP16
          ngcMetadata:
            49d14b4eaebc6b1f61e48afb3d88535f4ad3758ea55036f5ab3815d1c5a927fc:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 26b9:10de
                gpu: NVIDIA L40S
                gpu_key: l40s
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP16 profile tagged for one NVIDIA A100-SXM4-80GB
        # (device 20b2:10de).
        # NOTE(review): the A100-SXM4-40GB FP16 profile of this variant carries
        # the same profileId; presumably both GPUs share one artifact -- confirm
        # that consumers do not require profileId to be unique.
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:a100x1-trt-fp16-dxtbz8wstg
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA A100-SXM4-80GBx1 FP16
          ngcMetadata:
            4ea4624dcc114adeeb29272322897800cddf5dfa873dac467f67d827b7dd9c4d:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 20b2:10de
                gpu: NVIDIA A100-SXM4-80GB
                gpu_key: a100-sxm4-80gb
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A100-SXM4-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP8 profile tagged for one NVIDIA L40S (device 26b9:10de).
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:l40sx1-trt-fp8-4nwnajwq4g
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA L40Sx1 FP8
          ngcMetadata:
            5036ebf412fba4e54511ab4b3822ec7dfb9fd2c256c3100ad2ed9d2b4bda9f79:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 26b9:10de
                gpu: NVIDIA L40S
                gpu_key: l40s
                model_type: tensorrt
                precision: fp8
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 2GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP16 profile tagged for one NVIDIA A10G (device 2237:10de).
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:a10gx1-trt-fp16-fxo3knzn8w
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA A10Gx1 FP16
          ngcMetadata:
            6f21ae4169cfe3c03cc92eb194713f5a3044ac2f61526edf632d0f9a5155b538:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 2237:10de
                gpu: NVIDIA A10G
                gpu_key: a10g
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP16 profile tagged for one NVIDIA B200 (device 2901:10de).
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:b200x1-trt-fp16-jiw0-uharg
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA B200x1 FP16
          ngcMetadata:
            75b659320dada86548fb6af5d3adfe386df6c515969d71db4e76cd64375777e1:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 2901:10de
                gpu: NVIDIA B200
                gpu_key: b200
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 4GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP8 profile tagged for one NVIDIA H100 80GB HBM3
        # (device 2330:10de).
        # NOTE(review): the H100 NVL FP8 profile of this variant carries the
        # same profileId; presumably both GPUs share one artifact -- confirm
        # that consumers do not require profileId to be unique.
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:h100x1-trt-fp8-bm87q6egvq
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA H100 80GB HBM3x1 FP8
          ngcMetadata:
            774e4d699d318f41630b51b4280cadecb184b9b2755b707aa74232f1ea642b2c:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 2330:10de
                gpu: NVIDIA H100 80GB HBM3
                gpu_key: h100-hbm3-80gb
                model_type: tensorrt
                precision: fp8
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100-HBM3-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 2GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP16 profile tagged for one NVIDIA L4 (device 27b8:10de).
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:l4x1-trt-fp16-bajefiwkra
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA L4x1 FP16
          ngcMetadata:
            9278eac727396c9f6ab9b3d421748889b0686afd20a9cef12d1d16c39fcd6a9d:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 27b8:10de
                gpu: NVIDIA L4
                gpu_key: l4
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: L4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP8 profile tagged for one NVIDIA L4 (device 27b8:10de).
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:l4x1-trt-fp8-vk0qdpls2w
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA L4x1 FP8
          ngcMetadata:
            a344745c8dbe62413a4e95b4e5718a689c155dfb8743868fb5d13956a621b31e:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 27b8:10de
                gpu: NVIDIA L4
                gpu_key: l4
                model_type: tensorrt
                precision: fp8
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP8
            - key: GPU
              value: L4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 2GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP8 profile tagged for one NVIDIA H100 NVL (device 2321:10de).
        # NOTE(review): the H100 80GB HBM3 FP8 profile of this variant carries
        # the same profileId; presumably both GPUs share one artifact -- confirm
        # that consumers do not require profileId to be unique.
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:h100x1-trt-fp8-bm87q6egvq
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA H100 NVLx1 FP8
          ngcMetadata:
            b469c56c1a9ac1001151765527d3c7de77f590255b08eea4aa064ee1abf0ef3f:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 2321:10de
                gpu: NVIDIA H100 NVL
                gpu_key: h100-nvl
                model_type: tensorrt
                precision: fp8
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100-NVL
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 2GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # TensorRT FP16 profile tagged for one NVIDIA H100 80GB HBM3
        # (device 2330:10de).
        # NOTE(review): the H100 NVL FP16 profile of this variant carries the
        # same profileId; presumably both GPUs share one artifact -- confirm
        # that consumers do not require profileId to be unique.
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:h100x1-trt-fp16--ckqlv3j2g
          framework: TensorRT-LLM
          displayName: Llama 3.2 NV Rerankqa 1B V2 NVIDIA H100 80GB HBM3x1 FP16
          ngcMetadata:
            ddd9c5d1430631c0bd75c04b0c18e9b620219ad82c808a30d019be9cbcd618bd:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: tensorrt
                device_id: 2330:10de
                gpu: NVIDIA H100 80GB HBM3
                gpu_key: h100-hbm3-80gb
                model_type: tensorrt
                precision: fp16
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: PRECISION
              value: FP16
            - key: GPU
              value: H100-HBM3-80GB
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 5GB
            - key: BACKEND
              value: TENSORRT
            - key: MODEL TYPE
              value: TENSORRT
        # ONNX FP16 profile (tp 1); its tags name no GPU.
        - profileId: nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:onnx-precision.fp16-d03bf375
          framework: ONNX
          displayName: Llama 3.2 NV Rerankqa 1B V2 ONNX FP16
          ngcMetadata:
            # NOTE(review): the ONNX FP16 profile of Llama 3.2 NV EmbedQA 1b V2
            # uses this same key with identical tags; presumably the key is
            # derived from the tags -- confirm it is not a copy/paste slip.
            f7391ddbcb95b2406853526b8e489fedf20083a2420563ca3e65358ff417b10f:
              model: nvidia/llama-3.2-nv-rerankqa-1b-v2
              release: 1.8.0
              tags:
                backend: onnx
                model_type: onnx
                precision: fp16
                tp: '1'
          modelFormat: onnx
          spec:
            - key: PRECISION
              value: FP16
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.8.0
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: BACKEND
              value: ONNX
            - key: MODEL TYPE
              value: ONNX
  # Model-level labels, architecture config and license for this entry.
  # NOTE(review): "Chat" and "Large Language Model" are attached to a reranking
  # model -- confirm these labels are intended.
  labels:
    - Llama
    - Meta
    - Chat
    - NIM
    - Large Language Model
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# Catalog entry for the Llama 3.2 Vision Instruct NIM (image-to-text).
- name: Llama 3.2 Vision Instruct
  displayName: Llama 3.2 Vision Instruct
  modelHubID: llama-3.2-vision-instruct
  category: Image to Text Generation
  type: NGC
  description: The Llama 3.2 Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.
  requireLicense: true
  licenseAgreements:
    # This is a Llama 3.2 model, so link the Llama 3.2 policy and license pages
    # rather than the Llama 3 ones.
    - label: Use Policy
      url: https://www.llama.com/llama3_2/use-policy/
    - label: License Agreement
      url: https://www.llama.com/llama3_2/license/
  modelVariants:
    # NOTE(review): variantId contains two consecutive spaces after "11B". It is
    # left unchanged because it may be referenced as an identifier -- confirm
    # before normalizing.
    - variantId: Llama 3.2 11B  Vision Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/meta/containers/llama-3.2-11b-vision-instruct
      optimizationProfiles:
        # TensorRT-LLM latency profile: BF16 on H100 (device 2330:10de), tp 2.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x2-bf16-latency.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct H100 BF16 Latency
          # Same digest as the ngcMetadata key below.
          sha: 126d5a664ba4b6b4557d5e0225b51a5e2ffbf9e9909bfe25ed203bec421ea2e5
          ngcMetadata:
            126d5a664ba4b6b4557d5e0225b51a5e2ffbf9e9909bfe25ed203bec421ea2e5:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
              # Files to fetch, grouped by source repo; `dst` is the target
              # subdirectory ('' presumably means the workspace root -- confirm).
              workspace: !workspace
                components:
                  # Engine files: two rankN.engine files, matching tp: '2'.
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x2-bf16-latency.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # TensorRT-LLM throughput profile: BF16 on A10G (device 2237:10de), tp 4.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a10gx4-bf16-throughput.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct A10G BF16 Throughput
          # Same digest as the ngcMetadata key below.
          sha: 417611b3f9e2c25db671083acfcfd4c2340f511f3533838fc6366bb47960915c
          ngcMetadata:
            417611b3f9e2c25db671083acfcfd4c2340f511f3533838fc6366bb47960915c:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
              # Files to fetch, grouped by source repo; `dst` is the target
              # subdirectory ('' presumably means the workspace root -- confirm).
              workspace: !workspace
                components:
                  # Engine files: four rankN.engine files, matching tp: '4'.
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'trtllm_engine/rank2.engine'
                        - !name 'trtllm_engine/rank3.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a10gx4-bf16-throughput.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2237:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # TensorRT-LLM latency profile: BF16 on A10G (device 2237:10de), tp 8.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a10gx8-bf16-latency.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct A10G BF16 Latency
          # Same digest as the ngcMetadata key below.
          sha: 5a9f2d4459908cf6c5b5222e31b8df053c00354b5866f6ee3b8de7552a695524
          ngcMetadata:
            5a9f2d4459908cf6c5b5222e31b8df053c00354b5866f6ee3b8de7552a695524:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: A10G
                gpu_device: 2237:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
              # Files to fetch, grouped by source repo; `dst` is the target
              # subdirectory ('' presumably means the workspace root -- confirm).
              workspace: !workspace
                components:
                  # Engine files: eight rankN.engine files, matching tp: '8'.
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'trtllm_engine/rank2.engine'
                        - !name 'trtllm_engine/rank3.engine'
                        - !name 'trtllm_engine/rank4.engine'
                        - !name 'trtllm_engine/rank5.engine'
                        - !name 'trtllm_engine/rank6.engine'
                        - !name 'trtllm_engine/rank7.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a10gx8-bf16-latency.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A10G
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2237:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # TensorRT-LLM latency profile: FP8 on H100 (device 2330:10de), tp 2.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x2-fp8-latency.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct H100 FP8 Latency
          # Same digest as the ngcMetadata key below.
          sha: ab89f816413848c86e311123d2ed98af7bcda0c3624b0a6c4d43704b720585d5
          ngcMetadata:
            ab89f816413848c86e311123d2ed98af7bcda0c3624b0a6c4d43704b720585d5:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: latency
                tp: '2'
              # Files to fetch, grouped by source repo; `dst` is the target
              # subdirectory ('' presumably means the workspace root -- confirm).
              workspace: !workspace
                components:
                  # Engine files: two rankN.engine files, matching tp: '2'.
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x2-fp8-latency.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 12GB
        # TensorRT-LLM latency profile: BF16 on A100 (device 20b2:10de), tp 2.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a100x2-bf16-latency.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct A100 BF16 Latency
          # Same digest as the ngcMetadata key below.
          sha: ad16e693a234cf8eee85c43dd66ab4502c51ab0bc553af1644477a4f966bf5c6
          ngcMetadata:
            ad16e693a234cf8eee85c43dd66ab4502c51ab0bc553af1644477a4f966bf5c6:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '2'
              # Files to fetch, grouped by source repo; `dst` is the target
              # subdirectory ('' presumably means the workspace root -- confirm).
              workspace: !workspace
                components:
                  # Engine files: two rankN.engine files, matching tp: '2'.
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a100x2-bf16-latency.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 20b2:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # Llama 3.2 11B Vision Instruct, release 1.1.1: L40S / bf16 / throughput profile, tp '2'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists the rank0-rank1 engine files plus the visual engine files.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-l40sx2-bf16-throughput.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct L40S BF16 Throughput
          sha: b16d5969212a8cea632fd6f70928ab514aab835cf217281899564933e6cafa5b
          ngcMetadata:
            b16d5969212a8cea632fd6f70928ab514aab835cf217281899564933e6cafa5b:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '2'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-l40sx2-bf16-throughput.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 26b5:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # Llama 3.2 11B Vision Instruct, release 1.1.1: H100 / bf16 / throughput profile, tp '1'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists a single rank0 engine file plus the visual engine files.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x1-bf16-throughput.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct H100 BF16 Throughput
          sha: b7aa6bf9d9946de665480a5669bb73f981eab7c4fe43ddf7217b672eb11a003a
          ngcMetadata:
            b7aa6bf9d9946de665480a5669bb73f981eab7c4fe43ddf7217b672eb11a003a:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x1-bf16-throughput.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # Llama 3.2 11B Vision Instruct, release 1.1.1: L40S / bf16 / latency profile, tp '4'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists the rank0-rank3 engine files plus the visual engine files.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-l40sx4-bf16-latency.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct L40S BF16 Latency
          sha: be5af3c968ce6bc45e740edc985fa05dffd3695abb7cc5723407e1f5e3f12c70
          ngcMetadata:
            be5af3c968ce6bc45e740edc985fa05dffd3695abb7cc5723407e1f5e3f12c70:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '4'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'trtllm_engine/rank2.engine'
                        - !name 'trtllm_engine/rank3.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-l40sx4-bf16-latency.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 26b5:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # Llama 3.2 11B Vision Instruct, release 1.1.1: A100 / bf16 / throughput profile, tp '1'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists a single rank0 engine file plus the visual engine files.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a100x1-bf16-throughput.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct A100 BF16 Throughput
          sha: ee1e936b878082dee74574deae5064cc7fba3e11ba155de1198ee544d7c3468a
          ngcMetadata:
            ee1e936b878082dee74574deae5064cc7fba3e11ba155de1198ee544d7c3468a:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: A100
                gpu_device: 20b2:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '1'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-a100x1-bf16-throughput.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 20b2:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 20GB
        # Llama 3.2 11B Vision Instruct, release 1.1.1: H100 / fp8 / throughput profile, tp '1'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists a single rank0 engine file plus the visual engine files.
        - profileId: nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x1-fp8-throughput.0.3.20143152
          framework: TensorRT-LLM
          displayName: Llama 3.2 11B Vision Instruct H100 FP8 Throughput
          sha: fa1e1cbf698be85c0cc56d707f8bc5b17044e091136dae3f8e4be694af727c87
          ngcMetadata:
            fa1e1cbf698be85c0cc56d707f8bc5b17044e091136dae3f8e4be694af727c87:
              model: meta/llama-3.2-11b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '1'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x1-fp8-throughput.0.3.20143152
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:hf-cee5b78-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-11b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 12GB
    # NOTE(review): this variantId contains two consecutive spaces ("90B  Vision"),
    # unlike the displayName values of its profiles ("Llama 3.2 90B Vision Instruct").
    # Confirm whether any consumer matches on the exact string before normalizing it.
    - variantId: Llama 3.2 90B  Vision Instruct
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/meta/containers/llama-3.2-90b-vision-instruct
      optimizationProfiles:
        # Llama 3.2 90B Vision Instruct, release 1.1.1: H100 / bf16 / throughput profile, tp '4'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists the rank0-rank3 engine files plus the visual engine files.
        - profileId: nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x4-bf16-throughput.0.3.20194742
          framework: TensorRT-LLM
          displayName: Llama 3.2 90B Vision Instruct H100 BF16 Throughput
          sha: 42c91902414bc5ea7f4ef4e9a34ef382165b8b65f9adcc5d1abaf195ade2d8fc
          ngcMetadata:
            42c91902414bc5ea7f4ef4e9a34ef382165b8b65f9adcc5d1abaf195ade2d8fc:
              model: meta/llama-3.2-90b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '4'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'trtllm_engine/rank2.engine'
                        - !name 'trtllm_engine/rank3.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x4-bf16-throughput.0.3.20194742
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 166GB
        # Llama 3.2 90B Vision Instruct, release 1.1.1: H100 / fp8 / throughput profile, tp '2'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists the rank0-rank1 engine files plus the visual engine files.
        - profileId: nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x2-fp8-throughput.0.3.20194742
          framework: TensorRT-LLM
          displayName: Llama 3.2 90B Vision Instruct H100 FP8 Throughput
          sha: 6b24bf2e19c23b85f9d2651efdc2de08cd179a03c50f942b1dcd856fa4d4074b
          ngcMetadata:
            6b24bf2e19c23b85f9d2651efdc2de08cd179a03c50f942b1dcd856fa4d4074b:
              model: meta/llama-3.2-90b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: throughput
                tp: '2'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x2-fp8-throughput.0.3.20194742
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 2
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 85GB
        # Llama 3.2 90B Vision Instruct, release 1.1.1: L40S / bf16 / throughput profile, tp '8'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists the rank0-rank7 engine files plus the visual engine files.
        - profileId: nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-l40sx8-bf16-throughput.0.3.1342
          framework: TensorRT-LLM
          displayName: Llama 3.2 90B Vision Instruct L40S BF16 Throughput
          sha: 7bb72cbd19b5eab69ed21b2e031e4ea18909ff034255471c25b29ab45a99cc8b
          ngcMetadata:
            7bb72cbd19b5eab69ed21b2e031e4ea18909ff034255471c25b29ab45a99cc8b:
              model: meta/llama-3.2-90b-vision-instruct
              release: 1.1.1
              tags:
                gpu: L40S
                gpu_device: 26b5:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: throughput
                tp: '8'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'trtllm_engine/rank2.engine'
                        - !name 'trtllm_engine/rank3.engine'
                        - !name 'trtllm_engine/rank4.engine'
                        - !name 'trtllm_engine/rank5.engine'
                        - !name 'trtllm_engine/rank6.engine'
                        - !name 'trtllm_engine/rank7.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-l40sx8-bf16-throughput.0.3.1342
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Throughput
            - key: PRECISION
              value: BF16
            - key: GPU
              value: L40S
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 26b5:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 166GB
        # Llama 3.2 90B Vision Instruct, release 1.1.1: H100 / fp8 / latency profile, tp '4'.
        # The single ngcMetadata key repeats the sha value; the first workspace
        # component lists the rank0-rank3 engine files plus the visual engine files.
        - profileId: nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x4-fp8-latency.0.3.20194742
          framework: TensorRT-LLM
          displayName: Llama 3.2 90B Vision Instruct H100 FP8 Latency
          sha: a6e9fde5c1edfb4ab4c0b206a536693a6f9b1f95cde1448ddd679fb880fcef71
          ngcMetadata:
            a6e9fde5c1edfb4ab4c0b206a536693a6f9b1f95cde1448ddd679fb880fcef71:
              model: meta/llama-3.2-90b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: fp8
                profile: latency
                tp: '4'
              workspace: !workspace
                components:
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'trtllm_engine/rank2.engine'
                        - !name 'trtllm_engine/rank3.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x4-fp8-latency.0.3.20194742
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b-tok
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec restates the tags above (profile, precision, gpu, gpu_device) and the
          # release as NIM VERSION, and adds COUNT and DOWNLOAD SIZE.
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: FP8
            - key: GPU
              value: H100
            - key: COUNT
              value: 4
            # Kept equal to tags.gpu_device for this profile (2330:10de), the same
            # value every other H100 profile's spec carries; a null here leaves the
            # device ID missing from the spec list.
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 87GB
        # Optimization profile: TensorRT-LLM engine for 8x H100, BF16, latency
        # profile (tags tp: '8', pp: '1'; release 1.1.1).
        - profileId: nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x8-bf16-latency.0.3.20194742
          framework: TensorRT-LLM
          displayName: Llama 3.2 90B Vision Instruct H100 BF16 Latency
          # Same digest as the single key under ngcMetadata below.
          sha: e994500d8b0e10f63a08e6a90143a60c360d004f6d5ea8bdb4d38d215eb3fa83
          ngcMetadata:
            e994500d8b0e10f63a08e6a90143a60c360d004f6d5ea8bdb4d38d215eb3fa83:
              model: meta/llama-3.2-90b-vision-instruct
              release: 1.1.1
              tags:
                gpu: H100
                gpu_device: 2330:10de
                llm_engine: tensorrt_llm
                pp: '1'
                precision: bf16
                profile: latency
                tp: '8'
              # !workspace and !name are application-specific YAML tags; the
              # loader must define constructors for them (generic safe loaders
              # do not know custom tags).
              workspace: !workspace
                components:
                  # Each component lists files pulled from one repo_id into dst.
                  # NOTE(review): dst '' presumably means the workspace root - confirm.
                  - dst: ''
                    src:
                      files:
                        - !name 'LICENSE.txt'
                        - !name 'NOTICE.txt'
                        - !name 'checksums.blake3'
                        - !name 'trtllm_engine/config.json'
                        - !name 'trtllm_engine/metadata.json'
                        # Eight rank*.engine files, matching tp: '8' and COUNT 8.
                        - !name 'trtllm_engine/rank0.engine'
                        - !name 'trtllm_engine/rank1.engine'
                        - !name 'trtllm_engine/rank2.engine'
                        - !name 'trtllm_engine/rank3.engine'
                        - !name 'trtllm_engine/rank4.engine'
                        - !name 'trtllm_engine/rank5.engine'
                        - !name 'trtllm_engine/rank6.engine'
                        - !name 'trtllm_engine/rank7.engine'
                        - !name 'visual_engine/config.json'
                        - !name 'visual_engine/metadata.json'
                        - !name 'visual_engine/visual_encoder.engine'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:0.15.0.dev2024102300+ea8391c56-h100x8-bf16-latency.0.3.20194742
                  # Model configuration files from the hf-0a6d69b repo.
                  - dst: ''
                    src:
                      files:
                        - !name 'config.json'
                        - !name 'generation_config.json'
                        - !name 'preprocessor_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b
                  # Tokenizer files from the hf-0a6d69b-tok repo.
                  - dst: ''
                    src:
                      files:
                        - !name 'special_tokens_map.json'
                        - !name 'tokenizer.json'
                        - !name 'tokenizer_config.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:hf-0a6d69b-tok
                  # The only component with a non-empty dst: lands in visual_engine.
                  - dst: visual_engine
                    src:
                      files:
                        - !name 'vision_processor.py'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:vision-processor
                  - dst: ''
                    src:
                      files:
                        - !name 'runtime_params.json'
                      repo_id: ngc://nim/meta/llama-3.2-90b-vision-instruct:runtime-params-trtllm
          modelFormat: trt-llm
          # spec repeats the tag values above as key/value pairs.
          # NOTE(review): presumably for display - confirm with the consumer.
          spec:
            - key: PROFILE
              value: Latency
            - key: PRECISION
              value: BF16
            - key: GPU
              value: H100
            - key: COUNT
              value: 8
            - key: GPU DEVICE
              value: 2330:10de
            - key: NIM VERSION
              value: 1.1.1
            - key: DOWNLOAD SIZE
              value: 166GB
  labels:
    - Llama
    - Meta
    - Chat
    - Large Language Model
    - TensorRT-LLM
    - Vision Instruct
    - Image to Text Generation
    - Language Generation
    - NeMo
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# Riva ASR Whisper Large v3: NGC-hosted speech-recognition entry (release
# 1.3.1) with two single-GPU (tp: '1') profiles: a generic RMIR build and a
# prebuilt H100 FP16 engine.
# NOTE(review): category 'Text-Prompt' and config.modelType 'llama' look
# inherited from an LLM entry; confirm they are intended for an ASR model.
- name: Riva ASR Whisper Large v3
  displayName: Riva ASR Whisper Large v3
  modelHubID: nvidia/riva-asr/whisper
  category: Text-Prompt
  type: NGC
  description: This model is used to transcribe short-form audio files and is designed to be compatible with OpenAI's sequential long-form transcription algorithm. Whisper is a pre-trained model for automatic speech recognition (ASR) and speech translation. Trained on 680k hours of labeled data, Whisper models demonstrate a strong ability to generalize to many datasets and domains without the need for fine-tuning. Whisper-large-v3 is one of the 5 configurations of the model with 1550M parameters. This model version is optimized to run with NVIDIA TensorRT-LLM. This model is ready for commercial use.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: Riva ASR Whisper Large v3
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nvidia/teams/riva/models/whisper_large
      optimizationProfiles:
        # Generic profile: no gpu / gpu_device tag, model_type rmir.
        - profileId: nim/nvidia/whisper-large-v3:ofl-rmir-25.06
          framework: TensorRT-LLM
          displayName: Riva ASR Whisper Large v3 Generic NVIDIA GPUx1
          ngcMetadata:
            5e44fa6d8cd80ad46a089089157ff4565974f0a64fd37c594265c61f00418ae0:
              model: nvidia/riva-asr/whisper
              release: 1.3.1
              tags:
                mode: ofl
                model_type: rmir
                name: whisper-large-v3
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.3.1
            - key: DOWNLOAD SIZE
              value: 3GB
            - key: MODEL TYPE
              value: RMIR
            - key: MODE
              value: OFL
        # Prebuilt profile pinned to H100 (gpu_device '2330').
        - profileId: nim/nvidia/whisper-large-v3:h100x1-ofl-25.08-fp16-mnz4pnn0pw
          framework: TensorRT-LLM
          displayName: Riva ASR Whisper Large v3 H100 FP16
          ngcMetadata:
            72232937075119887298deb92b5e58f4d98a0ce0948df60d424f0d97b05da55e:
              model: nvidia/riva-asr/whisper
              release: 1.3.1
              tags:
                gpu_device: '2330'
                mode: ofl
                model_type: prebuilt
                name: whisper-large-v3
                gpu: H100
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: GPU DEVICE
              # Quoted so the device ID stays a string, like the gpu_device
              # tag above; a bare 2330 would load as an integer.
              value: '2330'
            - key: NIM VERSION
              value: 1.3.1
            - key: DOWNLOAD SIZE
              value: 2GB
            - key: MODEL TYPE
              value: PREBUILT
            - key: MODE
              value: OFL
  labels:
    - Transformer
    - TensorRT-LLM
    - Audio
    - NVIDIA Validated
  config:
    architectures:
      - Other
    modelType: llama
  license: NVIDIA AI Foundation Models Community License
# Boltz2: NGC-hosted structural-biology entry (release 1.1.0) with one
# single-GPU (tp: '1') profile per GPU model: A100, B200, L40S and H100.
# Each profileId suffix (gpu<GPU>_sm<SM>_v<V>) repeats that profile's gpu, sm
# and v tags.
# NOTE(review): framework TensorRT-LLM / modelFormat trt-llm are listed for a
# non-LLM model; confirm these are the values the consumer expects.
- name: Boltz2
  displayName: Boltz2
  modelHubID: boltz2
  category: Biology Foundation Model
  type: NGC
  description: Boltz-2 NIM is a next-generation structural biology foundation model that shows strong performance for both structure and affinity prediction.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: Boltz2
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/mit/containers/boltz2
      optimizationProfiles:
        # A100 profile (sm '80').
        - profileId: nim/mit/boltz2:1.1.0-combi-gpuA100_sm80_v11
          framework: TensorRT-LLM
          displayName: Boltz2 A100x1 SM80 V11
          ngcMetadata:
            9c411ee75a91c41ae5e85ae81e91ab9fcdde58477da17785d188e0de41a54a8e:
              model: mit/boltz2
              release: 1.1.0
              tags:
                gpu: A100
                # sm and v are quoted so they stay strings.
                # NOTE(review): sm presumably is the CUDA SM architecture
                # number - confirm.
                sm: '80'
                v: '11'
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.1.0
            - key: DOWNLOAD SIZE
              value: 13GB
            - key: SM
              value: '80'
            - key: V
              value: '11'
        # B200 profile (sm '100').
        - profileId: nim/mit/boltz2:1.1.0-combi-gpuB200_sm100_v11
          framework: TensorRT-LLM
          displayName: Boltz2 B200x1 SM100 V11
          ngcMetadata:
            c9f95e6e506df04cf37ff5b7a70f2cde0eed8b61f43a9dc5c857d4d6fdbe4c78:
              model: mit/boltz2
              release: 1.1.0
              tags:
                gpu: B200
                sm: '100'
                v: '11'
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: GPU
              value: B200
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.1.0
            - key: DOWNLOAD SIZE
              value: 12GB
            - key: SM
              value: '100'
            - key: V
              value: '11'
        # L40S profile (sm '89').
        - profileId: nim/mit/boltz2:1.1.0-combi-gpuL40S_sm89_v11
          framework: TensorRT-LLM
          displayName: Boltz2 L40Sx1 SM89 V11
          ngcMetadata:
            f429a14a6470d83cd68dd11dc45cbd69c235636d5f82f147cad04b927e86be56:
              model: mit/boltz2
              release: 1.1.0
              tags:
                gpu: L40S
                sm: '89'
                v: '11'
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.1.0
            - key: DOWNLOAD SIZE
              value: 14GB
            - key: SM
              value: '89'
            - key: V
              value: '11'
        # H100 profile (sm '90').
        - profileId: nim/mit/boltz2:1.1.0-combi-gpuH100_sm90_v11
          framework: TensorRT-LLM
          displayName: Boltz2 H100x1 SM90 V11
          ngcMetadata:
            f6884d58e3b6cf070d18085a79760da0dd36669aaa52900e8f5b50eebd8f304c:
              model: mit/boltz2
              release: 1.1.0
              tags:
                gpu: H100
                sm: '90'
                v: '11'
                tp: '1'
          modelFormat: trt-llm
          spec:
            - key: GPU
              value: H100
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.1.0
            - key: DOWNLOAD SIZE
              value: 13GB
            - key: SM
              value: '90'
            - key: V
              value: '11'
  labels:
    - Biology Foundation Model
    - signed images
    - NSPECT-D4IX-8I2O
    - NVIDIA AI Enterprise Supported
    - NVIDIA NIM
  config:
    architectures:
      - Other
    modelType: NIM
  license: NVIDIA AI Foundation Models Community License
# GPT-OSS: NGC-hosted entry (release 1.12.1) with two variants, 120B and 20B.
# Each variant offers vLLM MXFP4 profiles for tp = 1, 2, 4 and 8. Within a
# variant every profile shares one profileId and one nim_workspace_hash_v1;
# profiles are told apart by their ngcMetadata digest and tp tag.
# NOTE(review): all eight profiles list DOWNLOAD SIZE 7GB; confirm the figure
# for the 120B variant.
- name: GPT-OSS
  displayName: GPT-OSS
  modelHubID: gpt-oss
  category: Text Generation
  type: NGC
  description: The GPT-OSS NIM simplifies the deployment of the GPT-OSS-120B and GPT-OSS-20B tuned models which are optimized for language understanding, reasoning, and text generation use cases, and outperforms many of the available open source chat models on common industry benchmarks.
  requireLicense: true
  licenseAgreements:
    - label: Use Policy
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/
    - label: License Agreement
      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/
  modelVariants:
    - variantId: GPT-OSS 120B
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/openai/containers/gpt-oss-120b
      optimizationProfiles:
        # 120B, tp '8'.
        - profileId: nim/openai/gpt-oss-120b:v1
          framework: VLLM
          # Follows the "Generic NVIDIA GPUx<COUNT>" naming used by every
          # sibling profile in this entry.
          displayName: GPT-OSS 120B Generic NVIDIA GPUx8 MXFP4
          ngcMetadata:
            46b15913be22333eb518656e23ce6e4add9ef248521868cca0bb31af8c99458c:
              model: openai/gpt-oss-120b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 0d2325f68f4fecf18db654a22e7647c17ee6003b0f8c605d29675be643727b60
                pp: '1'
                precision: mxfp4
                tp: '8'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 8
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
        # 120B, tp '2'.
        - profileId: nim/openai/gpt-oss-120b:v1
          framework: VLLM
          displayName: GPT-OSS 120B Generic NVIDIA GPUx2 MXFP4
          ngcMetadata:
            4a0d3557b2676290857e191d9b71eeff964f4fe10a7ddac66c66d2fc9983c399:
              model: openai/gpt-oss-120b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 0d2325f68f4fecf18db654a22e7647c17ee6003b0f8c605d29675be643727b60
                pp: '1'
                precision: mxfp4
                tp: '2'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
        # 120B, tp '4'.
        - profileId: nim/openai/gpt-oss-120b:v1
          framework: VLLM
          displayName: GPT-OSS 120B Generic NVIDIA GPUx4 MXFP4
          ngcMetadata:
            531bca8ac3e457ae8a69a62780430eab361264dbd29b79f2da474084a93ee000:
              model: openai/gpt-oss-120b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 0d2325f68f4fecf18db654a22e7647c17ee6003b0f8c605d29675be643727b60
                pp: '1'
                precision: mxfp4
                tp: '4'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 4
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
        # 120B, tp '1'.
        - profileId: nim/openai/gpt-oss-120b:v1
          framework: VLLM
          displayName: GPT-OSS 120B Generic NVIDIA GPUx1 MXFP4
          ngcMetadata:
            899cf2e170bb3af67ba7fef534eda3eeb8e3b69c415cd1ec4872752f6c64e6a0:
              model: openai/gpt-oss-120b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 0d2325f68f4fecf18db654a22e7647c17ee6003b0f8c605d29675be643727b60
                pp: '1'
                precision: mxfp4
                tp: '1'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
    - variantId: GPT-OSS 20B
      source:
        URL: https://catalog.ngc.nvidia.com/orgs/nim/teams/openai/containers/gpt-oss-20b
      optimizationProfiles:
        # 20B, tp '8'.
        - profileId: nim/openai/gpt-oss-20b:v1
          framework: VLLM
          displayName: GPT-OSS 20B Generic NVIDIA GPUx8 MXFP4
          ngcMetadata:
            2e2c8811172659dd321e19a927ec632eaba747f3e0017bacc8ad27109e75878a:
              model: openai/gpt-oss-20b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 1a244f5794c2c5706bd813ad4d1b003065f81939741d67ccc0bd11f9c963f5db
                pp: '1'
                precision: mxfp4
                tp: '8'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 8
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
        # 20B, tp '1'.
        - profileId: nim/openai/gpt-oss-20b:v1
          framework: VLLM
          displayName: GPT-OSS 20B Generic NVIDIA GPUx1 MXFP4
          ngcMetadata:
            319230871747dcf65aac0a4af04e603297460c2eab06611b7a7e6a41ae885da8:
              model: openai/gpt-oss-20b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 1a244f5794c2c5706bd813ad4d1b003065f81939741d67ccc0bd11f9c963f5db
                pp: '1'
                precision: mxfp4
                tp: '1'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 1
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
        # 20B, tp '4'.
        - profileId: nim/openai/gpt-oss-20b:v1
          framework: VLLM
          displayName: GPT-OSS 20B Generic NVIDIA GPUx4 MXFP4
          ngcMetadata:
            bfa3fc92db1f01459fe0dea1886ad7f89366a04208a24debb1ad13747fe1cb9b:
              model: openai/gpt-oss-20b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 1a244f5794c2c5706bd813ad4d1b003065f81939741d67ccc0bd11f9c963f5db
                pp: '1'
                precision: mxfp4
                tp: '4'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 4
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
        # 20B, tp '2'.
        - profileId: nim/openai/gpt-oss-20b:v1
          framework: VLLM
          displayName: GPT-OSS 20B Generic NVIDIA GPUx2 MXFP4
          ngcMetadata:
            efa7e9795d36904b4466f82f6b32c28507ed700b2fef0be0b2e7c1de10a2eeb8:
              model: openai/gpt-oss-20b
              release: 1.12.1
              tags:
                feat_lora: 'false'
                llm_engine: vllm
                nim_workspace_hash_v1: 1a244f5794c2c5706bd813ad4d1b003065f81939741d67ccc0bd11f9c963f5db
                pp: '1'
                precision: mxfp4
                tp: '2'
          modelFormat: vllm
          spec:
            - key: PRECISION
              value: MXFP4
            - key: COUNT
              value: 2
            - key: NIM VERSION
              value: 1.12.1
            - key: DOWNLOAD SIZE
              value: 7GB
  labels:
    - OpenAI
    - signed images
    - NSPECT-LJGD-9W15
    - NVIDIA AI Enterprise Supported
    - NVIDIA NIM
  config:
    architectures:
      - Other
    modelType: NIM
  license: NVIDIA AI Foundation Models Community License
# Gemma 2: Hugging Face-hosted entry with one variant (9B) that requires a
# token and licence acceptance. It offers three single-GPU vLLM profiles
# (A10G, A100, L40S); all share one profileId and differ only in the GPU spec
# and displayName.
- name: Gemma 2
  displayName: Gemma 2
  modelHubID: gemma-2
  category: Text Generation
  type: HF
  description: Gemma 2 is the second generation of the Google community Gemma lineage.  Gemma 2 is improved with higher performance with significant safety improvements and well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.
  modelVariants:
    - variantId: Gemma 2 9B
      displayName: Gemma 2 9B
      source:
        URL: https://huggingface.co/google/gemma-2-9b
      requireToken: true
      requireLicense: true
      licenseAgreements:
        - label: License Agreement
          url: https://ai.google.dev/gemma/terms
        - label: Use Policy
          url: https://ai.google.dev/gemma/prohibited_use_policy
      optimizationProfiles:
        - profileId: google/gemma-2-9b
          displayName: Gemma 2 9b A10G
          framework: vllm
          # NOTE(review): sha holds the literal 'vllm', not a digest; it
          # looks like a placeholder - confirm.
          sha: vllm
          modelFormat: vllm
          spec:
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
        - profileId: google/gemma-2-9b
          # Carries the '9b' size token like the A10G and L40S siblings.
          displayName: Gemma 2 9b A100
          framework: vllm
          sha: vllm
          modelFormat: vllm
          spec:
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
        - profileId: google/gemma-2-9b
          displayName: Gemma 2 9b L40S
          framework: vllm
          sha: vllm
          modelFormat: vllm
          spec:
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
  labels:
    - google
    - Gemma
    - "Text Generation"
    - "Multilingual support"
  config:
    architectures:
      - Gemma2ForCausalLM
    modelType: Gemma2
  license: gemma
# Llama 3 SQLCoder: Hugging Face-hosted text-to-SQL entry with one variant
# (8B) that needs neither a token nor licence acceptance. It offers three
# single-GPU vLLM profiles (A10G, A100, L40S) that share one profileId.
- name: Llama 3 SQLCoder
  displayName: Llama 3 SQLCoder
  modelHubID: llama-3-sqlcoder-8b
  category: Text Generation
  type: HF
  description: A capable language model for text to SQL generation for Postgres, Redshift and Snowflake that is on-par with the most capable generalist frontier models.
  modelVariants:
    - variantId: Llama 3 SQLCoder 8B
      displayName: Llama 3 SQLCoder 8B
      source:
        URL: https://huggingface.co/defog/llama-3-sqlcoder-8b
      requireToken: false
      requireLicense: false
      # The licence link is still listed although requireLicense is false.
      licenseAgreements:
        - label: License Agreement
          url: https://choosealicense.com/licenses/cc-by-sa-4.0/
      optimizationProfiles:
        # A10G x1
        - profileId: Defog/Llama-3-sqlcoder-8B
          displayName: Llama 3 SQLCoder 8B A10G
          framework: vllm
          sha: vllm
          modelFormat: vllm
          spec:
            - key: GPU
              value: A10G
            - key: COUNT
              value: 1
        # A100 x1
        - profileId: Defog/Llama-3-sqlcoder-8B
          displayName: Llama 3 SQLCoder 8B A100
          framework: vllm
          sha: vllm
          modelFormat: vllm
          spec:
            - key: GPU
              value: A100
            - key: COUNT
              value: 1
        # L40S x1
        - profileId: Defog/Llama-3-sqlcoder-8B
          displayName: Llama 3 SQLCoder 8B L40S
          framework: vllm
          sha: vllm
          modelFormat: vllm
          spec:
            - key: GPU
              value: L40S
            - key: COUNT
              value: 1
  # Plain scalars: these multi-word labels contain no YAML indicators, so
  # they load as the same strings without quotes.
  labels:
    - Llama
    - Text To SQL
    - Code Generation
    - Fine Tuned
  config:
    architectures:
      - LlamaForCausalLM
    modelType: llama
  license: Creative Commons Attribution Share Alike 4.0