# Zabbix template export (format version 5.4): "NVidia Sensors".
# Defines one template with seven agent items (keys gpu.*), one temperature
# trigger and four graphs. The item keys are expected to be supplied by the
# UserParameter lines listed in the template description below.
zabbix_export:
  version: '5.4'
  date: '2021-11-21T21:26:31Z'

  # Host groups referenced by the template.
  groups:
    - uuid: 7df96b18c230490a9a0a9e2307226338
      name: Templates

  templates:
    - uuid: a021d16dab6741fdb0852d16df32b1f1
      template: 'NVidia Sensors'
      name: 'NVidia Sensors'
      # Literal block scalar: line breaks are part of the description text,
      # so do not add YAML comments inside it.
      description: |
        ## Overview

        This template integrates NVidia SMI for a single graphics card with Zabbix.

        The template adds monitoring of:

        * GPU Utilisation
        * GPU Power Consumption
        * GPU Memory (Used, Free, Total)
        * GPU Temperature
        * GPU Fan Speed

        The following agent parameters can be used to add the metrics into Zabbix.

        UserParameter=gpu.temp,nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader,nounits -i 0
        UserParameter=gpu.memtotal,nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i 0
        UserParameter=gpu.used,nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i 0
        UserParameter=gpu.free,nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits -i 0
        UserParameter=gpu.fanspeed,nvidia-smi --query-gpu=fan.speed --format=csv,noheader,nounits -i 0
        UserParameter=gpu.utilisation,nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i 0
        UserParameter=gpu.power,nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits -i 0

        ## Author

        Richard Kavanagh
      groups:
        - name: Templates

      # All items share delay '30', value_type FLOAT and the
      # Application=Nvidia tag.
      items:
        - uuid: 1859c0a8b79346ecb9da26f04a939a55
          name: 'GPU Fan Speed'
          key: gpu.fanspeed
          delay: '30'
          value_type: FLOAT
          units: '%'
          tags:
            - tag: Application
              value: Nvidia

        # The three memory items multiply the raw value by 1048576 (1024 * 1024)
        # before storing it with unit B.
        # NOTE(review): presumably the agent returns MiB when queried with
        # `nounits` - confirm against the nvidia-smi output on the target host.
        - uuid: 14bff6576d754b21a5acfc42b1403507
          name: 'GPU Free Memory'
          key: gpu.free
          delay: '30'
          value_type: FLOAT
          units: B
          preprocessing:
            - type: MULTIPLIER
              parameters:
                - '1048576'
          tags:
            - tag: Application
              value: Nvidia

        - uuid: 11b6b2d40525492a872ab36a99b01d18
          name: 'GPU Total Memory'
          key: gpu.memtotal
          delay: '30'
          value_type: FLOAT
          units: B
          preprocessing:
            - type: MULTIPLIER
              parameters:
                - '1048576'
          tags:
            - tag: Application
              value: Nvidia

        - uuid: 58a0c7ab4a9a4760b3a3fd5825969fd7
          name: 'GPU Power'
          key: gpu.power
          delay: '30'
          value_type: FLOAT
          units: W
          tags:
            - tag: Application
              value: Nvidia

        - uuid: ceea4ab8fbda48cfaad3590d4b19e115
          name: 'GPU Temperature'
          key: gpu.temp
          delay: '30'
          value_type: FLOAT
          units: C
          tags:
            - tag: Application
              value: Nvidia
          triggers:
            # Raises an AVERAGE-priority problem when the expression exceeds 95.
            # NOTE(review): `#2` makes last() read the second most recent
            # value rather than the newest one - confirm this is intended.
            # The trigger name uses {HOST.HOST}; the original {HOSTNAME} macro
            # is the deprecated spelling of the same macro.
            - uuid: d18b5023d7d14b6490406f5b402197c4
              expression: 'last(/NVidia Sensors/gpu.temp,#2)>95'
              name: 'GPU Temperature over 95c {HOST.HOST}'
              priority: AVERAGE

        - uuid: 4544799689e44836b975d433ec3c5ef4
          name: 'GPU Used Memory'
          key: gpu.used
          delay: '30'
          value_type: FLOAT
          units: B
          preprocessing:
            - type: MULTIPLIER
              parameters:
                - '1048576'
          tags:
            - tag: Application
              value: Nvidia

        - uuid: 1ae3048cdefd47839d1f76674777f280
          name: 'GPU Utilisation'
          key: gpu.utilisation
          delay: '30'
          value_type: FLOAT
          units: '%'
          tags:
            - tag: Application
              value: Nvidia

  # Graphs reference items by template host name and item key.
  # Colour values are quoted so that hex strings made only of digits (or
  # digits and 'E') can never be re-typed as numbers by a YAML parser.
  graphs:
    # Free, total and used memory on a single axis.
    - uuid: 1fdc4e107c4d4707b193443c6e16ea68
      name: 'GPU Memory'
      graph_items:
        - color: 'C80000'
          item:
            host: 'NVidia Sensors'
            key: gpu.free
        - sortorder: '1'
          color: '00C800'
          item:
            host: 'NVidia Sensors'
            key: gpu.memtotal
        - sortorder: '2'
          color: '0000C8'
          item:
            host: 'NVidia Sensors'
            key: gpu.used

    - uuid: 387c249ac6834aa0b9edcf3bb70daa16
      name: 'GPU Power'
      graph_items:
        - color: 'C80000'
          item:
            host: 'NVidia Sensors'
            key: gpu.power

    # Temperature, with fan speed drawn against the right-hand Y axis.
    - uuid: 0d8822621c85420fb6ff847e92312c4f
      name: 'GPU Temperature'
      graph_items:
        - color: 'C80000'
          item:
            host: 'NVidia Sensors'
            key: gpu.temp
        - sortorder: '1'
          color: '0000EE'
          yaxisside: RIGHT
          item:
            host: 'NVidia Sensors'
            key: gpu.fanspeed

    # Utilisation, with power draw drawn against the right-hand Y axis.
    - uuid: 82e6484d122e4e63a2b6cd6c8345b42d
      name: 'GPU Utilisation'
      graph_items:
        - color: 'C80000'
          item:
            host: 'NVidia Sensors'
            key: gpu.utilisation
        - sortorder: '1'
          color: '33FF33'
          yaxisside: RIGHT
          item:
            host: 'NVidia Sensors'
            key: gpu.power