{ "__inputs": [ { "name": "DS_PROMETHEUS", "label": "Prometheus", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" }, { "name": "DS_EXPRESSION", "label": "Expression", "description": "", "type": "datasource", "pluginId": "__expr__" } ], "__elements": {}, "__requires": [ { "type": "datasource", "id": "__expr__", "version": "1.0.0" }, { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "11.2.2" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }, { "type": "panel", "id": "stat", "name": "Stat", "version": "" }, { "type": "panel", "id": "table", "name": "Table", "version": "" }, { "type": "panel", "id": "text", "name": "Text", "version": "" }, { "type": "panel", "id": "timeseries", "name": "Time series", "version": "" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": null, "links": [], "panels": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Number of GPUs on the compute node", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "links": [ { "targetBlank": true, "title": "Go to GPU Dashboard", "url": "/d/ae0aj8euc43r4b/gpu" } ], "mappings": [], "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 2, "x": 0, "y": 0 }, "id": 1, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { 
"type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "# of GPUs", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "# of GPUs", "range": true, "refId": "B", "useBackend": false } ], "title": "GPUs", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Number of jobs running on this compute node", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "links": [ { "targetBlank": true, "title": "Go to Job Dashboard", "url": "/d/ce1x81pyv3dvkb/job" } ], "mappings": [], "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 2, "x": 2, "y": 0 }, "id": 8, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "exemplar": false, "expr": "sum(group by(job_id) (${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", 
job_id!=\"\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Jobs", "range": false, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "exemplar": false, "expr": "sum(group by(job_id) (${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", job_id!=\"\", card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Jobs", "range": false, "refId": "A1", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "exemplar": false, "expr": "sum(group by(pod) (${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", pod!=\"\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "hide": false, "instant": true, "legendFormat": "Jobs (Pods)", "range": false, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "exemplar": false, "expr": "sum(group by(pod) (${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", pod!=\"\", card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "hide": false, "instant": true, "legendFormat": "Jobs (Pods)", "range": false, "refId": "B1" } ], "title": "Jobs", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "\\# of GPUs allocated by jobs", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "links": [ { "targetBlank": true, "title": "Go to GPU Dashboard", "url": "/d/ae0aj8euc43r4b/gpu" } ], "mappings": [], "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] } }, "overrides": [ { "matcher": { "id": "byFrameRefID", "options": "Unhealthy" }, "properties": [ { "id": 
"color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 8, "w": 4, "x": 4, "y": 0 }, "id": 19, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "vertical", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "text": { "titleSize": 13 }, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", job_id!=\"\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Allocated by Jobs", "range": false, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", job_id!=\"\", card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Allocated by Jobs", "range": false, "refId": "A1", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", pod!=\"\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Allocated by Pods", "range": false, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": 
"${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", pod!=\"\", card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"}))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Allocated by Pods", "range": false, "refId": "B1", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"} > 0))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Busy GPUs", "range": false, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "exemplar": false, "expr": "sum(group by(gpu_id) (${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"} > 0))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Busy GPUs", "range": false, "refId": "C1", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "exemplar": false, "expr": "count(${g_metrics_prefix}gpu_health{hostname=\"$g_hostname\"} < 1)", "hide": false, "instant": true, "legendFormat": "Unhealthy GPUs", "range": false, "refId": "Unhealthy" } ], "title": "Allocated GPUs", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Current maximum PCIe speed", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "min": -5, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", 
"value": null }, { "color": "red", "value": 80 } ] }, "unit": "MBs" }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 8, "y": 0 }, "id": 12, "maxDataPoints": 60, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "${g_metrics_prefix}pcie_max_speed{hostname=\"$g_hostname\"}", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "PCIe Max Speed", "transformations": [ { "id": "calculateField", "options": { "alias": "PCIe Max Speed", "mode": "reduceRow", "reduce": { "reducer": "last" }, "replaceFields": true } } ], "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 6, "w": 4, "x": 12, "y": 0 }, "id": 15, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(delta(${g_metrics_prefix}pcie_recovery_count{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, 
"includeNullMetadata": true, "instant": true, "legendFormat": "Recovery", "range": false, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(delta(${g_metrics_prefix}pcie_replay_count{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Replay", "range": false, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(delta(${g_metrics_prefix}pcie_replay_rollover_count{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "Replay Rollover", "range": false, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(delta(${g_metrics_prefix}pcie_nack_received_count{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "NACK Received", "range": false, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "sum(delta(${g_metrics_prefix}pcie_nack_sent_count{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "NACK Sent", "range": false, "refId": "E", "useBackend": false } ], "title": "PCIe Counts", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Top 5 jobs by GPU usage, listed by the index of the GPU the job is running on and the pod/job 
ID", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "links": [ { "targetBlank": true, "title": "Go to Job Dashboard", "url": "/d/ce1x81pyv3dvkb/job?var-g_job_id=${__field.labels.job_id}&var-g_pod=${__field.labels.pod}" } ], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 12, "w": 4, "x": 16, "y": 0 }, "id": 16, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "topk(5, ${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": true, "legendFormat": "[GPU: {{gpu_id}}] {{pod}}{{job_id}}", "range": false, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "topk(5, ${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": true, "legendFormat": "[GPU: {{gpu_id}}] {{pod}}{{job_id}}", "range": false, "refId": "B", "useBackend": false } ], "title": "Top 5 Jobs by GPU Usage", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Top 5 GPUs with highest VRAM used, labeled with GPU ID.", "fieldConfig": { "defaults": { "color": { 
"mode": "thresholds" }, "links": [ { "targetBlank": true, "title": "Go to GPU Dashboard", "url": "/d/ae0aj8euc43r4b/gpu?var-g_gpu_uuid=${__field.labels.gpu_uuid}&var-g_hostname=${__field.labels.hostname}&var-g_gpu_id=${__field.labels.gpu_id}" } ], "mappings": [], "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 160000 } ] }, "unit": "decmbytes" }, "overrides": [] }, "gridPos": { "h": 12, "w": 4, "x": 20, "y": 0 }, "id": 17, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "exemplar": false, "expr": "topk(5, ${g_metrics_prefix}gpu_used_vram{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": true, "legendFormat": "GPU: {{gpu_id}}", "range": false, "refId": "A", "useBackend": false } ], "title": "Top 5 Used VRAM", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Total package power usage, in Watts", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "watt" }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 0, "y": 3 }, "id": 9, "maxDataPoints": 60, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", 
"targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(${g_metrics_prefix}gpu_average_package_power{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, "legendFormat": "Average Package Power Usage", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(${g_metrics_prefix}gpu_package_power{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, "instant": false, "legendFormat": "Package Power Usage", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "__expr__", "uid": "${DS_EXPRESSION}" }, "expression": "$A+$B", "hide": false, "refId": "Total Package Power Usage", "type": "math" } ], "title": "Total Power Usage", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "GBs" }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 8, "y": 3 }, "id": 13, "maxDataPoints": 60, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(${g_metrics_prefix}pcie_bandwidth{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "Total current PCIe bandwidth", "range": true, "refId": "A", "useBackend": 
false } ], "title": "Total Current PCIe Bandwidth", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 0, "y": 6 }, "id": 10, "maxDataPoints": 60, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(${g_metrics_prefix}gpu_used_vram{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(${g_metrics_prefix}gpu_total_vram{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "__expr__", "uid": "${DS_EXPRESSION}" }, "expression": "($A / $B) * 100", "hide": false, "refId": "Memory Usage", "type": "math" } ], "title": "Memory Usage", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, 
"barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "displayName": "PCIe Bandwidth", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 6, "w": 8, "x": 8, "y": 6 }, "id": 14, "maxDataPoints": 60, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(${g_metrics_prefix}pcie_bandwidth{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Total PCIe Bandwidth", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Total ECC counts across all GPUs in this compute node", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 4, "w": 4, "x": 4, "y": 8 }, "id": 18, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, 
"pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(delta(${g_metrics_prefix}gpu_ecc_correct_total{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "Correctable", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(delta(${g_metrics_prefix}gpu_ecc_uncorrect_total{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Uncorrectable", "range": true, "refId": "B", "useBackend": false } ], "title": "Total ECC Counts", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "joule" }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 0, "y": 9 }, "id": 11, "maxDataPoints": 60, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(delta(${g_metrics_prefix}gpu_energy_consumed{hostname=\"$g_hostname\"}[$__interval]))", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "__expr__", "uid": "${DS_EXPRESSION}" }, 
"expression": "$A / 1000000", "hide": false, "refId": "Joules", "type": "math" } ], "title": "Energy Consumed", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Average of current GPU GFX activity", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "displayName": "GPU Usage", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 6, "w": 8, "x": 0, "y": 12 }, "id": 2, "maxDataPoints": 60, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_gfx_activity{hostname=\"$g_hostname\", card_model=~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_gfx_busy_instantaneous{hostname=\"$g_hostname\", 
card_model!~\"102-D65208-0C|102-D67305-00|102-D65209-0C\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "B", "useBackend": false } ], "title": "Average GPU Usage", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Used VRAM over time", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "displayName": "Used VRAM", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 6, "w": 8, "x": 8, "y": 12 }, "id": 4, "maxDataPoints": 60, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(${g_metrics_prefix}gpu_used_vram{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, "legendFormat": "Used VRAM", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": 
"sum(${g_metrics_prefix}gpu_total_vram{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, "instant": false, "legendFormat": "Total VRAM", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "__expr__", "uid": "${DS_EXPRESSION}" }, "expression": "($A / $B) * 100", "hide": false, "refId": "C", "type": "math" } ], "title": "Used VRAM", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Average GPU package power, in Watts", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "displayName": "GPU Power", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "watt" }, "overrides": [] }, "gridPos": { "h": 6, "w": 8, "x": 16, "y": 12 }, "id": 6, "maxDataPoints": 60, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_package_power{hostname=\"$g_hostname\", card_model=~\"102-G30211-00|102-G30211-0C|102-G30211-4C|102-G30212-0C|102-G30213-00|102-G30213-0C\"})", "fullMetaSearch": false, 
"includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_average_package_power{hostname=\"$g_hostname\", card_model!~\"102-G30211-00|102-G30211-0C|102-G30211-4C|102-G30212-0C|102-G30213-00|102-G30213-0C\"})", "hide": false, "instant": false, "legendFormat": "__auto", "range": true, "refId": "B" } ], "title": "Average GPU Power", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Current temperature, in Celsius", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "displayName": "GPU Temperature", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "celsius" }, "overrides": [ { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "GPU Temperature" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 6, "w": 8, "x": 0, "y": 18 }, "id": 3, "maxDataPoints": 60, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", 
"sort": "none" } }, "pluginVersion": "11.3.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_edge_temperature{hostname=\"$g_hostname\", card_model!~\"102-G30211-00|102-G30211-0C|102-G30211-4C|102-G30212-0C|102-G30213-00|102-G30213-0C\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "legendFormat": "Edge Temperature", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_junction_temperature{hostname=\"$g_hostname\", card_model=~\"102-G30211-00|102-G30211-0C|102-G30211-4C|102-G30212-0C|102-G30213-00|102-G30213-0C\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Junction Temperature", "range": true, "refId": "B", "useBackend": false } ], "title": "Average GPU Temperature", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Current memory temperature, in Celsius", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "displayName": "Memory Temperature", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, 
"unit": "celsius" }, "overrides": [] }, "gridPos": { "h": 6, "w": 8, "x": 8, "y": 18 }, "id": 5, "maxDataPoints": 60, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_memory_temperature{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Average Memory Temperature", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Average current temperatures, in Celsius, from:\n- HBM (4 sensors)\n- Edge\n- Junction/hotspot", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "celsius" }, "overrides": [] }, "gridPos": { "h": 6, "w": 8, "x": 16, "y": 18 }, "id": 7, "maxDataPoints": 60, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.0", "targets": [ { "datasource": { 
"type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_hbm_temperature{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "HBM Temperature", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_edge_temperature{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Edge Temperature", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(${g_metrics_prefix}gpu_junction_temperature{hostname=\"$g_hostname\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Junction Temperature", "range": true, "refId": "C", "useBackend": false } ], "title": "Average Sensor Temperatures", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "List of all GPUs in this compute node. 
Click on the GPU ID values to go to that GPU's dashboard (opens new tab).", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": { "align": "auto", "cellOptions": { "type": "auto" }, "filterable": true, "inspect": false }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] } }, "overrides": [ { "matcher": { "id": "byName", "options": "GPU ID" }, "properties": [ { "id": "custom.width", "value": 100 }, { "id": "links", "value": [ { "targetBlank": true, "title": "Go to GPU Dashboard", "url": "/d/ae0aj8euc43r4b/gpu?var-g_gpu_uuid=${__data.fields[\"gpu_uuid\"]}&var-g_hostname=${__data.fields.HOSTNAME}&var-g_gpu_id=${__data.fields[\"GPU ID\"]}" } ] } ] }, { "matcher": { "id": "byName", "options": "GPU UUID" }, "properties": [ { "id": "custom.width", "value": 320 }, { "id": "links", "value": [ { "targetBlank": true, "title": "Go to GPU Dashboard", "url": "/d/ae0aj8euc43r4b/gpu?var-g_gpu_uuid=${__data.fields[\"gpu_uuid\"]}&var-g_hostname=${__data.fields.HOSTNAME}&var-g_gpu_id=${__data.fields[\"GPU ID\"]}" } ] } ] }, { "matcher": { "id": "byName", "options": "HEALTH" }, "properties": [ { "id": "custom.width", "value": 105 }, { "id": "mappings", "value": [ { "options": { "0": { "color": "red", "index": 0, "text": "unhealthy" }, "1": { "color": "green", "index": 1, "text": "healthy" } }, "type": "value" } ] }, { "id": "custom.cellOptions", "value": { "type": "color-text" } } ] } ] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, "id": 20, "options": { "cellHeight": "sm", "footer": { "countRows": false, "enablePagination": true, "fields": "", "reducer": [ "sum" ], "show": false }, "showHeader": true, "sortBy": [ { "desc": false, "displayName": "HEALTH" }, { "desc": false, "displayName": "HOSTNAME" }, { "desc": false, "displayName": "GPU ID" } ] }, "pluginVersion": "11.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", 
"expr": "${g_metrics_prefix}gpu_health{hostname=\"$g_hostname\"}", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "All GPUs", "transformations": [ { "id": "reduce", "options": { "labelsToFields": true, "reducers": [ "last" ] } }, { "id": "groupBy", "options": { "fields": { "Last": { "aggregations": [ "last" ], "operation": "aggregate" }, "gpu_id": { "aggregations": [ "last" ], "operation": "aggregate" }, "gpu_uuid": { "aggregations": [], "operation": "groupby" }, "hostname": { "aggregations": [ "last" ], "operation": "aggregate" } } } }, { "disabled": true, "id": "filterFieldsByName", "options": {} }, { "id": "organize", "options": { "excludeByName": {}, "includeByName": {}, "indexByName": { "gpu_id (last)": 1, "gpu_uuid": 2, "hostname (last)": 0 }, "renameByName": { "Last (last)": "HEALTH", "gpu_id (last)": "GPU ID", "gpu_uuid": "GPU UUID", "hostname (last)": "HOSTNAME" } } } ], "type": "table" }, { "gridPos": { "h": 2, "w": 2, "x": 22, "y": 24 }, "id": 21, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, "content": "v1.3.1", "mode": "markdown" }, "pluginVersion": "11.2.2", "title": "Version", "type": "text" } ], "schemaVersion": 39, "tags": [], "templating": { "list": [ { "current": { "selected": false, "text": "", "value": "" }, "description": "string to prefix names of metrics queries (e.g. 
gpu_gfx_activity -> amd_gpu_gfx_activity)", "hide": 0, "label": "Metrics Prefix", "name": "g_metrics_prefix", "options": [ { "selected": true, "text": "", "value": "" } ], "query": "", "skipUrlSync": false, "type": "textbox" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "definition": "label_values(cluster_name)", "hide": 0, "includeAll": false, "label": "Cluster", "multi": false, "name": "g_cluster_name", "options": [], "query": { "qryType": 1, "query": "label_values(cluster_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 7, "type": "query" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "definition": "label_values({cluster_name=\"$g_cluster_name\"},hostname)", "hide": 0, "includeAll": false, "label": "Compute Node", "multi": false, "name": "g_hostname", "options": [], "query": { "qryType": 1, "query": "label_values({cluster_name=\"$g_cluster_name\"},hostname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 7, "type": "query" } ] }, "time": { "from": "now-6h", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "Compute Node", "uid": "de1q9vq97fe2oc", "version": 1, "weekStart": "" }