{ "__inputs": [ { "name": "DS_PROMETHEUS", "label": "Prometheus", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__elements": {}, "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "11.3.1" }, { "type": "panel", "id": "heatmap", "name": "Heatmap", "version": "" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }, { "type": "panel", "id": "stat", "name": "Stat", "version": "" }, { "type": "panel", "id": "timeseries", "name": "Time series", "version": "" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "description": "Monitoring vLLM Inference Server", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": null, "links": [], "panels": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 7, "w": 3, "x": 0, "y": 0 }, "id": 16, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "sum(increase(vllm:request_success_total{finished_reason=\"stop\", model_name=\"$model_name\", job=\"${job}\"}[$__range]))", "instant": false, "legendFormat": "Success Request Count(stop)", "range": true, "refId": "A" } ], "title": "Success Request Count(Stop)", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 7, "w": 5, "x": 3, "y": 0 }, "id": 17, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "sum by(finished_reason) (increase(vllm:request_success_total{model_name=\"$model_name\"}[$__range]))", "instant": false, "legendFormat": "Success Request Count", "range": true, "refId": "A" } ], "title": "Success Request Count", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Heatmap of request prompt length", "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 7, "w": 5, "x": 8, "y": 0 }, "id": 12, "options": { "calculate": false, "cellGap": 1, "cellValues": { "unit": "none" }, "color": { "exponent": 0.5, "fill": "dark-orange", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 64 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": true }, "rowsFrame": { "layout": "auto", "value": "Request count" }, "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": true }, "yAxis": { "axisLabel": "Prompt Length", "axisPlacement": "left", "reverse": false, "unit": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum by(le) (increase(vllm:request_prompt_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval]))", "format": "heatmap", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "{{le}}", "range": true, "refId": "A", "useBackend": false } ], "title": "Request Prompt Length", "type": "heatmap" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Heatmap of request generation length", "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 7, "w": 5, "x": 13, "y": 0 }, "id": 13, "options": { "calculate": false, "cellGap": 1, "cellValues": { "unit": "none" }, "color": { "exponent": 0.5, "fill": "dark-orange", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 64 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": true }, "rowsFrame": { "layout": "auto", "value": "Request count" }, "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": true }, "yAxis": { "axisLabel": "Generation Length", "axisPlacement": "left", "reverse": false, "unit": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum by(le) (increase(vllm:request_generation_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval]))", "format": "heatmap", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "{{le}}", "range": true, "refId": "A", "useBackend": false } ], "title": "Request Generation Length", "type": "heatmap" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Prompt & Decode length", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "none" }, "overrides": [ { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "Decode-P99" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 7, "w": 6, "x": 18, "y": 0 }, "id": 18, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:request_prompt_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "Prompt-P99", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:request_prompt_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "Prompt-P90", "range": true, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:request_prompt_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "Prompt-P50", "range": true, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:request_generation_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "hide": false, "instant": false, "legendFormat": "Decode-P99", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "histogram_quantile(0.90, sum by(le) (rate(vllm:request_generation_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "hide": false, "instant": false, "legendFormat": "Decode-P90", "range": true, "refId": "E" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "histogram_quantile(0.50, sum by(le) (rate(vllm:request_generation_tokens_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "hide": false, "instant": false, "legendFormat": "Decode-P50", "range": true, "refId": "F" } ], "title": "Prompt & Decode Length", "type": "timeseries" }, { "datasource": { "default": false, "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "End to end request latency measured in seconds.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 0, "y": 7 }, "id": 9, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P99", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P95", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P90", "range": true, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P50", "range": true, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "avg(rate(vllm:e2e_request_latency_seconds_sum{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval]))", "hide": false, "instant": false, "legendFormat": "Average", "range": true, "refId": "E" } ], "title": "E2E Request Latency", "type": "timeseries" }, { "datasource": { "default": false, "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "P50, P90, P95, and P99 TTFT latency in seconds.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "Average" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 7, "w": 8, "x": 8, "y": 7 }, "id": 5, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P99", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P95", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P90", "range": true, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\", job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P50", "range": true, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "avg(rate(vllm:time_to_first_token_seconds_sum{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval]))", "hide": false, "instant": false, "legendFormat": "Average", "range": true, "refId": "E" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "rate(vllm:time_to_first_token_seconds_sum{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Mean-{{pod}}", "range": true, "refId": "F" } ], "title": "Time To First Token Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Inter token latency in seconds.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "Average" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 7, "w": 8, "x": 16, "y": 7 }, "id": 10, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P99", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P95", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P90", "range": true, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P50", "range": true, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "avg(rate(vllm:time_per_output_token_seconds_sum{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval]))", "hide": false, "instant": false, "legendFormat": "Average", "range": true, "refId": "F" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "rate(vllm:time_per_output_token_seconds_sum{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Mean-{{pod}}", "range": true, "refId": "E" } ], "title": "Time Per Output Token Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Number of tokens processed per second", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 0, "y": 14 }, "id": 8, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum(rate(vllm:prompt_tokens_total{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "Prompt Tokens/Sec {{model_name}}", "range": true, "refId": "A", "useBackend": false } ], "title": "Prompt Token Throughput", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Number of tokens processed per second", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 8, "y": 14 }, "id": 14, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum(rate(vllm:generation_tokens_total{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "Generation Tokens/Sec {{pod}}", "range": true, "refId": "A", "useBackend": false } ], "title": "Decode Token Throughput", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Number of requests in RUNNING, WAITING, and SWAPPED state", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "none" }, "overrides": [ { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "Num Waiting" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 7, "w": 8, "x": 16, "y": 14 }, "id": 3, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum(vllm:num_requests_running{model_name=\"$model_name\",job=\"${job}\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Num Running", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum(vllm:num_requests_swapped{model_name=\"$model_name\",job=\"${job}\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Num Swapped", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum(vllm:num_requests_waiting{model_name=\"$model_name\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Num Waiting", "range": true, "refId": "C", "useBackend": false } ], "title": "Scheduler State", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Percentage of prefix cache hit rate", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 0, "y": 21 }, "id": 15, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "avg(vllm:gpu_prefix_cache_hit_rate{model_name=\"$model_name\",job=\"${job}\"})", "instant": false, "legendFormat": "GPU Prefix Cache Hit Ratio", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "avg(vllm:cpu_prefix_cache_hit_rate{model_name=\"$model_name\",job=\"${job}\"})", "hide": false, "instant": false, "legendFormat": "CPU Prefix Cache Hit Ratio", "range": true, "refId": "B" } ], "title": "Prefix Cache Hit Rate", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Percentage of used cache blocks by vLLM.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 8, "y": 21 }, "id": 4, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "none", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "avg(vllm:gpu_cache_usage_perc{model_name=\"$model_name\",job=\"${job}\"})", "instant": false, "legendFormat": "GPU Cache Usage", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "avg(vllm:cpu_cache_usage_perc{model_name=\"$model_name\",job=\"${job}\"})", "hide": false, "instant": false, "legendFormat": "CPU Cache Usage", "range": true, "refId": "B" } ], "title": "Cache Utilization", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [ { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "Value" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 7, "w": 8, "x": 16, "y": 21 }, "id": 11, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", "expr": "sum by(finished_reason) (increase(vllm:request_success_total{model_name=\"$model_name\",job=\"${job}\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "interval": "", "legendFormat": "{{finished_reason}}", "range": true, "refId": "A", "useBackend": false } ], "title": "Finish Reason", "type": "timeseries" } ], "refresh": "", "schemaVersion": 40, "tags": [ "aibrix", "model", "vllm" ], "templating": { "list": [ { "current": {}, "includeAll": false, "label": "datasource", "name": "DS_PROMETHEUS", "options": [], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "definition": "label_values(vllm:generation_tokens_total,model_name)", "includeAll": false, "label": "model_name", "name": "model_name", "options": [], "query": { "qryType": 1, "query": "label_values(vllm:generation_tokens_total,model_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", "type": "query" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "definition": "label_values(vllm:generation_tokens_total,job)", "includeAll": false, "label": "job", "name": "job", "options": [], "query": { "qryType": 1, "query": "label_values(vllm:generation_tokens_total,job)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", "type": "query" } ] }, "time": { "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "AIBrix Engine Dashboard (vLLM)", "uid": "ce2azhsbhel8gd", "version": 1, "weekStart": "" }