{ "__inputs": [ { "name": "DS_TEST-CLUSTER", "label": "test-cluster", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "6.1.6" }, { "type": "panel", "id": "graph", "name": "Graph", "version": "" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": "${DS_TEST-CLUSTER}", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 1, "id": null, "iteration": 1566459338986, "links": [], "panels": [ { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 2742, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The storage size per TiKV instance", "editable": true, "error": false, "fill": 5, "grid": {}, "gridPos": { "h": 8, "w": 8, "x": 0, "y": 1 }, "id": 56, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 0, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(tikv_engine_size_bytes{instance=~\"$instance\"}) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Store size", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": 
null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "decbytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The available capacity size of each TiKV instance", "editable": true, "error": false, "fill": 5, "grid": {}, "gridPos": { "h": 8, "w": 8, "x": 8, "y": 1 }, "id": 1706, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 0, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(tikv_store_size_bytes{instance=~\"$instance\", type=\"available\"}) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Available size", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "decbytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1,
"description": "The capacity size per TiKV instance", "editable": true, "error": false, "fill": 5, "grid": {}, "gridPos": { "h": 8, "w": 8, "x": 16, "y": 1 }, "id": 1707, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 0, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(tikv_store_size_bytes{instance=~\"$instance\", type=\"capacity\"}) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Capacity size", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "decbytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU usage of each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, "id": 1708, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot",
"seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The memory usage of each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, "id": 1709, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "avg(process_resident_memory_bytes{instance=~\"$instance\"}) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Memory", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": {
"buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The I/O utilization per TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 17 }, "id": 1710, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(node_disk_io_time_seconds_total[1m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - {{device}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "IO utilization", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The total bytes of read and write in each TiKV instance",
"editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 17 }, "id": 1711, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_engine_flow_bytes{instance=~\"$instance\", db=\"kv\", type=\"wal_file_bytes\"}[1m])) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}}-write", "refId": "A", "step": 10 }, { "expr": "sum(rate(tikv_engine_flow_bytes{instance=~\"$instance\", db=\"kv\", type=~\"bytes_read|iter_bytes_read\"}[1m])) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}}-read", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "MBps", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The QPS of different kinds of commands in each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 25 }, "id": 1713, "legend": { "alignAsTable": true, "avg": false, "current": true, 
"hideEmpty": false, "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance,type)", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}} - {{type}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "QPS", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The total number of the gRPC message failures", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 25 }, "id": 1712, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": 
false, "targets": [ { "expr": "sum(rate(tikv_grpc_msg_fail_total{instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}}-grpc-msg-fail", "refId": "A", "step": 10 }, { "expr": "sum(delta(tikv_pd_heartbeat_message_total{instance=~\"$instance\", type=\"noop\"}[1m])) by (instance) < 1", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-pd-heartbeat", "refId": "B" }, { "expr": "sum(rate(tikv_critical_error_total{instance=~\"$instance\"}[1m])) by (instance, type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{type}}", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Errps", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The number of leaders per TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 33 }, "id": 1715, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "total", "lines": false } ], "spaceLength": 10, "stack": false, "steppedLine": 
false, "targets": [ { "expr": "sum(tikv_raftstore_region_count{instance=~\"$instance\", type=\"leader\"}) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 }, { "expr": "delta(tikv_raftstore_region_count{instance=~\"$instance\", type=\"leader\"}[30s]) < -10", "format": "time_series", "hide": true, "intervalFactor": 2, "legendFormat": "", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Leader", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The number of Regions on each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 33 }, "id": 1714, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 300, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(tikv_raftstore_region_count{instance=~\"$instance\", type=\"region\"}) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title":
"Region", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } } ], "repeat": null, "title": "Cluster", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 1 }, "id": 2743, "panels": [ { "alert": { "conditions": [ { "evaluator": { "params": [ 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "frequency": "60s", "handler": 1, "name": "Critical error alert", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "fill": 1, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 2 }, "id": 2741, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_critical_error_total{instance=~\"$instance\"}[1m])) by (instance, type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{type}}", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Critical error", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { 
"buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "It contains some kinds of events such as write stall, channel full, scheduler busy, and coprocessor full, which will make the TiKV instance unavailable temporarily.", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 9 }, "id": 1584, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_scheduler_too_busy_total{instance=~\"$instance\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "scheduler-{{instance}}", "metric": "", "refId": "A", "step": 4 }, { "expr": "sum(rate(tikv_channel_full_total{instance=~\"$instance\"}[1m])) by (instance, type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "channelfull-{{instance}}-{{type}}", "metric": "", "refId": "B", "step": 4 }, { "expr": "sum(rate(tikv_coprocessor_request_error{instance=~\"$instance\", type='full'}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "coprocessor-{{instance}}", "metric": "", "refId": "C", "step": 4 },
{ "expr": "avg(tikv_engine_write_stall{instance=~\"$instance\", type=\"write_stall_percentile99\"}) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "stall-{{instance}}", "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Server is busy", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "10s", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "alerting", "frequency": "10s", "handler": 1, "message": "TiKV server report failures", "name": "server report failures alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The total number of reporting failure messages", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 9 }, "id": 18, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false,
"targets": [ { "expr": "sum(rate(tikv_server_report_failure_msg_total{instance=~\"$instance\"}[1m])) by (type,instance,store_id)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - {{type}} - to - {{store_id}}", "metric": "tikv_server_raft_store_msg_total", "refId": "A", "step": 10 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Server report failures", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The number of Raftstore errors per type on each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 16 }, "id": 1718, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_storage_engine_async_request_total{instance=~\"$instance\", status!~\"success|all\"}[1m])) by (instance, status)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{status}}", "metric": "", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [],
"timeShift": null, "title": "Raftstore error", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The number of different scheduler errors on each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 16 }, "id": 1719, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_scheduler_stage_total{instance=~\"$instance\", stage=~\"snapshot_err|prepare_write_err\"}[1m])) by (instance, stage)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{stage}}", "metric": "", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Scheduler error", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": 
"short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The number of different coprocessor errors on each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 23 }, "id": 1720, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_coprocessor_request_error{instance=~\"$instance\"}[1m])) by (instance, reason)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{reason}}", "metric": "", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Coprocessor error", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The number of different gRPC message errors on each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, 
"gridPos": { "h": 7, "w": 12, "x": 12, "y": 23 }, "id": 1721, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_grpc_msg_fail_total{instance=~\"$instance\"}[1m])) by (instance, type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{type}}", "metric": "", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "gRPC message error", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The count of dropped leader in each TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 30 }, "id": 1722, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, 
"points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "total", "lines": false } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(delta(tikv_raftstore_region_count{instance=~\"$instance\", type=\"leader\"}[1m])) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Leader drop", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The count of missing leaders per TiKV instance", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 30 }, "id": 1723, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "total", "lines": false } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(tikv_raftstore_leader_missing{instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "B" } ], "thresholds": [], "timeFrom": 
null, "timeRegions": [], "timeShift": null, "title": "Leader missing", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "repeat": null, "title": "Errors", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 2 }, "id": 2744, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The total size of each column family", "editable": true, "error": false, "fill": 3, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 3 }, "id": 33, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(tikv_engine_size_bytes{instance=~\"$instance\"}) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "CF size", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "decbytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": 
null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The storage size per TiKV instance", "editable": true, "error": false, "fill": 5, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 3 }, "id": 1705, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 0, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(tikv_engine_size_bytes{instance=~\"$instance\"}) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Store size", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "decbytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "datasourceId": 1, "model": { "expr": "sum(rate(tikv_channel_full_total{instance=~\"$instance\"}[1m])) by (instance, type)", "intervalFactor": 2, "legendFormat": "{{instance}} - {{type}}", "metric": "", "refId": "A", "step": 10 }, "params": [ "A", "10s", "now" ] }, "reducer": { 
"params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "frequency": "10s", "handler": 1, "message": "TiKV channel full", "name": "TiKV channel full alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The total number of channel full errors on each TiKV instance", "editable": true, "error": false, "fill": 3, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 11 }, "id": 22, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_channel_full_total{instance=~\"$instance\"}[1m])) by (instance, type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - {{type}}", "metric": "", "refId": "A", "step": 10 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Channel full", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 1073741824 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ 
"B", "1m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "frequency": "60s", "handler": 1, "name": "approximate region size alert", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The approximate Region size", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 11 }, "id": 1481, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 250, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(tikv_raftstore_region_size_bucket{instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "99%", "metric": "", "refId": "B", "step": 10 }, { "expr": "histogram_quantile(0.95, sum(rate(tikv_raftstore_region_size_bucket{instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "95%", "metric": "", "refId": "C", "step": 10 }, { "expr": "sum(rate(tikv_raftstore_region_size_sum{instance=~\"$instance\"}[1m])) / sum(rate(tikv_raftstore_region_size_count{instance=~\"$instance\"}[1m])) ", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", "metric": "", "refId": "D", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Approximate Region size", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": 
true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } } ], "repeat": null, "title": "Server", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 3 }, "id": 2745, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The count of different kinds of gRPC messages", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }, "id": 95, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 300, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_grpc_msg_duration_seconds_count{instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", "metric": "tikv_grpc_msg_duration_seconds_bucket", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "gRPC message count", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, 
"datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The count of different kinds of gRPC messages that failed", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }, "id": 107, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 300, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_grpc_msg_fail_total{instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", "metric": "tikv_grpc_msg_fail_total", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "gRPC message failed", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The gRPC message duration per message type (P99)", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 }, "id": 98, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": false, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 300, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, 
"pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(tikv_grpc_msg_duration_seconds_bucket{instance=~\"$instance\", type!=\"kv_gc\"}[1m])) by (le, type))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "99% gRPC message duration", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", "label": null, "logBase": 10, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 }, "id": 2532, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": false, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 300, "sort": "max", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_grpc_msg_duration_seconds_sum{instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_grpc_msg_duration_seconds_count[1m])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": 
"Average gRPC message duration", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", "label": null, "logBase": 2, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "repeat": null, "title": "gRPC", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 4 }, "id": 2746, "panels": [ { "alert": { "conditions": [ { "evaluator": { "params": [ 1.7 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "datasourceId": 1, "model": { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"raftstore_.*\"}[1m])) by (instance)", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 20 }, "params": [ "A", "1m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "alerting", "for": "0m", "frequency": "60s", "handler": 1, "message": "TiKV raftstore thread CPU usage is high", "name": "TiKV raft store CPU alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of raftstore thread", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 5 }, "id": 61, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": 
[], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"raftstore_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 4 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.85 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Raft store CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 1.8 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "1m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "alerting", "for": "0m", "frequency": "1m", "handler": 1, "message": "TiKV async apply thread CPU usage is high", "name": "TiKV async apply CPU alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of async apply", "editable": true, "error": false, "fill": 1, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 5 }, "id": 79, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", 
"percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"apply_[0-9]+\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 4 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Async apply CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 3.6 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "1m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "alerting", "for": "0m", "frequency": "1m", "handler": 1, "message": "TiKV scheduler worker thread CPU usage is high", "name": "TiKV scheduler worker CPU alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of scheduler worker", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 12 }, "id": 64, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 
null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"sched_.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 4 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Scheduler worker CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 3.6 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "1m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "alerting", "for": "0m", "frequency": "1m", "handler": 1, "message": "TiKV gRPC poll thread CPU usage is high", "name": "TiKV gRPC poll CPU alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of gRPC", "fill": 1, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 12 }, "id": 105, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, 
"rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"grpc.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 4 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "gRPC poll CPU", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 7.2 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "1m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "alerting", "for": "", "frequency": "1m", "handler": 1, "message": "TiKV Coprocessor thread CPU alert", "name": "TiKV Coprocessor CPU alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of coprocessor", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 19 }, "id": 78, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, 
"max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"cop_normal.*\"}[1m])) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}} - normal", "refId": "A", "step": 4 }, { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"cop_high.*\"}[1m])) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}} - high", "refId": "B", "step": 4 }, { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"cop_low.*\"}[1m])) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}} - low", "refId": "C", "step": 4 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Coprocessor CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "conditions": [ { "evaluator": { "params": [ 3.6 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "1m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], 
"executionErrorState": "alerting", "for": "", "frequency": "1m", "handler": 1, "message": "TiKV Storage ReadPool thread CPU usage is high", "name": "TiKV Storage ReadPool CPU alert", "noDataState": "ok", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of readpool", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 19 }, "id": 1908, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"store_read_norm.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - normal", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 4 }, { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"store_read_high.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - high", "metric": "tikv_thread_cpu_seconds_total", "refId": "B", "step": 4 }, { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"store_read_low.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} - low", "metric": "tikv_thread_cpu_seconds_total", "refId": "C", "step": 4 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9 } ], "timeFrom": null, "timeRegions": [], 
"timeShift": null, "title": "Storage ReadPool CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": " \tThe CPU utilization of split check", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 26 }, "id": 68, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"split_check\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Split check CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 
null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of RocksDB", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 26 }, "id": 69, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"rocksdb.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 4 } ], "thresholds": [ { "colorMode": "warning", "fill": true, "line": true, "op": "gt", "value": 1 }, { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 4 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "RocksDB CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "fill": 1, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 33 }, "id": 2531, "legend": 
{ "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"gc_worker.*\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "GC worker CPU", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The CPU utilization of snapshot worker", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 33 }, "id": 67, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": true, "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { 
"expr": "sum(rate(tikv_thread_cpu_seconds_total{instance=~\"$instance\", name=~\"snapshot_worker\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "metric": "tikv_thread_cpu_seconds_total", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Snapshot worker CPU", "tooltip": { "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "repeat": null, "title": "Thread CPU", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, "id": 2747, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The count of requests that TiKV sends to PD", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, "id": 1069, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 350, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_pd_request_duration_seconds_count{instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ type }}", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "PD requests", "tooltip": { "shared": true, "sort": 0, 
"value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The time consumed by requests that TiKV sends to PD", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, "id": 1070, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 350, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_pd_request_duration_seconds_sum{instance=~\"$instance\"}[1m])) by (type) / sum(rate(tikv_pd_request_duration_seconds_count{instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ type }}", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "PD request duration (average)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", 
"decimals": 1, "description": "The total number of PD heartbeat messages", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 }, "id": 1215, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 350, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(tikv_pd_heartbeat_message_total{instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ type }}", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "PD heartbeats", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "opm", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, "description": "The total number of peers validated by the PD worker", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 }, "id": 1396, "legend": { "alignAsTable": true, "avg": false, "current": true, "max": true, "min": false, "rightSide": true, "show": true, "sideWidth": 350, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": 
"sum(rate(tikv_pd_validate_peer_total{instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ type }}", "metric": "", "refId": "A", "step": 4 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "PD validate peers", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "repeat": null, "title": "PD", "type": "row" } ], "refresh": "1m", "schemaVersion": 18, "style": "dark", "tags": [], "templating": { "list": [ { "allValue": null, "current": {}, "datasource": "${DS_TEST-CLUSTER}", "definition": "", "hide": 0, "includeAll": true, "label": "db", "multi": true, "name": "db", "options": [], "query": "label_values(tikv_engine_block_cache_size_bytes, db)", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": {}, "datasource": "${DS_TEST-CLUSTER}", "definition": "", "hide": 0, "includeAll": true, "label": "command", "multi": true, "name": "command", "options": [], "query": "label_values(tikv_storage_command_total, type)", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".*", "current": {}, "datasource": "${DS_TEST-CLUSTER}", "definition": "", "hide": 0, "includeAll": true, "label": "Instance", "multi": false, "name": "instance", "options": [], "query": "label_values(tikv_engine_size_bytes, instance)", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": 
[], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-5m", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "browser", "title": "Test-Cluster-TiKV-Summary", "uid": "X7VQmEzZk", "version": 5 }