zabbix_export: version: '5.4' date: '2021-11-21T22:05:50Z' groups: - uuid: d62544c11dbb4b49ba4dcdceab85213c name: Kube/Nodes - uuid: 7a85c330e6094633910367f65298be68 name: Kube/Pods - uuid: 7df96b18c230490a9a0a9e2307226338 name: Templates - uuid: da54d5db19e44356812df51299157f52 name: Templates/Kubernetes - uuid: 846977d1dfed4968bc5f8bdb363285bc name: 'Templates/Operating systems' templates: - uuid: 9d9884f83d4b464593634c9dd01a3634 template: 'Kube by Prom API' name: 'Kube by Prom API' description: | ## Description This template works out of the box as soon as Prometheus (Prometheus-operator) is available inside your cluster; it does not require any Zabbix agent installation or configuration. It allows external monitoring of the Kubernetes cluster through ingress, without any NodePort declaration. It uses the Prometheus API to create a Zabbix host for each pod available inside the Kubernetes cluster. {$PROM.API.URL} must contain the Prometheus entry point into your Kubernetes cluster. Zabbix pod hosts are created with the "Template Kube Pod by Prom API" template by default. ## Overview ### Description zabbix-kube-prom is a batch of Zabbix LLD templates for Zabbix server. It is used for external Kubernetes monitoring by Zabbix via Prometheus API. ### Installation 1. Install [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) into the Kubernetes cluster. 2. Import global Zabbix Template (zabbix-kube-prom.xml) into your Zabbix server. 3. Create or import a host identifying your Kubernetes cluster where Prometheus is deployed. 4. Let LLD create discovered nodes as new "Zabbix hosts". 5. Let LLD create discovered pods as new "Virtual Zabbix hosts". ### Templates The global export (zabbix-kube-prom.xml) contains the following templates: | Templates | Description | | --- | --- | | Template Kube by Prom API | Creates a Zabbix host for each pod and node discovered. 
| | Template Kube Node by Prom API | Template applied to the created host (node). | | Template Kube Pod by Prom API | Template applied to the created host (pod). | ### Licenses | Template | License | | --- | --- | | Template OS Linux by Prom | *GNU General Public License v2.0 or later* [Copyright (C) 2001-2021 Zabbix SIA](https://github.com/zabbix/zabbix/blob/master/README) | | Template Kube by Prom API, Template Kube Node by Prom API, Template Kube Pod by Prom API | *GNU General Public License v3.0* [Copyright (C) 2021 Diagnostica Stago](https://www.stago.com/) | --- ## Author Laurent Marchelli groups: - name: Templates - name: Templates/Kubernetes discovery_rules: - uuid: 8c44c690ce8c41acbec5e2584326ae9d name: 'Kube node' type: HTTP_AGENT key: prom.node.discovery filter: evaltype: AND conditions: - macro: '{#NODE.IP}' value: '{$PROM.NODE.IP.MATCHES}' formulaid: A - macro: '{#NODE.IP}' value: '{$PROM.NODE.IP.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: B - macro: '{#NODE.NAME}' value: '{$PROM.NODE.NAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: C host_prototypes: - uuid: bd6e11ae94314f61ac624cc9cb14ccc2 host: '{#NODE.IP}' name: '{#NODE.NAME}' group_links: - group: name: Kube/Nodes templates: - name: 'Kube Node by Prom API' url: '{$PROM.API.URL}/query' query_fields: - name: query value: kubelet_node_name lld_macro_paths: - lld_macro: '{#NODE.IP}' path: $.instance - lld_macro: '{#NODE.NAME}' path: $.node preprocessing: - type: JSONPATH parameters: - '$.data.result[?(@.metric.node=~''{$PROM.NODE.NAME.MATCHES}'')].metric' - type: JAVASCRIPT parameters: - | return JSON.stringify(JSON.parse(value).map(function(metric){ metric.instance=metric.instance.split(":")[0]; return metric})) - uuid: 47f43b4de41e46e1a4ee40bfb1b82ddc name: 'Kube pod' type: HTTP_AGENT key: prom.pod.discovery filter: evaltype: AND conditions: - macro: '{#NAMESPACE}' value: '{$PROM.POD.NAMESPACE.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: A - macro: '{#SERVICE}' value: 
'{$PROM.POD.SERVICE.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: C - macro: '{#PODNAME}' value: '{$PROM.POD.NAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: B host_prototypes: - uuid: 254315bd1f2b43beb60db42328ed9510 host: '{#PODNAME}' name: '{#PODNAME}' group_links: - group: name: Kube/Pods templates: - name: 'Kube Pod by Prom API' url: '{$PROM.API.URL}/query' query_fields: - name: query value: kube_pod_created lld_macro_paths: - lld_macro: '{#NAMESPACE}' path: $.namespace - lld_macro: '{#PODNAME}' path: $.pod - lld_macro: '{#SERVICE}' path: $.service preprocessing: - type: JSONPATH parameters: - '$.data.result[?(@.metric.namespace=~''{$PROM.POD.NAMESPACE.MATCHES}'' && @.metric.service=~''{$PROM.POD.SERVICE.MATCHES}'' && @.metric.pod=~''{$PROM.POD.NAME.MATCHES}'')].metric' macros: - macro: '{$CPU.UTIL.CRIT}' value: '90' - macro: '{$IF.ERRORS.WARN}' value: '2' - macro: '{$IF.UTIL.MAX}' value: '90' - macro: '{$IFCONTROL}' value: '1' - macro: '{$KERNEL.MAXFILES.MIN}' value: '256' - macro: '{$LOAD_AVG_PER_CPU.MAX.WARN}' value: '1.5' description: 'Load per CPU considered sustainable. Tune if needed.' - macro: '{$MEMORY.AVAILABLE.MIN}' value: 20M - macro: '{$MEMORY.UTIL.MAX}' value: '90' - macro: '{$NET.IF.IFALIAS.MATCHES}' value: '^.*$' - macro: '{$NET.IF.IFALIAS.NOT_MATCHES}' value: CHANGE_IF_NEEDED - macro: '{$NET.IF.IFNAME.MATCHES}' value: '^.*$' - macro: '{$NET.IF.IFNAME.NOT_MATCHES}' value: '(^Software Loopback Interface|^NULL[0-9.]*$|^[Ll]o[0-9.]*$|^[Ss]ystem$|^Nu[0-9.]*$|^veth[0-9a-z]+$|docker[0-9]+|br-[a-z0-9]{12})' description: 'Filter out loopbacks, nulls, docker veth links and docker0 bridge by default' - macro: '{$NET.IF.IFOPERSTATUS.MATCHES}' value: '^.*$' - macro: '{$NET.IF.IFOPERSTATUS.NOT_MATCHES}' value: ^7$ description: 'Ignore notPresent(7)' - macro: '{$NODE_EXPORTER_PORT}' value: '9100' description: 'TCP Port node_exporter is listening on.' 
# Prometheus connection and discovery-filter macros of the
# 'Kube by Prom API' template. Each entry is one user macro
# (name, default value, description) of the template's `macros` list.

# Base URL of the Prometheus HTTP API. The HTTP agent items and discovery
# rules in this file request '{$PROM.API.URL}/query', so the value must NOT
# end with '/'; otherwise the request path becomes '.../api/v1//query'.
- macro: '{$PROM.API.URL}'
  value: 'http://prometheus.k8scluster.nuci7.lan:8080/api/v1'
  description: 'Prometheus API URL (no trailing slash). Can be overridden on the host or linked template level.'

# Node discovery filters. The *.MATCHES / *.NOT_MATCHES values are regular
# expressions consumed by the 'Kube node' discovery rule (LLD filter
# conditions and the JSONPath preprocessing step).
# NOTE(review): CHANGE_IF_NEEDED is a placeholder; presumably it never matches
# a real value, so nothing is excluded until it is overridden - confirm.
- macro: '{$PROM.NODE.IP.MATCHES}'
  value: '^.*$'
  description: 'This macro is used in node discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.NODE.IP.NOT_MATCHES}'
  value: CHANGE_IF_NEEDED
  description: 'This macro is used in node discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.NODE.NAME.MATCHES}'
  value: '^.*$'
  description: 'This macro is used in node discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.NODE.NAME.NOT_MATCHES}'
  value: CHANGE_IF_NEEDED
  description: 'This macro is used in node discovery. Can be overridden on the host or linked template level.'

# Pod metric discovery filters (device and network interface names).
# Their consumers are not visible in this part of the file; the purpose
# stated here is taken from the macro descriptions themselves.
- macro: '{$PROM.POD.DEVICE.MATCHES}'
  value: '^.*$'
  description: 'Device regex used in pod''s metric discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.POD.DEVICE.NOT_MATCHES}'
  value: CHANGE_IF_NEEDED
  description: 'Device regex used in pod''s metric discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.POD.IFNAME.MATCHES}'
  value: '^.*$'
  description: 'Network interface regex used in pod''s metric discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.POD.IFNAME.NOT_MATCHES}'
  value: CHANGE_IF_NEEDED
  description: 'Network interface regex used in pod''s metric discovery. Can be overridden on the host or linked template level.'

# Pod discovery filters on the pod name, consumed by the 'Kube pod'
# discovery rule (LLD filter condition and JSONPath preprocessing step).
- macro: '{$PROM.POD.NAME.MATCHES}'
  value: '^.*$'
  description: 'This macro is used in pod discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.POD.NAME.NOT_MATCHES}'
  value: CHANGE_IF_NEEDED
  description: 'This macro is used in pod discovery. Can be overridden on the host or linked template level.'
- macro: '{$PROM.POD.NAMESPACE.MATCHES}' value: '^.*$' description: 'This macro is used in pod discovery. Can be overridden on the host or linked template level.' - macro: '{$PROM.POD.NAMESPACE.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'This macro is used in pod discovery. Can be overridden on the host or linked template level.' - macro: '{$PROM.POD.SERVICE.MATCHES}' value: '^.*$' description: 'This macro is used in pod discovery. Can be overridden on the host or linked template level.' - macro: '{$PROM.POD.SERVICE.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'This macro is used in pod discovery. Can be overridden on the host or linked template level.' - macro: '{$SWAP.PFREE.MIN.WARN}' value: '50' - macro: '{$SYSTEM.FUZZYTIME.MAX}' value: '60' - macro: '{$VFS.DEV.DEVNAME.MATCHES}' value: .+ description: 'This macro is used in block devices discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.DEV.DEVNAME.NOT_MATCHES}' value: '^(loop[0-9]*|sd[a-z][0-9]+|nbd[0-9]+|sr[0-9]+|fd[0-9]+|dm-[0-9]+|ram[0-9]+|ploop[a-z0-9]+|md[0-9]*|hcp[0-9]*|zram[0-9]*)' description: 'This macro is used in block devices discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.DEV.READ.AWAIT.WARN}' value: '20' description: 'Disk read average response time (in ms) before the trigger would fire' - macro: '{$VFS.DEV.WRITE.AWAIT.WARN}' value: '20' description: 'Disk write average response time (in ms) before the trigger would fire' - macro: '{$VFS.FS.FSDEVICE.MATCHES}' value: ^.+$ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSDEVICE.NOT_MATCHES}' value: ^\s$ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSNAME.MATCHES}' value: .+ description: 'This macro is used in filesystems discovery. 
Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSNAME.NOT_MATCHES}' value: ^(/dev|/sys|/run|/proc|.+/shm$) description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSTYPE.MATCHES}' value: ^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs)$ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSTYPE.NOT_MATCHES}' value: ^\s$ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.INODE.PFREE.MIN.CRIT}' value: '10' - macro: '{$VFS.FS.INODE.PFREE.MIN.WARN}' value: '20' - macro: '{$VFS.FS.PUSED.MAX.CRIT}' value: '90' - macro: '{$VFS.FS.PUSED.MAX.WARN}' value: '80' - uuid: 34d7e9fa6ba84e34a016c136bbe1c2a8 template: 'Kube Node by Prom API' name: 'Kube Node by Prom API' description: | ## Description This template works out of the box as soon as Prometheus (Prometheus-operator) is available inside your cluster; it does not require any Zabbix agent installation or configuration. It allows external monitoring of the Kubernetes cluster through ingress, without any NodePort declaration. It uses the Prometheus API to create a Zabbix host for each pod available inside the Kubernetes cluster. {$PROM.API.URL} must contain the Prometheus entry point into your Kubernetes cluster. Zabbix pod hosts are created with the "Template Kube Pod by Prom API" template by default. ## Overview ### Description zabbix-kube-prom is a batch of Zabbix LLD templates for Zabbix server. It is used for external Kubernetes monitoring by Zabbix via Prometheus API. ### Installation 1. Install [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) into the Kubernetes cluster. 2. 
Import global Zabbix Template (zabbix-kube-prom.xml) into your Zabbix server. 3. Create or import a host identifying your Kubernetes cluster where Prometheus is deployed. 4. Let LLD create discovered nodes as new "Zabbix hosts". 5. Let LLD create discovered pods as new "Virtual Zabbix hosts". ### Templates The global export (zabbix-kube-prom.xml) contains the following templates: | Templates | Description | | --- | --- | | Template Kube by Prom API | Creates a Zabbix host for each pod and node discovered. | | Template Kube Node by Prom API | Template applied to the created host (node). | | Template Kube Pod by Prom API | Template applied to the created host (pod). | ### Licenses | Template | License | | --- | --- | | Template OS Linux by Prom | *GNU General Public License v2.0 or later* [Copyright (C) 2001-2021 Zabbix SIA](https://github.com/zabbix/zabbix/blob/master/README) | | Template Kube by Prom API, Template Kube Node by Prom API, Template Kube Pod by Prom API | *GNU General Public License v3.0* [Copyright (C) 2021 Diagnostica Stago](https://www.stago.com/) | --- ## Author Laurent Marchelli ## Description Official Linux template using node exporter. Known Issues: Description: node_exporter v0.16.0 renamed many metrics. CPU utilization for 'guest' and 'guest_nice' metrics are not supported in this template with node_exporter < 0.16. Disk IO metrics are not supported. Other metrics provided as 'best effort'. See https://github.com/prometheus/node_exporter/releases/tag/v0.16.0 for details. Version: below 0.16.0 Description: metric node_network_info with label 'device' cannot be found, so network discovery is not possible. Version: below 0.18 You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback/387225-discussion-thread-for-official-zabbix-template-for-linux Template tooling version used: 0.34 ## Overview ### Description zabbix-kube-prom is a batch of Zabbix LLD templates for Zabbix server. 
It is used for external Kubernetes monitoring by Zabbix via Prometheus API. ### Installation 1. Install [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) into the Kubernetes cluster. 2. Import global Zabbix Template (zabbix-kube-prom.xml) into your Zabbix server. 3. Create or import a host identifying your Kubernetes cluster where Prometheus is deployed. 4. Let LLD create discovered nodes as new "Zabbix hosts". 5. Let LLD create discovered pods as new "Virtual Zabbix hosts". ### Templates The global export (zabbix-kube-prom.xml) contains the following templates: | Templates | Description | | --- | --- | | Template Kube by Prom API | Creates a Zabbix host for each pod and node discovered. | | Template Kube Node by Prom API | Template applied to the created host (node). | | Template Kube Pod by Prom API | Template applied to the created host (pod). | ### Licenses | Template | License | | --- | --- | | Template OS Linux by Prom | *GNU General Public License v2.0 or later* [Copyright (C) 2001-2021 Zabbix SIA](https://github.com/zabbix/zabbix/blob/master/README) | | Template Kube by Prom API, Template Kube Node by Prom API, Template Kube Pod by Prom API | *GNU General Public License v3.0* [Copyright (C) 2021 Diagnostica Stago](https://www.stago.com/) | --- ## Author Laurent Marchelli groups: - name: 'Templates/Operating systems' items: - uuid: 49f3f89a606148389ff65201ddff42fa name: 'Version of node_exporter running' type: DEPENDENT key: 'agent.version[node_exporter]' delay: '0' history: 7d trends: '0' value_type: CHAR preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_uname_info'')].metric.version' - type: JAVASCRIPT parameters: - 'return JSON.parse(value)[0];' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1d master_item: key: node_exporter.get tags: - tag: Application value: 'Monitoring agent' - uuid: 941eab7b09fc4d5eadc32fbdbe379b57 name: 'Number of open file descriptors' type: 
DEPENDENT key: 'fd.open[node_exporter]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_filefd_allocated'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: General - uuid: 0476755a317a4601864f490304cbbea2 name: 'Maximum number of open file descriptors' type: DEPENDENT key: 'kernel.maxfiles[node_exporter]' delay: '0' history: 7d value_type: FLOAT description: 'It could be increased by using the sysctl utility or by modifying the /etc/sysctl.conf file.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_filefd_maximum'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1d master_item: key: node_exporter.get tags: - tag: Application value: General triggers: - uuid: ee5a34b3c17045c784a8e80e9ae67409 expression: 'last(/Kube Node by Prom API/kernel.maxfiles[node_exporter])<{$KERNEL.MAXFILES.MIN}' name: 'Configured max number of open filedescriptors is too low (< {$KERNEL.MAXFILES.MIN})' priority: INFO dependencies: - name: 'Running out of file descriptors (less than < 20% free)' expression: 'last(/Kube Node by Prom API/fd.open[node_exporter])/last(/Kube Node by Prom API/kernel.maxfiles[node_exporter])*100>80' - uuid: 6225fbb9792c4cf897f8d208bed289fa name: 'Get node_exporter metrics' type: HTTP_AGENT key: node_exporter.get history: 1h trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~''^node_.*$'',instance=~''^{HOST.HOST}:{$NODE_EXPORTER_PORT}$'',container=''node-exporter''}) by (__name__,cpu,mode,device,ifalias,operstate,filesystem,mountpoint,fstype,nodename,machine,sysname,release,version)' tags: - tag: Application value: 'Zabbix raw items' triggers: - uuid: 
a9515f4c19004491a9af42d6350ffe17 expression: 'nodata(/Kube Node by Prom API/node_exporter.get,30m)=1' name: 'node_exporter is not available (or no data for 30m)' priority: WARNING description: 'Failed to fetch system metrics from node_exporter in time.' manual_close: 'YES' - uuid: c32bb849367641b3a2a820f0de6a2dde name: 'System boot time' type: DEPENDENT key: 'system.boottime[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: unixtime preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_boot_time_seconds'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: General - uuid: fa8026672cde457089495c7845e0c7c0 name: 'CPU guest time' type: DEPENDENT key: 'system.cpu.guest[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'Guest time (time spent running a virtual CPU for a guest operating system)' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_guest_seconds_total'' && @.metric.mode==''user'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 6ca1ec9e86474b47acf2e32c9d047e56 name: 'CPU guest nice time' type: DEPENDENT key: 'system.cpu.guest_nice[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'Time spent running a niced guest (virtual CPU for guest operating systems under the control of the Linux kernel)' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_guest_seconds_total'' && @.metric.mode==''nice'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu 
utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 5e6da3c0d4c543cf94bac0e219448362 name: 'CPU idle time' type: DEPENDENT key: 'system.cpu.idle[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'The time the CPU has spent doing nothing.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''idle'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 0ced8706880f4f098ea93b95ffc14809 name: 'CPU interrupt time' type: DEPENDENT key: 'system.cpu.interrupt[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'The amount of time the CPU has been servicing hardware interrupts.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''irq'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 7c96e977575b4d538646448559daf0e1 name: 'Interrupts per second' type: DEPENDENT key: 'system.cpu.intr[node_exporter]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_intr_total'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: ee6e918f788f4df49a69556df0060eaf name: 'CPU iowait time' type: DEPENDENT key: 'system.cpu.iowait[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'Amount of time the CPU has been waiting for I/O to complete.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''iowait'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 98078d6ae69048f3b4729590e4183cb9 name: 'Load average (1m avg)' type: DEPENDENT key: 'system.cpu.load.avg1[node_exporter]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_load1'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 4b5630be349f42838f6216de81ff7a2d name: 'Load average (5m avg)' type: DEPENDENT key: 'system.cpu.load.avg5[node_exporter]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_load5'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 8e1ab11e568d4ffcace2c1aba42b1831 name: 'Load average (15m avg)' type: DEPENDENT key: 'system.cpu.load.avg15[node_exporter]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_load15'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: d9b66884c76c464faed11e53e9ab291b name: 'CPU nice time' type: DEPENDENT key: 'system.cpu.nice[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'The time the CPU has spent running users'' processes that 
have been niced.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''nice'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 1d31f2c9c9954e02bc9b069ff127870a name: 'Number of CPUs' type: DEPENDENT key: 'system.cpu.num[node_exporter]' delay: '0' history: 7d preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric[''mode'']==''idle'')].value[1]' - type: JAVASCRIPT parameters: - | //count the number of cores return JSON.parse(value).length master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: aa0606833a2c4fd48d617a8ee32f14f0 name: 'CPU softirq time' type: DEPENDENT key: 'system.cpu.softirq[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'The amount of time the CPU has been servicing software interrupts.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''softirq'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: fa1deb769a3e4b9daff1f603ab91ea9d name: 'CPU steal time' type: DEPENDENT key: 'system.cpu.steal[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'The amount of CPU ''stolen'' from this virtual machine by the hypervisor for other tasks (such as running another virtual machine).' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''steal'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 517c8ee30ac24fd286e638990113f7ab name: 'Context switches per second' type: DEPENDENT key: 'system.cpu.switches[node_exporter]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_context_switches_total'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 3c30a46f586c4610ad1fb1b7f973d676 name: 'CPU system time' type: DEPENDENT key: 'system.cpu.system[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'The time 
the CPU has spent running the kernel and its processes.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''system'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 09500b36603741d681541b0a6e434ae9 name: 'CPU user time' type: DEPENDENT key: 'system.cpu.user[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'The time the CPU has spent running users'' processes that are not niced.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_cpu_seconds_total'' && @.metric.mode==''user'')].value[1]' - type: JAVASCRIPT parameters: - | //calculates average, all cpu utilization var valueArr = JSON.parse(value); return valueArr.reduce(function(acc,obj){ return acc + parseFloat(obj) },0)/valueArr.length; - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: CPU - uuid: 31570bb206a9471688e6427dacaf4fde name: 'CPU utilization' type: DEPENDENT key: 'system.cpu.util[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'CPU utilization in %' preprocessing: - type: JAVASCRIPT parameters: - | //Calculate utilization return (100 - value) master_item: key: 'system.cpu.idle[node_exporter]' tags: - tag: Application value: CPU triggers: - uuid: 5775a0081f1d4da4b6642e9781b42c13 expression: 'min(/Kube Node by Prom API/system.cpu.util[node_exporter],5m)>{$CPU.UTIL.CRIT}' name: 'High CPU utilization (over {$CPU.UTIL.CRIT}% for 5m)' opdata: 'Current utilization: {ITEM.LASTVALUE1}' priority: WARNING description: 'CPU 
utilization is too high. The system might be slow to respond.' dependencies: - name: 'Load average is too high (per CPU load over {$LOAD_AVG_PER_CPU.MAX.WARN} for 5m)' expression: | min(/Kube Node by Prom API/system.cpu.load.avg1[node_exporter],5m)/last(/Kube Node by Prom API/system.cpu.num[node_exporter])>{$LOAD_AVG_PER_CPU.MAX.WARN} and last(/Kube Node by Prom API/system.cpu.load.avg5[node_exporter])>0 and last(/Kube Node by Prom API/system.cpu.load.avg15[node_exporter])>0 - uuid: df8c79e4448d4b18a5b322a5ed40884f name: 'System description' type: DEPENDENT key: 'system.descr[node_exporter]' delay: '0' history: 2w trends: '0' value_type: CHAR description: 'Labeled system information as provided by the uname system call.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_uname_info'')].metric' - type: JAVASCRIPT parameters: - | var info = JSON.parse(value)[0]; return info.sysname + ' version: ' + info.release + ' ' + info.version; - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1d master_item: key: node_exporter.get tags: - tag: Application value: General - uuid: 1cc406c9317748b8bbbc56e900c5bb54 name: 'System local time' type: DEPENDENT key: 'system.localtime[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: unixtime description: 'System local time of the host.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_time_seconds'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: General triggers: - uuid: 5b657323e460441ebc181429f3bb0683 expression: 'fuzzytime(/Kube Node by Prom API/system.localtime[node_exporter],{$SYSTEM.FUZZYTIME.MAX})=0' name: 'System time is out of sync (diff with Zabbix server > {$SYSTEM.FUZZYTIME.MAX}s)' priority: WARNING description: 'The host system time is different from the Zabbix server time.' 
manual_close: 'YES' - uuid: c2c97999c7c041d1a52e8305bfb5fc35 name: 'System name' type: DEPENDENT key: 'system.name[node_exporter]' delay: '0' history: 2w trends: '0' value_type: CHAR description: 'System host name.' inventory_link: NAME preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_uname_info'')].metric.nodename' - type: JAVASCRIPT parameters: - 'return JSON.parse(value)[0];' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1d master_item: key: node_exporter.get tags: - tag: Application value: General triggers: - uuid: 3fd0c0feadf04e0690d70c22ec690bb8 expression: '(last(/Kube Node by Prom API/system.name[node_exporter],#1)<>last(/Kube Node by Prom API/system.name[node_exporter],#2))=1 and length(last(/Kube Node by Prom API/system.name[node_exporter]))>0' name: 'System name has changed (new name: {ITEM.VALUE})' priority: INFO description: 'System name has changed. Ack to close.' manual_close: 'YES' - uuid: 2f751477cb754fc8a889e05df24b4b3a name: 'Operating system architecture' type: DEPENDENT key: 'system.sw.arch[node_exporter]' delay: '0' history: 2w trends: '0' value_type: CHAR description: 'Operating system architecture of the host.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_uname_info'')].metric.machine' - type: JAVASCRIPT parameters: - 'return JSON.parse(value)[0];' - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1d master_item: key: node_exporter.get tags: - tag: Application value: Inventory - uuid: 4c669251e9fe4efda396e0e5eec5ee1f name: 'Operating system' type: DEPENDENT key: 'system.sw.os[node_exporter]' delay: '0' history: 2w trends: '0' value_type: CHAR inventory_link: OS preprocessing: - type: DISCARD_UNCHANGED_HEARTBEAT parameters: - 1d master_item: key: 'system.descr[node_exporter]' tags: - tag: Application value: Inventory triggers: - uuid: a52d8d065cc142dfa94ee6cf83712a7d expression: '(last(/Kube Node by Prom API/system.sw.os[node_exporter],#1)<>last(/Kube Node by Prom API/system.sw.os[node_exporter],#2))=1 and length(last(/Kube Node by Prom API/system.sw.os[node_exporter]))>0' recovery_mode: NONE name: 'Operating system description has changed' priority: INFO description: 'Operating system description has changed. Possible reasons that system has been updated or replaced. Ack to close.' manual_close: 'YES' dependencies: - name: 'System name has changed (new name: {ITEM.VALUE})' expression: '(last(/Kube Node by Prom API/system.name[node_exporter],#1)<>last(/Kube Node by Prom API/system.name[node_exporter],#2))=1 and length(last(/Kube Node by Prom API/system.name[node_exporter]))>0' - uuid: baa9f3f9ba1644c48d624e604196b45a name: 'Free swap space' type: DEPENDENT key: 'system.swap.free[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: B description: 'The free space of swap volume/file in bytes.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_memory_SwapFree_bytes'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: Memory - uuid: 343969d1a90b4cc5b8bd2658493f1680 name: 'Free swap space in %' type: CALCULATED key: 'system.swap.pfree[node_exporter]' history: 7d value_type: FLOAT units: '%' params: '(last(//system.swap.free[node_exporter])+(last(//system.swap.total[node_exporter])=0))/(last(//system.swap.total[node_exporter])+(last(//system.swap.total[node_exporter])=0))*100' description: 'The free space of swap volume/file in percent. Reports 100 when no swap is configured (total is 0); the boolean terms evaluate to 1 or 0 to avoid a division by zero.' tags: - tag: Application value: Memory - uuid: e46c9e037b854120a0af1ce5790747a6 name: 'Total swap space' type: DEPENDENT key: 'system.swap.total[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: B description: 'The total space of swap volume/file in bytes.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_memory_SwapTotal_bytes'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: Memory - uuid: 0cdefd28150f4363ab8086d2ae798bed name: 'System uptime' type: DEPENDENT key: 'system.uptime[node_exporter]' delay: '0' history: 2w trends: '0' units: uptime description: 'System uptime in ''N days, hh:mm:ss'' format.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_boot_time_seconds'')].value[1]' - type: JAVASCRIPT parameters: - | //use boottime to calculate uptime return (Math.floor(Date.now()/1000)-Number(JSON.parse(value)[0])); master_item: key: node_exporter.get tags: - tag: Application value: Status triggers: - uuid: bbb1836f4da8440b851ecb1f76fa7e52 expression: 'last(/Kube Node by Prom API/system.uptime[node_exporter])<10m' name: '{HOST.NAME} has been restarted (uptime < 10m)' priority: WARNING description: 'The device uptime is less than 10 minutes' manual_close: 'YES' - uuid: 5e09405be401403c9a29e304a6851e1d name: 'Available memory' type: DEPENDENT key: 'vm.memory.available[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: B description: 'Available memory, in Linux, available = free + buffers + cache. On other platforms calculation may vary. See also: https://www.zabbix.com/documentation/current/manual/appendix/items/vm.memory.size_params' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_memory_MemAvailable_bytes'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: Memory - uuid: 8f970fde1a624ef79d7cf0cf83fc42a0 name: 'Total memory' type: DEPENDENT key: 'vm.memory.total[node_exporter]' delay: '0' history: 7d value_type: FLOAT units: B description: 'Total memory in Bytes' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_memory_MemTotal_bytes'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: Memory - uuid: 0ea315797a724496af42386842255ec3 name: 'Memory utilization' type: CALCULATED key: 'vm.memory.util[node_exporter]' history: 7d value_type: FLOAT units: '%' params: 
'(last(//vm.memory.total[node_exporter])-last(//vm.memory.available[node_exporter]))/last(//vm.memory.total[node_exporter])*100' description: 'Memory used percentage is calculated as (total-available)/total*100' tags: - tag: Application value: Memory triggers: - uuid: 09bd7bbe40db42e6945ec67dd15e8244 expression: 'min(/Kube Node by Prom API/vm.memory.util[node_exporter],5m)>{$MEMORY.UTIL.MAX}' name: 'High memory utilization ( >{$MEMORY.UTIL.MAX}% for 5m)' priority: AVERAGE description: 'The system is running out of free memory.' dependencies: - name: 'Lack of available memory ( < {$MEMORY.AVAILABLE.MIN} of {ITEM.VALUE2})' expression: 'min(/Kube Node by Prom API/vm.memory.available[node_exporter],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Kube Node by Prom API/vm.memory.total[node_exporter])>0' discovery_rules: - uuid: 608ff42ca676424ca3e8bf9a3ec4ab7f name: 'Network interface discovery' type: DEPENDENT key: 'net.if.discovery[node_exporter]' delay: '0' filter: evaltype: AND conditions: - macro: '{#IFNAME}' value: '{$NET.IF.IFNAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: B - macro: '{#IFALIAS}' value: '{$NET.IF.IFALIAS.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: A - macro: '{#IFOPERSTATUS}' value: '{$NET.IF.IFOPERSTATUS.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: C description: 'Discovery of network interfaces. Requires node_exporter v0.18 and up.' 
item_prototypes: - uuid: c0bcb8c7cb1041cb983f53e22b95daa9 name: 'Interface {#IFNAME}({#IFALIAS}): Inbound packets discarded' type: DEPENDENT key: 'net.if.in.discards[node_exporter,"{#IFNAME}"]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_receive_drop_total'' && @.metric.device==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' - uuid: 9bd8744515794e2697ea6cb1ccbd29be name: 'Interface {#IFNAME}({#IFALIAS}): Inbound packets with errors' type: DEPENDENT key: 'net.if.in.errors[node_exporter,"{#IFNAME}"]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_receive_errs_total'' && @.metric.device==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' - uuid: 24b06a3ea3774537a08fbba3b55d9f21 name: 'Interface {#IFNAME}({#IFALIAS}): Bits received' type: DEPENDENT key: 'net.if.in[node_exporter,"{#IFNAME}"]' delay: '0' history: 7d value_type: FLOAT units: bps preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_receive_bytes_total'' && @.metric.device==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '8' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' - uuid: f2c8f8ab6e624002b604c1175a333236 name: 'Interface {#IFNAME}({#IFALIAS}): Outbound packets discarded' type: DEPENDENT key: 'net.if.out.discards[node_exporter,"{#IFNAME}"]' 
delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_transmit_drop_total'' && @.metric.device==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' - uuid: cf031b6c7ce74825940973921b933e5f name: 'Interface {#IFNAME}({#IFALIAS}): Outbound packets with errors' type: DEPENDENT key: 'net.if.out.errors[node_exporter"{#IFNAME}"]' delay: '0' history: 7d value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_transmit_errs_total'' && @.metric.device==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' - uuid: f2aac76f151e4b349edfcd191ba9b442 name: 'Interface {#IFNAME}({#IFALIAS}): Bits sent' type: DEPENDENT key: 'net.if.out[node_exporter,"{#IFNAME}"]' delay: '0' history: 7d value_type: FLOAT units: bps preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_transmit_bytes_total'' && @.metric.device==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '8' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' - uuid: 52bfc1015c2e45d28a1213290b81de88 name: 'Interface {#IFNAME}({#IFALIAS}): Speed' type: DEPENDENT key: 'net.if.speed[node_exporter,"{#IFNAME}"]' delay: '0' history: 7d trends: '0' units: bps description: 'Sets value to 0 if metric is missing in node_exporter output.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_speed_bytes'' && @.metric.device==''{#IFNAME}'')].value[1]' error_handler: CUSTOM_VALUE error_handler_params: '["0"]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: MULTIPLIER parameters: - '8' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' - uuid: 7623056b6ef948d1bf20d49861e17371 name: 'Interface {#IFNAME}({#IFALIAS}): Operational status' type: DEPENDENT key: 'net.if.status[node_exporter,"{#IFNAME}"]' delay: '0' history: 7d trends: '0' description: | Indicates the interface RFC2863 operational state as a string. Possible values are:"unknown", "notpresent", "down", "lowerlayerdown", "testing","dormant", "up". Reference: https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net valuemap: name: 'IF-MIB::ifOperStatus' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_info'' && @.metric.device==''{#IFNAME}'')].metric.operstate' - type: JAVASCRIPT parameters: - | var newvalue; switch(JSON.parse(value)[0]) { case "up": newvalue = 1; break; case "down": newvalue = 2; break; case "testing": newvalue = 4; break; case "unknown": newvalue = 5; break; case "dormant": newvalue = 6; break; /* sysfs reports this state in lower case (notpresent); the notPresent spelling is kept for compatibility. NOTE(review) lowerlayerdown still reaches the default branch. */ case "notpresent": case "notPresent": newvalue = 7; break; default: newvalue = "Problem parsing interface operstate in JS"; } return newvalue; master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' trigger_prototypes: - uuid: 70f6e3b646ad4bc6b4e843e01c106029 expression: '{$IFCONTROL:"{#IFNAME}"}=1 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#2))=1)' recovery_mode: RECOVERY_EXPRESSION recovery_expression: 'last(/Kube Node by Prom 
API/net.if.status[node_exporter,"{#IFNAME}"])<>2' name: 'Interface {#IFNAME}({#IFALIAS}): Link down' opdata: 'Current state: {ITEM.LASTVALUE1}' priority: AVERAGE description: | This trigger expression works as follows: 1. Can be triggered if operations status is down. 2. {$IFCONTROL:"{#IFNAME}"}=1 - user can redefine Context macro to value - 0. That marks this interface as not important. No new trigger will be fired if this interface is down. 3. {TEMPLATE_NAME:METRIC.diff()}=1) - trigger fires only if operational status was up(1) sometime before. (So, do not fire 'ethernal off' interfaces.) WARNING: if closed manually - won't fire again on next poll, because of .diff. manual_close: 'YES' - uuid: 0eed79c205b5413f8b2929bfb60ca70e name: 'Interface {#IFNAME}({#IFALIAS}): Interface type' type: DEPENDENT key: 'net.if.type[node_exporter,"{#IFNAME}"]' delay: '0' history: 7d trends: '0' description: 'node_network_protocol_type protocol_type value of /sys/class/net/.' valuemap: name: 'Linux::Interface protocol types' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_protocol_type'' && @.metric.device==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: 'Interface {#IFNAME}({#IFALIAS})' trigger_prototypes: - uuid: a2903c63e70b4f4fb953a9f5aedc5d6c expression: | change(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])<0 and last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])>0 and ( last(/Kube Node by Prom API/net.if.type[node_exporter,"{#IFNAME}"])=6 or last(/Kube Node by Prom API/net.if.type[node_exporter,"{#IFNAME}"])=7 or last(/Kube Node by Prom API/net.if.type[node_exporter,"{#IFNAME}"])=11 or last(/Kube Node by Prom API/net.if.type[node_exporter,"{#IFNAME}"])=62 or last(/Kube Node by Prom API/net.if.type[node_exporter,"{#IFNAME}"])=69 or last(/Kube Node by Prom 
API/net.if.type[node_exporter,"{#IFNAME}"])=117 ) and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])<>2) recovery_mode: RECOVERY_EXPRESSION recovery_expression: | (change(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])>0 and last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"],#2)>0) or (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])=2) name: 'Interface {#IFNAME}({#IFALIAS}): Ethernet has changed to lower speed than it was before' opdata: 'Current reported speed: {ITEM.LASTVALUE1}' priority: INFO description: 'This Ethernet connection has transitioned down from its known maximum speed. This might be a sign of autonegotiation issues. Ack to close.' manual_close: 'YES' dependencies: - name: 'Interface {#IFNAME}({#IFALIAS}): Link down' expression: '{$IFCONTROL:"{#IFNAME}"}=1 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#2))=1)' recovery_expression: 'last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])<>2' - uuid: a5bd4d85b70a42b6b34d582a10f86cd0 expression: | change(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])<0 and last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])>0 and (last(/Kube Node by Prom API/net.if.type[node_exporter,"{#IFNAME}"])=6 or last(/Kube Node by Prom API/net.if.type[node_exporter,"{#IFNAME}"])=1) and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])<>2) recovery_mode: RECOVERY_EXPRESSION recovery_expression: | (change(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])>0 and last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"],#2)>0) or (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])=2) name: 'Interface {#IFNAME}({#IFALIAS}): Ethernet has changed to lower speed than it was 
before' opdata: 'Current reported speed: {ITEM.LASTVALUE1}' priority: INFO description: 'This Ethernet connection has transitioned down from its known maximum speed. This might be a sign of autonegotiation issues. Ack to close.' manual_close: 'YES' dependencies: - name: 'Interface {#IFNAME}({#IFALIAS}): Link down' expression: '{$IFCONTROL:"{#IFNAME}"}=1 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#2))=1)' recovery_expression: 'last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])<>2' - uuid: c250c690d26543088ca15e56f0dffd2f expression: | (avg(/Kube Node by Prom API/net.if.in[node_exporter,"{#IFNAME}"],15m)>({$IF.UTIL.MAX:"{#IFNAME}"}/100)*last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"]) or avg(/Kube Node by Prom API/net.if.out[node_exporter,"{#IFNAME}"],15m)>({$IF.UTIL.MAX:"{#IFNAME}"}/100)*last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])) and last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"])>0 recovery_mode: RECOVERY_EXPRESSION recovery_expression: | avg(/Kube Node by Prom API/net.if.in[node_exporter,"{#IFNAME}"],15m)<(({$IF.UTIL.MAX:"{#IFNAME}"}-3)/100)*last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"]) and avg(/Kube Node by Prom API/net.if.out[node_exporter,"{#IFNAME}"],15m)<(({$IF.UTIL.MAX:"{#IFNAME}"}-3)/100)*last(/Kube Node by Prom API/net.if.speed[node_exporter,"{#IFNAME}"]) name: 'Interface {#IFNAME}({#IFALIAS}): High bandwidth usage ( > {$IF.UTIL.MAX:"{#IFNAME}"}% )' opdata: 'In: {ITEM.LASTVALUE1}, out: {ITEM.LASTVALUE3}, speed: {ITEM.LASTVALUE2}' priority: WARNING description: 'The network interface utilization is close to its estimated maximum bandwidth.' 
manual_close: 'YES' dependencies: - name: 'Interface {#IFNAME}({#IFALIAS}): Link down' expression: '{$IFCONTROL:"{#IFNAME}"}=1 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#2))=1)' recovery_expression: 'last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])<>2' - uuid: 6d0dab3389e3447ca9bba4d3c96e3914 expression: | min(/Kube Node by Prom API/net.if.in.errors[node_exporter,"{#IFNAME}"],5m)>{$IF.ERRORS.WARN:"{#IFNAME}"} or min(/Kube Node by Prom API/net.if.out.errors[node_exporter"{#IFNAME}"],5m)>{$IF.ERRORS.WARN:"{#IFNAME}"} recovery_mode: RECOVERY_EXPRESSION recovery_expression: | max(/Kube Node by Prom API/net.if.in.errors[node_exporter,"{#IFNAME}"],5m)<{$IF.ERRORS.WARN:"{#IFNAME}"}*0.8 and max(/Kube Node by Prom API/net.if.out.errors[node_exporter"{#IFNAME}"],5m)<{$IF.ERRORS.WARN:"{#IFNAME}"}*0.8 name: 'Interface {#IFNAME}({#IFALIAS}): High error rate ( > {$IF.ERRORS.WARN:"{#IFNAME}"} for 5m)' opdata: 'errors in: {ITEM.LASTVALUE1}, errors out: {ITEM.LASTVALUE2}' priority: WARNING description: 'Recovers when below 80% of {$IF.ERRORS.WARN:"{#IFNAME}"} threshold' manual_close: 'YES' dependencies: - name: 'Interface {#IFNAME}({#IFALIAS}): Link down' expression: '{$IFCONTROL:"{#IFNAME}"}=1 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])=2 and (last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#1)<>last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"],#2))=1)' recovery_expression: 'last(/Kube Node by Prom API/net.if.status[node_exporter,"{#IFNAME}"])<>2' graph_prototypes: - uuid: 071839fed58e443586cf6407d0d8dae3 name: 'Interface {#IFNAME}({#IFALIAS}): Network traffic' graph_items: - drawtype: GRADIENT_LINE color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'net.if.in[node_exporter,"{#IFNAME}"]' - sortorder: '1' 
drawtype: BOLD_LINE color: 2774A4 item: host: 'Kube Node by Prom API' key: 'net.if.out[node_exporter,"{#IFNAME}"]' - sortorder: '2' color: F63100 yaxisside: RIGHT item: host: 'Kube Node by Prom API' key: 'net.if.out.errors[node_exporter"{#IFNAME}"]' - sortorder: '3' color: A54F10 yaxisside: RIGHT item: host: 'Kube Node by Prom API' key: 'net.if.in.errors[node_exporter,"{#IFNAME}"]' - sortorder: '4' color: FC6EA3 yaxisside: RIGHT item: host: 'Kube Node by Prom API' key: 'net.if.out.discards[node_exporter,"{#IFNAME}"]' - sortorder: '5' color: 6C59DC yaxisside: RIGHT item: host: 'Kube Node by Prom API' key: 'net.if.in.discards[node_exporter,"{#IFNAME}"]' master_item: key: node_exporter.get lld_macro_paths: - lld_macro: '{#IFALIAS}' path: $.ifalias - lld_macro: '{#IFNAME}' path: $.device - lld_macro: '{#IFOPERSTATUS}' path: $.operstate preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_network_info'' && @.metric.device=~''{$NET.IF.IFNAME.MATCHES}'' && @.metric.ifalias=~''{$NET.IF.IFALIAS.MATCHES}'' && @.metric.operstate=~''{$NET.IF.IFOPERSTATUS.MATCHES}'')].metric' - type: JAVASCRIPT parameters: - | return JSON.stringify(JSON.parse(value).map( function(metric){if(!("ifalias" in metric)) {metric.ifalias=""} return metric} )) - uuid: a4ad9f9d6fa149f593579a59bc4b12f7 name: 'Block devices discovery' type: DEPENDENT key: 'vfs.dev.discovery[node_exporter]' delay: '0' filter: evaltype: AND conditions: - macro: '{#DEVNAME}' value: '{$VFS.DEV.DEVNAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: A item_prototypes: - uuid: 8bd87641ac22475da7e3964e1dbb543a name: '{#DEVNAME}: Disk average queue size (avgqu-sz)' type: DEPENDENT key: 'vfs.dev.queue_size[node_exporter,"{#DEVNAME}"]' delay: '0' history: 7d value_type: FLOAT description: 'Current average disk queue, the number of requests outstanding on the disk at the time the performance data is collected.' 
preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_disk_io_time_weighted_seconds_total'' && @.metric.device==''{#DEVNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Disk {#DEVNAME}' - uuid: 828454ff80094d59a51af850e7ad371b name: '{#DEVNAME}: Disk read request avg waiting time (r_await)' type: CALCULATED key: 'vfs.dev.read.await[node_exporter,"{#DEVNAME}"]' history: 7d value_type: FLOAT units: '!ms' params: '(last(//vfs.dev.read.time.rate[node_exporter,"{#DEVNAME}"])/(last(//vfs.dev.read.rate[node_exporter,"{#DEVNAME}"])+(last(//vfs.dev.read.rate[node_exporter,"{#DEVNAME}"])=0)))*1000*(last(//vfs.dev.read.rate[node_exporter,"{#DEVNAME}"]) > 0)' description: 'This formula contains two boolean expressions that evaluates to 1 or 0 in order to set calculated metric to zero and to avoid division by zero exception.' tags: - tag: Application value: 'Disk {#DEVNAME}' - uuid: 69cd9479ffca477a819e96bd89229480 name: '{#DEVNAME}: Disk read rate' type: DEPENDENT key: 'vfs.dev.read.rate[node_exporter,"{#DEVNAME}"]' delay: '0' history: 7d value_type: FLOAT units: '!r/s' description: 'r/s. The number (after merges) of read requests completed per second for the device.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_disk_reads_completed_total'' && @.metric.device==''{#DEVNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Disk {#DEVNAME}' - uuid: aef72dc0fdf94657a67d7c24def0e33e name: '{#DEVNAME}: Disk read time (rate)' type: DEPENDENT key: 'vfs.dev.read.time.rate[node_exporter,"{#DEVNAME}"]' delay: '0' history: 7d value_type: FLOAT description: 'Rate of total read time counter. 
Used in r_await calculation' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_disk_read_time_seconds_total'' && @.metric.device==''{#DEVNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Disk {#DEVNAME}' - uuid: fd16853cf55d40418f03a040bc87c959 name: '{#DEVNAME}: Disk utilization' type: DEPENDENT key: 'vfs.dev.util[node_exporter,"{#DEVNAME}"]' delay: '0' history: 7d value_type: FLOAT units: '%' description: 'This item is the percentage of elapsed time that the selected disk drive was busy servicing read or writes requests.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_disk_io_time_seconds_total'' && @.metric.device==''{#DEVNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' - type: MULTIPLIER parameters: - '100' master_item: key: node_exporter.get tags: - tag: Application value: 'Disk {#DEVNAME}' - uuid: 82cf93a74cbf4a9bbd04ac3af104c4aa name: '{#DEVNAME}: Disk write request avg waiting time (w_await)' type: CALCULATED key: 'vfs.dev.write.await[node_exporter,"{#DEVNAME}"]' history: 7d value_type: FLOAT units: '!ms' params: '(last(//vfs.dev.write.time.rate[node_exporter,"{#DEVNAME}"])/(last(//vfs.dev.write.rate[node_exporter,"{#DEVNAME}"])+(last(//vfs.dev.write.rate[node_exporter,"{#DEVNAME}"])=0)))*1000*(last(//vfs.dev.write.rate[node_exporter,"{#DEVNAME}"]) > 0)' description: 'This formula contains two boolean expressions that evaluates to 1 or 0 in order to set calculated metric to zero and to avoid division by zero exception.' 
tags: - tag: Application value: 'Disk {#DEVNAME}' - uuid: e523bc6b5b9f4dd39bc0b8b9fa93f789 name: '{#DEVNAME}: Disk write rate' type: DEPENDENT key: 'vfs.dev.write.rate[node_exporter,"{#DEVNAME}"]' delay: '0' history: 7d value_type: FLOAT units: '!w/s' description: 'w/s. The number (after merges) of write requests completed per second for the device.' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_disk_writes_completed_total'' && @.metric.device==''{#DEVNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Disk {#DEVNAME}' - uuid: 79bad63c5eb9403e8cd7ac12b4eb8f97 name: '{#DEVNAME}: Disk write time (rate)' type: DEPENDENT key: 'vfs.dev.write.time.rate[node_exporter,"{#DEVNAME}"]' delay: '0' history: 7d value_type: FLOAT description: 'Rate of total write time counter. Used in w_await calculation' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_disk_write_time_seconds_total'' && @.metric.device==''{#DEVNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' - type: CHANGE_PER_SECOND parameters: - '' master_item: key: node_exporter.get tags: - tag: Application value: 'Disk {#DEVNAME}' trigger_prototypes: - uuid: f782b691c3264ba19e98252032563e25 expression: 'min(/Kube Node by Prom API/vfs.dev.read.await[node_exporter,"{#DEVNAME}"],15m) > {$VFS.DEV.READ.AWAIT.WARN:"{#DEVNAME}"} or min(/Kube Node by Prom API/vfs.dev.write.await[node_exporter,"{#DEVNAME}"],15m) > {$VFS.DEV.WRITE.AWAIT.WARN:"{#DEVNAME}"}' name: '{#DEVNAME}: Disk read/write request responses are too high (read > {$VFS.DEV.READ.AWAIT.WARN:"{#DEVNAME}"} ms for 15m or write > {$VFS.DEV.WRITE.AWAIT.WARN:"{#DEVNAME}"} ms for 15m)' priority: WARNING description: 'This trigger might indicate disk {#DEVNAME} saturation.' 
manual_close: 'YES' graph_prototypes: - uuid: 9473644509524dbc990c5d1b9fcae2e7 name: '{#DEVNAME}: Disk average waiting time' graph_items: - color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'vfs.dev.read.await[node_exporter,"{#DEVNAME}"]' - sortorder: '1' drawtype: GRADIENT_LINE color: 2774A4 item: host: 'Kube Node by Prom API' key: 'vfs.dev.write.await[node_exporter,"{#DEVNAME}"]' - uuid: e2fe7d2f62cf4431bfe89d0652c29ad3 name: '{#DEVNAME}: Disk read/write rates' graph_items: - color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'vfs.dev.read.rate[node_exporter,"{#DEVNAME}"]' - sortorder: '1' drawtype: GRADIENT_LINE color: 2774A4 item: host: 'Kube Node by Prom API' key: 'vfs.dev.write.rate[node_exporter,"{#DEVNAME}"]' - uuid: 3499a26eb2384c76bb5f9e06f25ad0a5 name: '{#DEVNAME}: Disk utilization and queue' graph_items: - color: 1A7C11 yaxisside: RIGHT item: host: 'Kube Node by Prom API' key: 'vfs.dev.queue_size[node_exporter,"{#DEVNAME}"]' - sortorder: '1' drawtype: GRADIENT_LINE color: 2774A4 item: host: 'Kube Node by Prom API' key: 'vfs.dev.util[node_exporter,"{#DEVNAME}"]' master_item: key: node_exporter.get lld_macro_paths: - lld_macro: '{#DEVNAME}' path: $.device preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_disk_io_now'' && @.metric.device=~''{$VFS.DEV.DEVNAME.MATCHES}'')].metric' - uuid: b8172c8e9bba459baf36c69149adb6d8 name: 'Mounted filesystem discovery' type: DEPENDENT key: 'vfs.fs.discovery[node_exporter]' delay: '0' filter: evaltype: AND conditions: - macro: '{#FSTYPE}' value: '{$VFS.FS.FSTYPE.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: C - macro: '{#FSNAME}' value: '{$VFS.FS.FSNAME.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: B - macro: '{#FSDEVICE}' value: '{$VFS.FS.FSDEVICE.NOT_MATCHES}' operator: NOT_MATCHES_REGEX formulaid: A description: 'Discovery of file systems of different types.' 
item_prototypes: - uuid: fee02b926dbb4be5898c26d702a10b07 name: '{#FSNAME}: Free space' type: DEPENDENT key: 'vfs.fs.free[node_exporter,"{#FSNAME}"]' delay: '0' history: 7d value_type: FLOAT units: B preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_filesystem_avail_bytes'' && @.metric.device==''{#FSDEVICE}'' && @.metric.fstype==''{#FSTYPE}'' && @.metric.mountpoint==''{#FSNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: 'Filesystem {#FSNAME}' - uuid: f4a03c3c30374b8aaa2f0914c57d98aa name: '{#FSNAME}: Free inodes in %' type: DEPENDENT key: 'vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"]' delay: '0' history: 7d value_type: FLOAT units: '%' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']=~''node_filesystem_files.*'' && @.metric.device==''{#FSDEVICE}'' && @.metric.fstype==''{#FSTYPE}'' && @.metric.mountpoint==''{#FSNAME}'')]' - type: JAVASCRIPT parameters: - | //count vfs.fs.inode.pfree var inode_free; var inode_total; JSON.parse(value).forEach(function(value) { if (value.metric['__name__'] == 'node_filesystem_files'){ inode_total = value.value[1]; } else if (value.metric['__name__'] == 'node_filesystem_files_free'){ inode_free = value.value[1]; } }); return (inode_free/inode_total)*100; master_item: key: node_exporter.get tags: - tag: Application value: 'Filesystem {#FSNAME}' trigger_prototypes: - uuid: 7d7e00c5279742c99a3693f38866e9be expression: 'min(/Kube Node by Prom API/vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"],5m)<{$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}' name: '{#FSNAME}: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}%)' opdata: 'Free inodes: {ITEM.LASTVALUE1}' priority: AVERAGE description: | It may become impossible to write to disk if there are no index nodes left. 
As symptoms, 'No space left on device' or 'Disk is full' errors may be seen even though free space is available. - uuid: 4706d21a86314374ab15d169d23f2499 expression: 'min(/Kube Node by Prom API/vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"],5m)<{$VFS.FS.INODE.PFREE.MIN.WARN:"{#FSNAME}"}' name: '{#FSNAME}: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.WARN:"{#FSNAME}"}%)' opdata: 'Free inodes: {ITEM.LASTVALUE1}' priority: WARNING description: | It may become impossible to write to disk if there are no index nodes left. As symptoms, 'No space left on device' or 'Disk is full' errors may be seen even though free space is available. dependencies: - name: '{#FSNAME}: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}%)' expression: 'min(/Kube Node by Prom API/vfs.fs.inode.pfree[node_exporter,"{#FSNAME}"],5m)<{$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}' - uuid: e34733aff3b54e63a8b62e4115e28445 name: '{#FSNAME}: Space utilization' type: CALCULATED key: 'vfs.fs.pused[node_exporter,"{#FSNAME}"]' history: 7d value_type: FLOAT units: '%' params: '(last(//vfs.fs.used[node_exporter,"{#FSNAME}"])/last(//vfs.fs.total[node_exporter,"{#FSNAME}"]))*100' description: 'Space utilization in % for {#FSNAME}' tags: - tag: Application value: 'Filesystem {#FSNAME}' - uuid: fbbe8c9cba7c4db2a748a8b6ca004878 name: '{#FSNAME}: Total space' type: DEPENDENT key: 'vfs.fs.total[node_exporter,"{#FSNAME}"]' delay: '0' history: 7d value_type: FLOAT units: B description: 'Total space in Bytes' preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_filesystem_size_bytes'' && @.metric.device==''{#FSDEVICE}'' && @.metric.fstype==''{#FSTYPE}'' && @.metric.mountpoint==''{#FSNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number)' master_item: key: node_exporter.get tags: - tag: Application value: 'Filesystem {#FSNAME}' - uuid: f71e2149f2d84635bde7d1a1006dbafb name: '{#FSNAME}: Used space' type: CALCULATED 
key: 'vfs.fs.used[node_exporter,"{#FSNAME}"]' history: 7d value_type: FLOAT units: B params: '(last(//vfs.fs.total[node_exporter,"{#FSNAME}"])-last(//vfs.fs.free[node_exporter,"{#FSNAME}"]))' description: 'Used storage in Bytes' tags: - tag: Application value: 'Filesystem {#FSNAME}' trigger_prototypes: - uuid: 0ca86d5bd2304214ae13d858d09e08ee expression: | last(/Kube Node by Prom API/vfs.fs.pused[node_exporter,"{#FSNAME}"])>{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"} and ((last(/Kube Node by Prom API/vfs.fs.total[node_exporter,"{#FSNAME}"])-last(/Kube Node by Prom API/vfs.fs.used[node_exporter,"{#FSNAME}"]))<5G or timeleft(/Kube Node by Prom API/vfs.fs.pused[node_exporter,"{#FSNAME}"],1h,100)<1d) name: '{#FSNAME}: Disk space is critically low (used > {$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"}%)' opdata: 'Space used: {ITEM.LASTVALUE3} of {ITEM.LASTVALUE2} ({ITEM.LASTVALUE1})' priority: AVERAGE description: | Two conditions should match: First, space utilization should be above {$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"}. Second condition should be one of the following: - The disk free space is less than 5G. - The disk will be full in less than 24 hours. manual_close: 'YES' - uuid: f9bc6b2ccf1d4caa94479043f74b1c8a expression: | last(/Kube Node by Prom API/vfs.fs.pused[node_exporter,"{#FSNAME}"])>{$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"} and ((last(/Kube Node by Prom API/vfs.fs.total[node_exporter,"{#FSNAME}"])-last(/Kube Node by Prom API/vfs.fs.used[node_exporter,"{#FSNAME}"]))<10G or timeleft(/Kube Node by Prom API/vfs.fs.pused[node_exporter,"{#FSNAME}"],1h,100)<1d) name: '{#FSNAME}: Disk space is low (used > {$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"}%)' opdata: 'Space used: {ITEM.LASTVALUE3} of {ITEM.LASTVALUE2} ({ITEM.LASTVALUE1})' priority: WARNING description: | Two conditions should match: First, space utilization should be above {$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"}. Second condition should be one of the following: - The disk free space is less than 10G. 
- The disk will be full in less than 24 hours. manual_close: 'YES' dependencies: - name: '{#FSNAME}: Disk space is critically low (used > {$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"}%)' expression: | last(/Kube Node by Prom API/vfs.fs.pused[node_exporter,"{#FSNAME}"])>{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"} and ((last(/Kube Node by Prom API/vfs.fs.total[node_exporter,"{#FSNAME}"])-last(/Kube Node by Prom API/vfs.fs.used[node_exporter,"{#FSNAME}"]))<5G or timeleft(/Kube Node by Prom API/vfs.fs.pused[node_exporter,"{#FSNAME}"],1h,100)<1d) graph_prototypes: - uuid: 087bb6702dbf4026beda054e69480196 name: '{#FSNAME}: Disk space usage' width: '600' height: '340' type: PIE show_3d: 'YES' graph_items: - color: '969696' calc_fnc: LAST type: GRAPH_SUM item: host: 'Kube Node by Prom API' key: 'vfs.fs.total[node_exporter,"{#FSNAME}"]' - sortorder: '1' color: C80000 calc_fnc: LAST item: host: 'Kube Node by Prom API' key: 'vfs.fs.used[node_exporter,"{#FSNAME}"]' master_item: key: node_exporter.get lld_macro_paths: - lld_macro: '{#FSDEVICE}' path: $.device - lld_macro: '{#FSNAME}' path: $.mountpoint - lld_macro: '{#FSTYPE}' path: $.fstype preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''node_filesystem_size_bytes'' && @.metric.device=~''{$VFS.FS.FSDEVICE.MATCHES}'' && @.metric.fstype=~''{$VFS.FS.FSTYPE.MATCHES}'' && @.metric.mountpoint=~''{$VFS.FS.FSNAME.MATCHES}'')].metric' macros: - macro: '{$CPU.UTIL.CRIT}' value: '90' - macro: '{$IF.ERRORS.WARN}' value: '2' - macro: '{$IF.UTIL.MAX}' value: '90' - macro: '{$IFCONTROL}' value: '1' - macro: '{$KERNEL.MAXFILES.MIN}' value: '256' - macro: '{$LOAD_AVG_PER_CPU.MAX.WARN}' value: '1.5' description: 'Load per CPU considered sustainable. Tune if needed.' 
- macro: '{$MEMORY.AVAILABLE.MIN}' value: 20M - macro: '{$MEMORY.UTIL.MAX}' value: '90' - macro: '{$NET.IF.IFALIAS.MATCHES}' value: '^.*$' - macro: '{$NET.IF.IFALIAS.NOT_MATCHES}' value: CHANGE_IF_NEEDED - macro: '{$NET.IF.IFNAME.MATCHES}' value: '^.*$' - macro: '{$NET.IF.IFNAME.NOT_MATCHES}' value: '(^Software Loopback Interface|^NULL[0-9.]*$|^[Ll]o[0-9.]*$|^[Ss]ystem$|^Nu[0-9.]*$|^veth[0-9a-z]+$|docker[0-9]+|br-[a-z0-9]{12})' description: 'Filter out loopbacks, nulls, docker veth links and docker0 bridge by default' - macro: '{$NET.IF.IFOPERSTATUS.MATCHES}' value: '^.*$' - macro: '{$NET.IF.IFOPERSTATUS.NOT_MATCHES}' value: ^7$ description: 'Ignore notPresent(7)' - macro: '{$NODE_EXPORTER_PORT}' value: '9100' description: 'TCP Port node_exporter is listening on.' - macro: '{$PROM.API.URL}' - macro: '{$SWAP.PFREE.MIN.WARN}' value: '50' - macro: '{$SYSTEM.FUZZYTIME.MAX}' value: '60' - macro: '{$VFS.DEV.DEVNAME.MATCHES}' value: .+ description: 'This macro is used in block devices discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.DEV.DEVNAME.NOT_MATCHES}' value: '^(loop[0-9]*|sd[a-z][0-9]+|nbd[0-9]+|sr[0-9]+|fd[0-9]+|dm-[0-9]+|ram[0-9]+|ploop[a-z0-9]+|md[0-9]*|hcp[0-9]*|zram[0-9]*)' description: 'This macro is used in block devices discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.DEV.READ.AWAIT.WARN}' value: '20' description: 'Disk read average response time (in ms) before the trigger would fire' - macro: '{$VFS.DEV.WRITE.AWAIT.WARN}' value: '20' description: 'Disk write average response time (in ms) before the trigger would fire' - macro: '{$VFS.FS.FSDEVICE.MATCHES}' value: ^.+$ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSDEVICE.NOT_MATCHES}' value: ^\s$ description: 'This macro is used in filesystems discovery. 
Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSNAME.MATCHES}' value: .+ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSNAME.NOT_MATCHES}' value: ^(/dev|/sys|/run|/proc|.+/shm$) description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSTYPE.MATCHES}' value: ^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs)$ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.FSTYPE.NOT_MATCHES}' value: ^\s$ description: 'This macro is used in filesystems discovery. Can be overridden on the host or linked template level' - macro: '{$VFS.FS.INODE.PFREE.MIN.CRIT}' value: '10' - macro: '{$VFS.FS.INODE.PFREE.MIN.WARN}' value: '20' - macro: '{$VFS.FS.PUSED.MAX.CRIT}' value: '90' - macro: '{$VFS.FS.PUSED.MAX.WARN}' value: '80' dashboards: - uuid: c62b2588f54f4398872f3b9f44c2f996 name: 'Network interfaces' pages: - widgets: - type: GRAPH_PROTOTYPE width: '24' height: '5' fields: - type: INTEGER name: source_type value: '2' - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: name: 'Interface {#IFNAME}({#IFALIAS}): Network traffic' host: 'Kube Node by Prom API' - uuid: 3ae50d1c790a4f9a91dcc040f235731d name: 'System performance' pages: - widgets: - type: GRAPH_CLASSIC width: '12' height: '5' fields: - type: INTEGER name: source_type value: '0' - type: GRAPH name: graphid value: name: 'System load' host: 'Kube Node by Prom API' - type: GRAPH_CLASSIC x: '12' width: '12' height: '5' fields: - type: INTEGER name: source_type value: '0' - type: GRAPH name: graphid value: name: 'CPU usage' host: 'Kube Node by Prom API' - type: GRAPH_CLASSIC 'y': '5' width: '12' height: '5' fields: - type: INTEGER name: source_type 
value: '0' - type: GRAPH name: graphid value: name: 'Memory usage' host: 'Kube Node by Prom API' - type: GRAPH_CLASSIC x: '12' 'y': '5' width: '12' height: '5' fields: - type: INTEGER name: source_type value: '0' - type: GRAPH name: graphid value: name: 'Swap usage' host: 'Kube Node by Prom API' - type: GRAPH_PROTOTYPE 'y': '10' width: '24' height: '5' fields: - type: INTEGER name: source_type value: '2' - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: name: '{#FSNAME}: Disk space usage' host: 'Kube Node by Prom API' - type: GRAPH_PROTOTYPE 'y': '15' width: '24' height: '5' fields: - type: INTEGER name: source_type value: '2' - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: name: '{#DEVNAME}: Disk read/write rates' host: 'Kube Node by Prom API' - type: GRAPH_PROTOTYPE 'y': '20' width: '24' height: '5' fields: - type: INTEGER name: source_type value: '2' - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: name: '{#DEVNAME}: Disk average waiting time' host: 'Kube Node by Prom API' - type: GRAPH_PROTOTYPE 'y': '25' width: '24' height: '5' fields: - type: INTEGER name: source_type value: '2' - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: name: '{#DEVNAME}: Disk utilization and queue' host: 'Kube Node by Prom API' - type: GRAPH_PROTOTYPE 'y': '30' width: '24' height: '5' fields: - type: INTEGER name: source_type value: '2' - type: INTEGER name: columns value: '1' - type: INTEGER name: rows value: '1' - type: GRAPH_PROTOTYPE name: graphid value: name: 'Interface {#IFNAME}({#IFALIAS}): Network traffic' host: 'Kube Node by Prom API' valuemaps: - uuid: 849cf5e20c2649499ea50c0a6b97512f name: 'IF-MIB::ifOperStatus' mappings: - value: '1' newvalue: up - value: '2' newvalue: down - 
value: '4' newvalue: unknown - value: '5' newvalue: dormant - value: '6' newvalue: notPresent - value: '7' newvalue: lowerLayerDown - uuid: b2ae618dc7ff48618c03287f45793ffe name: 'Linux::Interface protocol types' mappings: - value: '0' newvalue: 'from KA9Q: NET/ROM pseudo' - value: '1' newvalue: Ethernet - value: '2' newvalue: 'Experimental Ethernet' - value: '3' newvalue: 'AX.25 Level 2' - value: '4' newvalue: 'PROnet token ring' - value: '5' newvalue: Chaosnet - value: '6' newvalue: 'IEEE 802.2 Ethernet/TR/TB' - value: '7' newvalue: ARCnet - value: '8' newvalue: APPLEtalk - value: '15' newvalue: 'Frame Relay DLCI' - value: '19' newvalue: ATM - value: '23' newvalue: 'Metricom STRIP (new IANA id)' - value: '24' newvalue: 'IEEE 1394 IPv4 - RFC 2734' - value: '27' newvalue: EUI-64 - value: '32' newvalue: InfiniBand - value: '256' newvalue: ARPHRD_SLIP - value: '257' newvalue: ARPHRD_CSLIP - value: '258' newvalue: ARPHRD_SLIP6 - value: '259' newvalue: ARPHRD_CSLIP6 - value: '260' newvalue: 'Notional KISS type' - value: '264' newvalue: ARPHRD_ADAPT - value: '270' newvalue: ARPHRD_ROSE - value: '271' newvalue: 'CCITT X.25' - value: '272' newvalue: 'Boards with X.25 in firmware' - value: '280' newvalue: 'Controller Area Network' - value: '512' newvalue: ARPHRD_PPP - value: '513' newvalue: 'Cisco HDLC' - value: '516' newvalue: LAPB - value: '517' newvalue: 'Digital''s DDCMP protocol' - value: '518' newvalue: 'Raw HDLC' - value: '519' newvalue: 'Raw IP' - value: '768' newvalue: 'IPIP tunnel' - value: '769' newvalue: 'IP6IP6 tunnel' - value: '770' newvalue: 'Frame Relay Access Device' - value: '771' newvalue: 'SKIP vif' - value: '772' newvalue: 'Loopback device' - value: '773' newvalue: 'Localtalk device' - value: '774' newvalue: 'Fiber Distributed Data Interface' - value: '775' newvalue: 'AP1000 BIF' - value: '776' newvalue: 'sit0 device - IPv6-in-IPv4' - value: '777' newvalue: 'IP over DDP tunneller' - value: '778' newvalue: 'GRE over IP' - value: '779' newvalue: 'PIMSM 
register interface' - value: '780' newvalue: 'High Performance Parallel Interface' - value: '781' newvalue: 'Nexus 64Mbps Ash' - value: '782' newvalue: 'Acorn Econet' - value: '783' newvalue: Linux-IrDA - value: '784' newvalue: 'Point to point fibrechannel' - value: '785' newvalue: 'Fibrechannel arbitrated loop' - value: '786' newvalue: 'Fibrechannel public loop' - value: '787' newvalue: 'Fibrechannel fabric' - value: '800' newvalue: 'Magic type ident for TR' - value: '801' newvalue: 'IEEE 802.11' - value: '802' newvalue: 'IEEE 802.11 + Prism2 header' - value: '803' newvalue: 'IEEE 802.11 + radiotap header' - value: '804' newvalue: ARPHRD_IEEE802154 - value: '805' newvalue: 'IEEE 802.15.4 network monitor' - value: '820' newvalue: 'PhoNet media type' - value: '821' newvalue: 'PhoNet pipe header' - value: '822' newvalue: 'CAIF media type' - value: '823' newvalue: 'GRE over IPv6' - value: '824' newvalue: 'Netlink header' - value: '825' newvalue: 'IPv6 over LoWPAN' - value: '826' newvalue: 'Vsock monitor header' - uuid: 8e1772853b3949f091d3a7263c6b6a6d template: 'Kube Pod by Prom API' name: 'Kube Pod by Prom API' description: | ## Description This template works out of the box as soon as Prometheus (Prometheus-operator) is available inside your cluster; it does not require any Zabbix agent installation or configuration. It allows external monitoring of the Kubernetes cluster through ingress, without any NodePort declaration. It uses the Prometheus API to create a Zabbix host for each pod available inside the Kubernetes cluster. {$PROM.API.URL} must contain the Prometheus entry point into your Kubernetes cluster. Zabbix pod hosts are created with the "Template Kube Pod by Prom API" template by default. ## Overview ### Description zabbix-kube-prom is a batch of Zabbix LLD templates for Zabbix server. It is used for external Kubernetes monitoring by Zabbix via Prometheus API. ### Installation 1.
Install [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) into the Kubernetes cluster. 2. Import global Zabbix Template (zabbix-kube-prom.xml) into your Zabbix server. 3. Create or import a host identifying your Kubernetes cluster where Prometheus is deployed. 4. Let LLD create discovered nodes as new "Zabbix hosts". 5. Let LLD create discovered pods as new "Virtual Zabbix hosts". ### Templates The global export (zabbix-kube-prom.xml) contains the following templates: | Templates | Description | | --- | --- | | Template Kube by Prom API | Creates a Zabbix host for each pod and node discovered. | | Template Kube Node by Prom API | Template applied to the created host (node). | | Template Kube Pod by Prom API | Template applied to the created host (pod). | ### Licenses | Template | License | | --- | --- | | Template OS Linux by Prom | *GNU General Public License v2.0 or later* [Copyright (C) 2001-2021 Zabbix SIA](https://github.com/zabbix/zabbix/blob/master/README) | | Template Kube by Prom API, Template Kube Node by Prom API, Template Kube Pod by Prom API | *GNU General Public License v3.0* [Copyright (C) 2021 Diagnostica Stago](https://www.stago.com/) | --- ## Author Laurent Marchelli ## Description Official Linux template using node exporter. Known Issues: Description: node_exporter v0.16.0 renamed many metrics. CPU utilization for 'guest' and 'guest_nice' metrics are not supported in this template with node_exporter < 0.16. Disk IO metrics are not supported. Other metrics provided as 'best effort'. See https://github.com/prometheus/node_exporter/releases/tag/v0.16.0 for details. Version: below 0.16.0 Description: metric node_network_info with label 'device' cannot be found, so network discovery is not possible.
Version: below 0.18 You can discuss this template or leave feedback on our forum https://www.zabbix.com/forum/zabbix-suggestions-and-feedback/387225-discussion-thread-for-official-zabbix-template-for-linux Template tooling version used: 0.34 ## Overview ### Description zabbix-kube-prom is a batch of Zabbix LLD templates for Zabbix server. It is used for external Kubernetes monitoring by Zabbix via Prometheus API. ### Installation 1. Install [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) into the Kubernetes cluster. 2. Import global Zabbix Template (zabbix-kube-prom.xml) into your Zabbix server. 3. Create or import a host identifying your Kubernetes cluster where Prometheus is deployed. 4. Let LLD create discovered nodes as new "Zabbix hosts". 5. Let LLD create discovered pods as new "Virtual Zabbix hosts". ### Templates The global export (zabbix-kube-prom.xml) contains the following templates: | Templates | Description | | --- | --- | | Template Kube by Prom API | Creates a Zabbix host for each pod and node discovered. | | Template Kube Node by Prom API | Template applied to the created host (node). | | Template Kube Pod by Prom API | Template applied to the created host (pod). | ### Licenses | Template | License | | --- | --- | | Template OS Linux by Prom | *GNU General Public License v2.0 or later* [Copyright (C) 2001-2021 Zabbix SIA](https://github.com/zabbix/zabbix/blob/master/README) | | Template Kube by Prom API, Template Kube Node by Prom API, Template Kube Pod by Prom API | *GNU General Public License v3.0* [Copyright (C) 2021 Diagnostica Stago](https://www.stago.com/) | --- ## Author Laurent Marchelli ## Overview ### Description zabbix-kube-prom is a batch of Zabbix LLD templates for Zabbix server. It is used for external Kubernetes monitoring by Zabbix via Prometheus API. ### Installation 1.
Install [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) into the Kubernetes cluster. 2. Import global Zabbix Template (zabbix-kube-prom.xml) into your Zabbix server. 3. Create or import a host identifying your Kubernetes cluster where Prometheus is deployed. 4. Let LLD create discovered nodes as new "Zabbix hosts". 5. Let LLD create discovered pods as new "Virtual Zabbix hosts". ### Templates The global export (zabbix-kube-prom.xml) contains the following templates: | Templates | Description | | --- | --- | | Template Kube by Prom API | Creates a Zabbix host for each pod and node discovered. | | Template Kube Node by Prom API | Template applied to the created host (node). | | Template Kube Pod by Prom API | Template applied to the created host (pod). | ### Licenses | Template | License | | --- | --- | | Template OS Linux by Prom | *GNU General Public License v2.0 or later* [Copyright (C) 2001-2021 Zabbix SIA](https://github.com/zabbix/zabbix/blob/master/README) | | Template Kube by Prom API, Template Kube Node by Prom API, Template Kube Pod by Prom API | *GNU General Public License v3.0* [Copyright (C) 2021 Diagnostica Stago](https://www.stago.com/) | --- ## Author Laurent Marchelli groups: - name: Templates - name: Templates/Kubernetes items: - uuid: feeb35124d8a4995907ab8b66d47fee1 name: 'Metrics cpu' type: HTTP_AGENT key: 'prom.pod.metrics[cpu]' delay: 30s history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~"^container_cpu_.*$",pod="{HOST.NAME}",container!="POD",container!=""}) by (__name__,container)' tags: - tag: Application value: '_Raw items' - uuid: 697fd47ab75f48d8b7a01be3c9e94f4c name: 'Metrics cpu_usage' type: HTTP_AGENT key: 'prom.pod.metrics[cpu_usage]' delay: 30s history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url:
'{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__="node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate",pod="{HOST.NAME}",container!="POD"}) by (__name__,container)' tags: - tag: Application value: '_Raw items' - uuid: 91abca8bc87a4c8b83d0a7e44de79642 name: 'Metrics memory' type: HTTP_AGENT key: 'prom.pod.metrics[memory]' history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~"^container_memory_.*$",pod="{HOST.NAME}",container!="POD",container!=""}) by (__name__,container)' tags: - tag: Application value: '_Raw items' - uuid: f72db150d7974201867965096cf06842 name: 'Metrics monitoring' type: HTTP_AGENT key: 'prom.pod.metrics[monitoring]' history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~"^prober_.*$",pod="{HOST.NAME}",container!="POD",container!=""}) by(__name__,container,probe_type,result)' tags: - tag: Application value: '_Raw items' - uuid: 6232c7ba88ab44179ce0c7b03868d8c5 name: 'Metrics network' type: HTTP_AGENT key: 'prom.pod.metrics[network]' history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~"^container_network_.*$",pod="{HOST.NAME}",container="POD"}) by (__name__,interface)' tags: - tag: Application value: '_Raw items' - uuid: 98728ac967d3437a98a0792a4ce62f07 name: '_New Metrics' type: HTTP_AGENT key: 'prom.pod.metrics[new]' delay: 5s history: 1h trends: '0' status: DISABLED value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: '{pod="{HOST.NAME}",container!="POD",container!=""}' tags: - tag: Application value: '_Raw items' - uuid: 
4d4ceadffdb84822a6d2e62a01025c88 name: 'Metrics spec' type: HTTP_AGENT key: 'prom.pod.metrics[spec]' history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~"^container_spec_.*$",pod="{HOST.NAME}",container!="POD",container!=""}) by (__name__,container)' tags: - tag: Application value: '_Raw items' - uuid: 1434a968e0fd47a38e5ef16f4a160b09 name: 'Metrics storage_fs' type: HTTP_AGENT key: 'prom.pod.metrics[storage,fs]' history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~"^container_fs_.*$",pod="{HOST.NAME}",container!="POD",container!=""}) by (__name__,container,device)' tags: - tag: Application value: '_Raw items' - uuid: bf14428256374a549061edd3e560d934 name: 'Metrics storage' type: HTTP_AGENT key: 'prom.pod.metrics[storage]' history: '0' trends: '0' value_type: TEXT preprocessing: - type: JSONPATH parameters: - $.data.result url: '{$PROM.API.URL}/query' query_fields: - name: query value: 'sum({__name__=~"^.*container_(file|ulimits|log)_.*$",pod="{HOST.NAME}",container!="POD",container!=""}) by (__name__,container)' tags: - tag: Application value: '_Raw items' discovery_rules: - uuid: 9418510d987848b4aa25ece747d8c3c6 name: 'Discovery cpu' type: DEPENDENT key: 'prom.pod.discovery[cpu]' delay: '0' item_prototypes: - uuid: 5672995f32ff48b1891507787705eb43 name: '{#CONTAINER} - {#METRIC}' type: DEPENDENT key: 'prom.pod.metrics[cpu,{#CONTAINER},{#METRIC}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.container==''{#CONTAINER}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[cpu]' tags: - tag: Application value: CPU graph_prototypes: - uuid: 
68d9911c225048bd8529a1ad8ab7e0c8 name: '{#CONTAINER} - {#METRIC}' graph_items: - color: 1A7C11 calc_fnc: ALL item: host: 'Kube Pod by Prom API' key: 'prom.pod.metrics[cpu,{#CONTAINER},{#METRIC}]' master_item: key: 'prom.pod.metrics[cpu]' lld_macro_paths: - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - lld_macro: '{#CONTAINER}' path: $.metric.container - uuid: 388a364a71584dc693c50b85ce3a2449 name: 'Discovery cpu_usage' type: DEPENDENT key: 'prom.pod.discovery[cpu_usage]' delay: '0' item_prototypes: - uuid: 92ecbae104ba42359f3849538c251945 name: '{#CONTAINER} - container_cpu_usage_seconds_total' type: DEPENDENT key: 'prom.pod.metrics[cpu_usage,{#CONTAINER},container_cpu_usage_seconds_total]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.container==''{#CONTAINER}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[cpu_usage]' tags: - tag: Application value: CPU graph_prototypes: - uuid: cdc0fbba893d461bb023e814413966bb name: '{#CONTAINER} - container_cpu_usage_seconds_total' graph_items: - color: 1A7C11 item: host: 'Kube Pod by Prom API' key: 'prom.pod.metrics[cpu_usage,{#CONTAINER},container_cpu_usage_seconds_total]' master_item: key: 'prom.pod.metrics[cpu_usage]' lld_macro_paths: - lld_macro: '{#CONTAINER}' path: $.metric.container - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - uuid: 666400551625411cac707258a1aaccc3 name: 'Discovery memory' type: DEPENDENT key: 'prom.pod.discovery[memory]' delay: '0' item_prototypes: - uuid: 1ef882bb092448f0b64b7ee0f0a51ee0 name: '{#CONTAINER} - {#METRIC}' type: DEPENDENT key: 'prom.pod.metrics[memory,{#CONTAINER},{#METRIC}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.container==''{#CONTAINER}'')].value[1]' - type: JAVASCRIPT parameters: - 'return 
JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[memory]' tags: - tag: Application value: Memory master_item: key: 'prom.pod.metrics[memory]' lld_macro_paths: - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - lld_macro: '{#CONTAINER}' path: $.metric.container - uuid: 9f17986e8b1340da9fb173749fda8be5 name: 'Discovery monitoring' type: DEPENDENT key: 'prom.pod.discovery[monitoring]' delay: '0' item_prototypes: - uuid: 82452047acc24e94a2e609161b2ef491 name: '{#METRIC} ({#TYPE},{#RESULT})' type: DEPENDENT key: 'prom.pod.metrics[monitoring,{#METRIC},{#TYPE},{#RESULT}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.probe_type==''{#TYPE}'' && @.metric.result==''{#RESULT}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[monitoring]' tags: - tag: Application value: Monitoring master_item: key: 'prom.pod.metrics[monitoring]' lld_macro_paths: - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - lld_macro: '{#RESULT}' path: $.metric.result - lld_macro: '{#TYPE}' path: $.metric.probe_type - uuid: a0561b8420b741679f256146fb99e856 name: 'Discovery network' type: DEPENDENT key: 'prom.pod.discovery[network]' delay: '0' item_prototypes: - uuid: 97f0bc5d5a564a78a2a2879299f07b4b name: 'Network {#IFNAME}: {#METRIC}' type: DEPENDENT key: 'prom.pod.metrics[network,{#METRIC},{#IFNAME}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.interface==''{#IFNAME}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[network]' tags: - tag: Application value: 'Network {#IFNAME}' master_item: key: 'prom.pod.metrics[network]' lld_macro_paths: - lld_macro: '{#IFNAME}' path: $.metric.interface - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - uuid: 
a14e640d0b554426957203cab580be34 name: '_New Discovery' type: DEPENDENT key: 'prom.pod.discovery[new]' delay: '0' status: DISABLED filter: evaltype: AND conditions: - macro: '{#METRIC}' value: '^container_cpu_.*$' operator: NOT_MATCHES_REGEX formulaid: A - macro: '{#METRIC}' value: '^container_memory_.*$' operator: NOT_MATCHES_REGEX formulaid: B - macro: '{#METRIC}' value: '^prober_.*$' operator: NOT_MATCHES_REGEX formulaid: C - macro: '{#METRIC}' value: '^container_spec_.*$' operator: NOT_MATCHES_REGEX formulaid: D - macro: '{#METRIC}' value: '^.*container_(file|ulimits|log)_.*$' operator: NOT_MATCHES_REGEX formulaid: E - macro: '{#METRIC}' value: '^container_fs_.*$' operator: NOT_MATCHES_REGEX formulaid: F - macro: '{#METRIC}' value: '^kube_pod_.*$' operator: NOT_MATCHES_REGEX formulaid: G item_prototypes: - uuid: 28f3cec9d1b24853824c7b88ec6ab23a name: '{#CONTAINER} - {#METRIC}' type: DEPENDENT key: 'prom.pod.metrics[new,{#CONTAINER},{#METRIC}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.container==''{#CONTAINER}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[new]' tags: - tag: Application value: '_New Metrics' master_item: key: 'prom.pod.metrics[new]' lld_macro_paths: - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - lld_macro: '{#CONTAINER}' path: $.metric.container - uuid: 31284178d9ed4dd0afeb1f4fd2bbeb91 name: 'Discovery spec' type: DEPENDENT key: 'prom.pod.discovery[spec]' delay: '0' item_prototypes: - uuid: 818ac87069374c51b62fac0d026bfc76 name: '{#CONTAINER} - {#METRIC}' type: DEPENDENT key: 'prom.pod.metrics[spec,{#CONTAINER},{#METRIC}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.container==''{#CONTAINER}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' 
master_item: key: 'prom.pod.metrics[spec]' tags: - tag: Application value: Spec master_item: key: 'prom.pod.metrics[spec]' lld_macro_paths: - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - lld_macro: '{#CONTAINER}' path: $.metric.container - uuid: fb6e4b4e94c2466b8ffa3942dbf1e2e7 name: 'Discovery storage_fs' type: DEPENDENT key: 'prom.pod.discovery[storage,fs]' delay: '0' filter: evaltype: AND item_prototypes: - uuid: bd9371d3f9854ae1a6c4cb38fe7e988b name: '{#CONTAINER} - Storage {#DEVICE}: {#METRIC}' type: DEPENDENT key: 'prom.pod.metrics[storage,{#CONTAINER},{#METRIC},{#DEVICE}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.container==''{#CONTAINER}'' && @.metric.device==''{#DEVICE}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[storage,fs]' tags: - tag: Application value: 'Storage {#DEVICE}' master_item: key: 'prom.pod.metrics[storage,fs]' lld_macro_paths: - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - lld_macro: '{#DEVICE}' path: $.metric.device - lld_macro: '{#CONTAINER}' path: $.metric.container - uuid: c104708b7a764df7a663d0ca437dbc7b name: 'Discovery storage' type: DEPENDENT key: 'prom.pod.discovery[storage]' delay: '0' item_prototypes: - uuid: 05c8be77c2ba4b55b9d53b2809e04b59 name: '{#CONTAINER} - {#METRIC}' type: DEPENDENT key: 'prom.pod.metrics[storage,{#CONTAINER},{#METRIC}]' delay: '0' value_type: FLOAT preprocessing: - type: JSONPATH parameters: - '$[?(@.metric[''__name__'']==''{#METRIC}'' && @.metric.container==''{#CONTAINER}'')].value[1]' - type: JAVASCRIPT parameters: - 'return JSON.parse(value).map(Number);' master_item: key: 'prom.pod.metrics[storage]' tags: - tag: Application value: Storage master_item: key: 'prom.pod.metrics[storage]' lld_macro_paths: - lld_macro: '{#METRIC}' path: '$.metric[''__name__'']' - lld_macro: '{#CONTAINER}' path: $.metric.container macros: 
- macro: '{$PROM.POD.DEVICE.MATCHES}' value: '^.*$' description: 'Device regex used in pod''s metric discovery. Can be overridden on the host or linked template level.' - macro: '{$PROM.POD.DEVICE.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'Device interface regex used in pod''s metric discovery. Can be overridden on the host or linked template level.' - macro: '{$PROM.POD.IFNAME.MATCHES}' value: '^.*$' description: 'Network interface regex used in pod''s metric discovery. Can be overridden on the host or linked template level.' - macro: '{$PROM.POD.IFNAME.NOT_MATCHES}' value: CHANGE_IF_NEEDED description: 'Network interface regex used in pod''s metric discovery. Can be overridden on the host or linked template level.' triggers: - uuid: 8d3d656e6b9c44baa985518f95c79e31 expression: 'min(/Kube Node by Prom API/system.swap.pfree[node_exporter],5m)<{$SWAP.PFREE.MIN.WARN} and last(/Kube Node by Prom API/system.swap.total[node_exporter])>0' name: 'High swap space usage ( less than {$SWAP.PFREE.MIN.WARN}% free)' opdata: 'Free: {ITEM.LASTVALUE1}, total: {ITEM.LASTVALUE2}' priority: WARNING description: 'This trigger is ignored, if there is no swap configured' dependencies: - name: 'High memory utilization ( >{$MEMORY.UTIL.MAX}% for 5m)' expression: 'min(/Kube Node by Prom API/vm.memory.util[node_exporter],5m)>{$MEMORY.UTIL.MAX}' - name: 'Lack of available memory ( < {$MEMORY.AVAILABLE.MIN} of {ITEM.VALUE2})' expression: 'min(/Kube Node by Prom API/vm.memory.available[node_exporter],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Kube Node by Prom API/vm.memory.total[node_exporter])>0' - uuid: fa6e7389943249f0b81d9e117ae53a5d expression: 'min(/Kube Node by Prom API/vm.memory.available[node_exporter],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Kube Node by Prom API/vm.memory.total[node_exporter])>0' name: 'Lack of available memory ( < {$MEMORY.AVAILABLE.MIN} of {ITEM.VALUE2})' opdata: 'Available: {ITEM.LASTVALUE1}, total: {ITEM.LASTVALUE2}' priority: AVERAGE - uuid: 
799fa15010c24b1eac2cd909aae4255f expression: | min(/Kube Node by Prom API/system.cpu.load.avg1[node_exporter],5m)/last(/Kube Node by Prom API/system.cpu.num[node_exporter])>{$LOAD_AVG_PER_CPU.MAX.WARN} and last(/Kube Node by Prom API/system.cpu.load.avg5[node_exporter])>0 and last(/Kube Node by Prom API/system.cpu.load.avg15[node_exporter])>0 name: 'Load average is too high (per CPU load over {$LOAD_AVG_PER_CPU.MAX.WARN} for 5m)' opdata: 'Load averages(1m 5m 15m): ({ITEM.LASTVALUE1} {ITEM.LASTVALUE3} {ITEM.LASTVALUE4}), # of CPUs: {ITEM.LASTVALUE2}' priority: AVERAGE description: 'Per CPU load average is too high. Your system may be slow to respond.' - uuid: 23e891ea138645feb7b9234a197f56ac expression: 'last(/Kube Node by Prom API/fd.open[node_exporter])/last(/Kube Node by Prom API/kernel.maxfiles[node_exporter])*100>80' name: 'Running out of file descriptors (less than 20% free)' opdata: '{ITEM.LASTVALUE1} of {ITEM.LASTVALUE2} file descriptors are in use.' priority: WARNING graphs: - uuid: 83778435485246fb98e126fe10992173 name: 'CPU jumps' graph_items: - color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'system.cpu.switches[node_exporter]' - sortorder: '1' color: 2774A4 item: host: 'Kube Node by Prom API' key: 'system.cpu.intr[node_exporter]' - uuid: a9aa69d678d74964b11b9c088b0b540d name: 'CPU usage' type: STACKED ymin_type_1: FIXED ymax_type_1: FIXED graph_items: - color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'system.cpu.system[node_exporter]' - sortorder: '1' color: 2774A4 item: host: 'Kube Node by Prom API' key: 'system.cpu.user[node_exporter]' - sortorder: '2' color: F63100 item: host: 'Kube Node by Prom API' key: 'system.cpu.nice[node_exporter]' - sortorder: '3' color: A54F10 item: host: 'Kube Node by Prom API' key: 'system.cpu.iowait[node_exporter]' - sortorder: '4' color: FC6EA3 item: host: 'Kube Node by Prom API' key: 'system.cpu.steal[node_exporter]' - sortorder: '5' color: 6C59DC item: host: 'Kube Node by Prom API' key: 
'system.cpu.interrupt[node_exporter]' - sortorder: '6' color: AC8C14 item: host: 'Kube Node by Prom API' key: 'system.cpu.softirq[node_exporter]' - sortorder: '7' color: 611F27 item: host: 'Kube Node by Prom API' key: 'system.cpu.guest[node_exporter]' - sortorder: '8' color: F230E0 item: host: 'Kube Node by Prom API' key: 'system.cpu.guest_nice[node_exporter]' - uuid: 3f4fdfe0c27c4eccbf889a12d379e7d0 name: 'CPU utilization' ymin_type_1: FIXED ymax_type_1: FIXED graph_items: - drawtype: GRADIENT_LINE color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'system.cpu.util[node_exporter]' - uuid: 77b4c8807ab245239cc30cd44d22d249 name: 'Memory usage' ymin_type_1: FIXED graph_items: - drawtype: BOLD_LINE color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'vm.memory.total[node_exporter]' - sortorder: '1' drawtype: GRADIENT_LINE color: 2774A4 item: host: 'Kube Node by Prom API' key: 'vm.memory.available[node_exporter]' - uuid: 75df4da0b5e74abca7f5f924950fc373 name: 'Memory utilization' ymin_type_1: FIXED ymax_type_1: FIXED graph_items: - drawtype: GRADIENT_LINE color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'vm.memory.util[node_exporter]' - uuid: 8befc595b9ff48e49f844e55431f49c4 name: 'Swap usage' graph_items: - color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'system.swap.free[node_exporter]' - sortorder: '1' color: 2774A4 item: host: 'Kube Node by Prom API' key: 'system.swap.total[node_exporter]' - uuid: 949e8aef2e264e1686076e4f4e6b69cf name: 'System load' ymin_type_1: FIXED graph_items: - color: 1A7C11 item: host: 'Kube Node by Prom API' key: 'system.cpu.load.avg1[node_exporter]' - sortorder: '1' color: 2774A4 item: host: 'Kube Node by Prom API' key: 'system.cpu.load.avg5[node_exporter]' - sortorder: '2' color: F63100 item: host: 'Kube Node by Prom API' key: 'system.cpu.load.avg15[node_exporter]' - sortorder: '3' color: A54F10 yaxisside: RIGHT item: host: 'Kube Node by Prom API' key: 'system.cpu.num[node_exporter]'