<?xml version="1.0" encoding="UTF-8"?>
<!--
  Zabbix 5.0 template export: SMART monitoring through smartctl.

  Layout:
    * smartctl.version                 plain item with a change trigger.
    * storage.discovery                low-level discovery rule, filtered on {#STORAGE.SMART} = 1.
    * storage.get["{#STORAGE.CMD}"]    master item prototype holding the smartctl text output.
    * every other item prototype is DEPENDENT on storage.get[...] and extracts
      one value from that text with a preprocessing step.

  Editing notes:
    * The text of <params>, <description> and <expression> is data. It is
      intentionally not indented, because any added whitespace would become
      part of the value.
    * REGEX steps keep the pattern on the first line of <params> and the
      output template (\1) on the second line.
    * IN_RANGE steps keep the minimum on the first line and the maximum on
      the second line.
    * Comparison operators inside expressions are written as &lt; and &gt;
      so that the document stays well-formed.
-->
<zabbix_export>
    <version>5.0</version>
    <date>2021-09-23T09:16:55Z</date>
    <groups>
        <group>
            <name>Templates Users</name>
        </group>
    </groups>
    <templates>
        <template>
            <template>Template SMARTMONTOOLS</template>
            <name>Template SMARTMONTOOLS</name>
            <description>#
# .VERSION
# 0.8
#
# .DESCRIPTION
# Author: Nikitin Maksim
# Github: https://github.com/nikimaxim/zbx-smartmonitor.git
# Note: Zabbix lld for smartctl
#
# .TESTING
# Smartmontools: 7.1 and later
#</description>
            <groups>
                <group>
                    <name>Templates Users</name>
                </group>
            </groups>
            <applications>
                <application>
                    <name>SMART monitoring (smartctl)</name>
                </application>
            </applications>
            <items>
                <item>
                    <name>Smartctl version</name>
                    <key>smartctl.version</key>
                    <delay>1h</delay>
                    <trends>0</trends>
                    <value_type>CHAR</value_type>
                    <applications>
                        <application>
                            <name>SMART monitoring (smartctl)</name>
                        </application>
                    </applications>
                    <triggers>
                        <trigger>
                            <expression>{diff()}=1</expression>
                            <name>Version of smartctl was changed on {HOST.NAME}</name>
                            <priority>INFO</priority>
                        </trigger>
                    </triggers>
                </item>
            </items>
            <discovery_rules>
                <discovery_rule>
                    <name>Smartctl storage discovery</name>
                    <key>storage.discovery</key>
                    <delay>12h</delay>
                    <!-- Only storages reporting {#STORAGE.SMART} = 1 produce items. -->
                    <filter>
                        <conditions>
                            <condition>
                                <macro>{#STORAGE.SMART}</macro>
                                <value>1</value>
                                <formulaid>A</formulaid>
                            </condition>
                        </conditions>
                    </filter>
                    <item_prototypes>
                        <!-- Error log summary: "No Errors Logged", the ATA error count, or "Not supported". -->
                        <item_prototype>
                            <name>{#STORAGE.NAME}: Error count</name>
                            <type>DEPENDENT</type>
                            <key>storage.error["{#STORAGE.CMD}"]</key>
                            <delay>0</delay>
                            <trends>0</trends>
                            <value_type>CHAR</value_type>
                            <description>HDD SSD NVME</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>JAVASCRIPT</type>
                                    <params>if (value.search("No Errors Logged") != -1) {
    return "No Errors Logged";
}
else if (value.search("ATA Error Count:") != -1) {
    // Guard against a line that has the marker but no number after it.
    var found = value.match(/(?:ATA Error Count:).+\b([0-9]+)/i);
    return found ? found[1] : "Not supported";
}
else {
    return "Not supported";
}
</params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{str(No Errors Logged)}=0 and {str(Not supported)}=0</expression>
                                    <name>{#STORAGE.NAME}: SMART error status is not "No Errors Logged"</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <!-- Master item: smartctl text output that all dependent prototypes parse. -->
                        <item_prototype>
                            <name>{#STORAGE.NAME}: Smartctl</name>
                            <key>storage.get["{#STORAGE.CMD}"]</key>
                            <delay>1h;10m/1-7,08:00-21:00</delay>
                            <history>30d</history>
                            <trends>0</trends>
                            <value_type>TEXT</value_type>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>JAVASCRIPT</type>
                                    <params>return value.replace(/\s\(.+\)|\s\[.+\]|[\u00A0]/g, '') // Deleting data in brackets and spaces</params>
                                </step>
                            </preprocessing>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{str("FAILING_NOW")}=1</expression>
                                    <name>{#STORAGE.NAME}: One or more SMART attributes are "FAILING_NOW"</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                    <dependencies>
                                        <dependency>
                                            <name>{#STORAGE.NAME}: SMART test status is "FAILED"</name>
                                            <expression>{Template SMARTMONTOOLS:storage.health["{#STORAGE.CMD}"].str("FAILED!")}=1</expression>
                                        </dependency>
                                    </dependencies>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: Test result</name>
                            <type>DEPENDENT</type>
                            <key>storage.health["{#STORAGE.CMD}"]</key>
                            <delay>0</delay>
                            <trends>0</trends>
                            <value_type>CHAR</value_type>
                            <description>HDD SSD</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:SMART overall-health self-assessment test result:|SMART Health Status:) +\b([\S ]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{str("FAILED!")}=1</expression>
                                    <name>{#STORAGE.NAME}: SMART test status is "FAILED"</name>
                                    <priority>AVERAGE</priority>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: Device model</name>
                            <type>DEPENDENT</type>
                            <key>storage.model["{#STORAGE.CMD}"]</key>
                            <delay>0</delay>
                            <trends>0</trends>
                            <value_type>CHAR</value_type>
                            <description>HDD SSD NVME</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:Vendor:|Product:|Device [Mm]odel:|Model [Nn]umber:).+ (.+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: Serial number</name>
                            <type>DEPENDENT</type>
                            <key>storage.sn["{#STORAGE.CMD}"]</key>
                            <delay>0</delay>
                            <trends>0</trends>
                            <value_type>CHAR</value_type>
                            <description>HDD SSD NVME</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:Serial [Nn]umber:).+ ([\d\w-]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{diff()}=1</expression>
                                    <recovery_mode>NONE</recovery_mode>
                                    <name>{#STORAGE.NAME}: Disk has been replaced (new serial number received)</name>
                                    <priority>INFO</priority>
                                    <manual_close>YES</manual_close>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 05 Reallocated sectors count</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 05 Reallocated_Sector_Ct]</key>
                            <delay>0</delay>
                            <description>HDD SSD</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:5 Reallocated|Elements in grown defect list:|5 Retired_Block_Count).+ ([0-9]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{diff()}=1</expression>
                                    <recovery_mode>RECOVERY_EXPRESSION</recovery_mode>
                                    <recovery_expression>{last()}=0 or {delta(864000)}=0</recovery_expression>
                                    <name>{#STORAGE.NAME}: SMART reallocated sectors count has been registered</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                    <dependencies>
                                        <dependency>
                                            <name>{#STORAGE.NAME}: SMART test status is "FAILED"</name>
                                            <expression>{Template SMARTMONTOOLS:storage.health["{#STORAGE.CMD}"].str("FAILED!")}=1</expression>
                                        </dependency>
                                    </dependencies>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <!-- The extracted hour count is multiplied by 3600 and stored with units "s". -->
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 09 Power on hours</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 09 Power_On_Hours]</key>
                            <delay>0</delay>
                            <units>s</units>
                            <description>HDD SSD NVME</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:9 Power_On_Hours|Power On Hours:|[Nn]umber of hours powered up|Accumulated power on time).+ ([0-9]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                                <step>
                                    <type>MULTIPLIER</type>
                                    <params>3600</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 10 Spin retry count</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 10 Spin_Retry_Count]</key>
                            <delay>0</delay>
                            <description>HDD</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:10 Spin_Retry_Count).+ ([0-9]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{delta(7d)}&gt;0</expression>
                                    <name>{#STORAGE.NAME}: SMART spin retry count has been registered</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <!-- Thresholds come from the {$TEMP_WARN} and {$TEMP_CRIT} template macros. -->
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 190/194 Temperature</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 190/194 Temperature_Celsius]</key>
                            <delay>0</delay>
                            <units>°C</units>
                            <description>HDD SSD NVME</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:190 Temperature|190 Airflow_Temperature|194 Temperature|Drive Temperature:|Temperature:).+ \w* (\d+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{avg(5m)}&gt;{$TEMP_CRIT} and {last()}&lt;=100</expression>
                                    <recovery_mode>RECOVERY_EXPRESSION</recovery_mode>
                                    <recovery_expression>{avg(5m)}&lt;{$TEMP_CRIT}-3</recovery_expression>
                                    <name>{#STORAGE.NAME}: Disk temperature is "CRITICAL"</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                </trigger_prototype>
                                <trigger_prototype>
                                    <expression>{avg(5m)}&gt;{$TEMP_WARN}</expression>
                                    <recovery_mode>RECOVERY_EXPRESSION</recovery_mode>
                                    <recovery_expression>{avg(5m)}&lt;{$TEMP_WARN}-3</recovery_expression>
                                    <name>{#STORAGE.NAME}: Disk temperature is "WARNING"</name>
                                    <priority>WARNING</priority>
                                    <manual_close>YES</manual_close>
                                    <dependencies>
                                        <dependency>
                                            <name>{#STORAGE.NAME}: Disk temperature is "CRITICAL"</name>
                                            <expression>{Template SMARTMONTOOLS:storage["{#STORAGE.CMD}", 190/194 Temperature_Celsius].avg(5m)}&gt;{$TEMP_CRIT} and {Template SMARTMONTOOLS:storage["{#STORAGE.CMD}", 190/194 Temperature_Celsius].last()}&lt;=100</expression>
                                            <recovery_expression>{Template SMARTMONTOOLS:storage["{#STORAGE.CMD}", 190/194 Temperature_Celsius].avg(5m)}&lt;{$TEMP_CRIT}-3</recovery_expression>
                                        </dependency>
                                    </dependencies>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 197 Current pending sector count</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 197 Current_Pending_Sector]</key>
                            <delay>0</delay>
                            <description>HDD</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:197 Current_Pending_Sector|197\s+\w+_\w+).+ ([0-9]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{delta(3d)}&gt;0</expression>
                                    <name>{#STORAGE.NAME}: SMART current pending sector counter has been registered</name>
                                    <priority>WARNING</priority>
                                    <manual_close>YES</manual_close>
                                    <dependencies>
                                        <dependency>
                                            <name>{#STORAGE.NAME}: SMART reallocated sectors count has been registered</name>
                                            <expression>{Template SMARTMONTOOLS:storage["{#STORAGE.CMD}", 05 Reallocated_Sector_Ct].diff()}=1</expression>
                                            <recovery_expression>{Template SMARTMONTOOLS:storage["{#STORAGE.CMD}", 05 Reallocated_Sector_Ct].last()}=0 or {Template SMARTMONTOOLS:storage["{#STORAGE.CMD}", 05 Reallocated_Sector_Ct].delta(864000)}=0</recovery_expression>
                                        </dependency>
                                        <dependency>
                                            <name>{#STORAGE.NAME}: SMART test status is "FAILED"</name>
                                            <expression>{Template SMARTMONTOOLS:storage.health["{#STORAGE.CMD}"].str("FAILED!")}=1</expression>
                                        </dependency>
                                    </dependencies>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 198 Uncorrectable errors count</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 198 Offline_Uncorrectable]</key>
                            <delay>0</delay>
                            <description>HDD SSD</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:Offline_Uncorrectable).+ ([0-9]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{diff()}=1</expression>
                                    <name>{#STORAGE.NAME}: SMART uncorrectable sector count has been registered</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                    <dependencies>
                                        <dependency>
                                            <name>{#STORAGE.NAME}: SMART test status is "FAILED"</name>
                                            <expression>{Template SMARTMONTOOLS:storage.health["{#STORAGE.CMD}"].str("FAILED!")}=1</expression>
                                        </dependency>
                                    </dependencies>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 199/218 CRC error count</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 199/218 UDMA_CRC_Error]</key>
                            <delay>0</delay>
                            <description>HDD SSD NVME</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:199 UDMA_CRC_Error|199 CRC_Error_Count|Non-medium error count:|Media and Data Integrity Errors:|199 SATA_CRC_Error|218 CRC_Error_Count).+ ([0-9]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{last()}&gt;100 and {delta(1d)}&gt;0</expression>
                                    <name>{#STORAGE.NAME}: SMART UDMA CRC error count too high has been registered</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                        <!-- Values outside 0..100 are rejected by the IN_RANGE step with "Not supported". -->
                        <item_prototype>
                            <name>{#STORAGE.NAME}: ID 231 SSD life left</name>
                            <type>DEPENDENT</type>
                            <key>storage["{#STORAGE.CMD}", 231 SSD_Life_Left]</key>
                            <delay>0</delay>
                            <units>%</units>
                            <description>SSD NVME</description>
                            <applications>
                                <application>
                                    <name>SMART monitoring (smartctl)</name>
                                </application>
                            </applications>
                            <preprocessing>
                                <step>
                                    <type>REGEX</type>
                                    <params>(?:231 SSD_Life_Left |Available Spare: |231\s+\w+_\w+).+ ([0-9]+)
\1</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                                <step>
                                    <type>IN_RANGE</type>
                                    <params>0
100</params>
                                    <error_handler>CUSTOM_ERROR</error_handler>
                                    <error_handler_params>Not supported</error_handler_params>
                                </step>
                            </preprocessing>
                            <master_item>
                                <key>storage.get["{#STORAGE.CMD}"]</key>
                            </master_item>
                            <trigger_prototypes>
                                <trigger_prototype>
                                    <expression>{last()}&lt;=10</expression>
                                    <recovery_mode>RECOVERY_EXPRESSION</recovery_mode>
                                    <recovery_expression>{last()}&gt;10</recovery_expression>
                                    <name>{#STORAGE.NAME}: SMART SSD life left</name>
                                    <priority>AVERAGE</priority>
                                    <manual_close>YES</manual_close>
                                </trigger_prototype>
                            </trigger_prototypes>
                        </item_prototype>
                    </item_prototypes>
                    <graph_prototypes>
                        <graph_prototype>
                            <name>{#STORAGE.NAME}: Smartctl temperature</name>
                            <graph_items>
                                <graph_item>
                                    <color>1A7C11</color>
                                    <item>
                                        <host>Template SMARTMONTOOLS</host>
                                        <key>storage["{#STORAGE.CMD}", 190/194 Temperature_Celsius]</key>
                                    </item>
                                </graph_item>
                            </graph_items>
                        </graph_prototype>
                    </graph_prototypes>
                </discovery_rule>
            </discovery_rules>
            <!-- Temperature thresholds used by the disk temperature trigger prototypes. -->
            <macros>
                <macro>
                    <macro>{$TEMP_CRIT}</macro>
                    <value>65</value>
                </macro>
                <macro>
                    <macro>{$TEMP_WARN}</macro>
                    <value>55</value>
                </macro>
            </macros>
        </template>
    </templates>
</zabbix_export>