---
# InfluxDB template: Windows System monitoring.
#
# Resources defined in this file, in order:
#   * three Labels (template marker, input plugin, output plugin)
#   * the "telegraf" Bucket (7 day retention)
#   * two dashboard Variables (bucket, windows_host)
#   * a Telegraf configuration using inputs.win_perf_counters
#   * the "Windows System" Dashboard
#
# Label attached to every resource that belongs to this template.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: windows-system-template
spec:
  name: Windows System Template
  color: '#7A65F2'
---
# Label marking the Telegraf input plugin used by this template.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: inputs-win-perf-counters
spec:
  name: inputs.win_perf_counters
  color: '#326BBA'
---
# Label marking the Telegraf output plugin used by this template.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: outputs-influxdb-v2
spec:
  name: outputs.influxdb_v2
  color: '#108174'
---
# Destination bucket for the Telegraf output below (bucket = "telegraf").
apiVersion: influxdata.com/v2alpha1
kind: Bucket
metadata:
  name: telegraf
spec:
  name: telegraf
  retentionRules:
    # 604800 seconds = 7 days.
    - everySeconds: 604800
      type: expire
---
# Dashboard variable "bucket": lists every bucket whose name does not start
# with an underscore. Referenced by the dashboard queries as v.bucket.
apiVersion: influxdata.com/v2alpha1
kind: Variable
metadata:
  name: bucket
spec:
  name: bucket
  associations:
    - kind: Label
      name: windows-system-template
  language: flux
  query: |-
    buckets()
      |> filter(fn: (r) => r.name !~ /^_/)
      |> rename(columns: {name: "_value"})
      |> keep(columns: ["_value"])
  type: query
---
# Dashboard variable "windows_host": values of the "host" tag on the
# "win_system" measurement in the selected bucket. Depends on v.bucket.
apiVersion: influxdata.com/v2alpha1
kind: Variable
metadata:
  name: windows-host
spec:
  name: windows_host
  associations:
    - kind: Label
      name: windows-system-template
  language: flux
  query: |-
    import "influxdata/influxdb/v1"
    v1.measurementTagValues(bucket: v.bucket, measurement: "win_system", tag: "host")
  type: query
---
# Telegraf configuration: collects Windows performance counters and writes
# them to the "telegraf" bucket. The connection settings are read from the
# INFLUX_HOST, INFLUX_TOKEN and INFLUX_ORG environment variables.
#
# Measurements produced (and consumed by the dashboard below):
#   win_cpu, win_disk, win_diskio, win_net, win_system, win_mem, win_swap
apiVersion: influxdata.com/v2alpha1
kind: Telegraf
metadata:
  name: windows-system-monitoring
spec:
  name: Windows System Monitoring
  associations:
    - kind: Label
      name: inputs-win-perf-counters
    - kind: Label
      name: outputs-influxdb-v2
    - kind: Label
      name: windows-system-template
  config: |
    # Telegraf Configuration
    #
    # Telegraf is entirely plugin driven. All metrics are gathered from the
    # declared inputs, and sent to the declared outputs.
    #
    # Plugins must be declared in here to be active.
    # To deactivate a plugin, comment out the name and any variables.
    #
    # Use 'telegraf -config telegraf.conf -test' to see what metrics a config
    # file would generate.
    #
    # Environment variables can be used anywhere in this config file, simply surround
    # them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"),
    # for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR})


    # Global tags can be specified here in key="value" format.
    [global_tags]
      # dc = "us-east-1" # will tag all metrics with dc=us-east-1
      # rack = "1a"
      ## Environment variables can be used as tags, and throughout the config file
      # user = "$USER"


    # Configuration for telegraf agent
    [agent]
      ## Default data collection interval for all inputs
      interval = "10s"
      ## Rounds collection interval to 'interval'
      ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
      round_interval = true

      ## Telegraf will send metrics to outputs in batches of at most
      ## metric_batch_size metrics.
      ## This controls the size of writes that Telegraf sends to output plugins.
      metric_batch_size = 1000

      ## Maximum number of unwritten metrics per output. Increasing this value
      ## allows for longer periods of output downtime without dropping metrics at the
      ## cost of higher maximum memory usage.
      metric_buffer_limit = 10000

      ## Collection jitter is used to jitter the collection by a random amount.
      ## Each plugin will sleep for a random time within jitter before collecting.
      ## This can be used to avoid many plugins querying things like sysfs at the
      ## same time, which can have a measurable effect on the system.
      collection_jitter = "0s"

      ## Default flushing interval for all outputs. Maximum flush_interval will be
      ## flush_interval + flush_jitter
      flush_interval = "10s"
      ## Jitter the flush interval by a random amount. This is primarily to avoid
      ## large write spikes for users running a large number of telegraf instances.
      ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
      flush_jitter = "0s"

      ## By default or when set to "0s", precision will be set to the same
      ## timestamp order as the collection interval, with the maximum being 1s.
      ##   ie, when interval = "10s", precision will be "1s"
      ##       when interval = "250ms", precision will be "1ms"
      ## Precision will NOT be used for service inputs. It is up to each individual
      ## service input to set the timestamp at the appropriate precision.
      ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
      precision = ""

      ## Log at debug level.
      # debug = false
      ## Log only error level messages.
      # quiet = false

      ## Log target controls the destination for logs and can be one of "file",
      ## "stderr" or, on Windows, "eventlog". When set to "file", the output file
      ## is determined by the "logfile" setting.
      # logtarget = "file"

      ## Name of the file to be logged to when using the "file" logtarget. If set to
      ## the empty string then logs are written to stderr.
      # logfile = ""

      ## The logfile will be rotated after the time interval specified. When set
      ## to 0 no time based rotation is performed. Logs are rotated only when
      ## written to, if there is no log activity rotation may be delayed.
      # logfile_rotation_interval = "0d"

      ## The logfile will be rotated when it becomes larger than the specified
      ## size. When set to 0 no size based rotation is performed.
      # logfile_rotation_max_size = "0MB"

      ## Maximum number of rotated archives to keep, any older logs are deleted.
      ## If set to -1, no archives are removed.
      # logfile_rotation_max_archives = 5

      ## Override default hostname, if empty use os.Hostname()
      hostname = ""
      ## If set to true, do not set the "host" tag in the telegraf agent.
      omit_hostname = false


    ###############################################################################
    #                            OUTPUT PLUGINS                                   #
    ###############################################################################

    # Configuration for sending metrics to InfluxDB
    [[outputs.influxdb_v2]]
      ## The URLs of the InfluxDB cluster nodes.
      ##
      ## Multiple URLs can be specified for a single cluster, only ONE of the
      ## urls will be written to each interval.
      ##   ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"]
      urls = ["$INFLUX_HOST"]

      ## Token for authentication.
      token = "$INFLUX_TOKEN"

      ## Organization is the name of the organization you wish to write to; must exist.
      organization = "$INFLUX_ORG"

      ## Destination bucket to write into.
      bucket = "telegraf"

      ## The value of this tag will be used to determine the bucket. If this
      ## tag is not set the 'bucket' option is used as the default.
      # bucket_tag = ""

      ## If true, the bucket tag will not be added to the metric.
      # exclude_bucket_tag = false

      ## Timeout for HTTP messages.
      # timeout = "5s"

      ## Additional HTTP headers
      # http_headers = {"X-Special-Header" = "Special-Value"}

      ## HTTP Proxy override. If unset, the standard proxy environment
      ## variables are consulted to determine which proxy, if any, should be used.
      # http_proxy = "http://corporate.proxy:3128"

      ## HTTP User-Agent
      # user_agent = "telegraf"

      ## Content-Encoding for write request body, can be set to "gzip" to
      ## compress body or "identity" to apply no encoding.
      # content_encoding = "gzip"

      ## Enable or disable uint support for writing uints to influxdb 2.0.
      # influx_uint_support = false

      ## Optional TLS Config for use on HTTP connections.
      # tls_ca = "/etc/telegraf/ca.pem"
      # tls_cert = "/etc/telegraf/cert.pem"
      # tls_key = "/etc/telegraf/key.pem"
      ## Use TLS but skip chain & host verification
      # insecure_skip_verify = false


    ###############################################################################
    #                            INPUT PLUGINS                                    #
    ###############################################################################

    # Windows Performance Counters plugin.
    # These are the recommended method of monitoring system metrics on windows,
    # as the regular system plugins (inputs.cpu, inputs.mem, etc.) rely on WMI,
    # which utilizes more system resources.
    #
    # See more configuration examples at:
    #   https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters

    [[inputs.win_perf_counters]]
      [[inputs.win_perf_counters.object]]
        # Processor usage, alternative to native, reports on a per core.
        ObjectName = "Processor"
        Instances = ["*"]
        Counters = [
          "% Idle Time",
          "% Interrupt Time",
          "% Privileged Time",
          "% User Time",
          "% Processor Time",
          "% DPC Time",
        ]
        Measurement = "win_cpu"
        # Set to true to include _Total instance when querying for all (*).
        IncludeTotal=true

      [[inputs.win_perf_counters.object]]
        # Disk times and queues
        ObjectName = "LogicalDisk"
        Instances = ["*"]
        Counters = [
          "% Idle Time",
          "% Disk Time",
          "% Disk Read Time",
          "% Disk Write Time",
          "% Free Space",
          "Current Disk Queue Length",
          "Free Megabytes",
        ]
        Measurement = "win_disk"
        # Set to true to include _Total instance when querying for all (*).
        #IncludeTotal=false

      [[inputs.win_perf_counters.object]]
        ObjectName = "PhysicalDisk"
        Instances = ["*"]
        Counters = [
          "Disk Read Bytes/sec",
          "Disk Write Bytes/sec",
          "Current Disk Queue Length",
          "Disk Reads/sec",
          "Disk Writes/sec",
          "% Disk Time",
          "% Disk Read Time",
          "% Disk Write Time",
        ]
        Measurement = "win_diskio"

      [[inputs.win_perf_counters.object]]
        ObjectName = "Network Interface"
        Instances = ["*"]
        Counters = [
          "Bytes Received/sec",
          "Bytes Sent/sec",
          "Packets Received/sec",
          "Packets Sent/sec",
          "Packets Received Discarded",
          "Packets Outbound Discarded",
          "Packets Received Errors",
          "Packets Outbound Errors",
        ]
        Measurement = "win_net"

      [[inputs.win_perf_counters.object]]
        ObjectName = "System"
        Counters = [
          "Context Switches/sec",
          "System Calls/sec",
          "Processor Queue Length",
          "System Up Time",
        ]
        Instances = ["------"]
        Measurement = "win_system"
        # Set to true to include _Total instance when querying for all (*).
        #IncludeTotal=false

      [[inputs.win_perf_counters.object]]
        # Example query where the Instance portion must be removed to get data back,
        # such as from the Memory object.
        ObjectName = "Memory"
        Counters = [
          "Available Bytes",
          "Cache Faults/sec",
          "Demand Zero Faults/sec",
          "Page Faults/sec",
          "Pages/sec",
          "Transition Faults/sec",
          "Pool Nonpaged Bytes",
          "Pool Paged Bytes",
          "Standby Cache Reserve Bytes",
          "Standby Cache Normal Priority Bytes",
          "Standby Cache Core Bytes",
        ]
        # Use 6 x - to remove the Instance bit from the query.
        Instances = ["------"]
        Measurement = "win_mem"
        # Set to true to include _Total instance when querying for all (*).
        #IncludeTotal=false

      [[inputs.win_perf_counters.object]]
        # Paging file usage; only the _Total instance is queried.
        ObjectName = "Paging File"
        Counters = [
          "% Usage",
        ]
        Instances = ["_Total"]
        Measurement = "win_swap"

    # Windows system plugins using WMI (disabled by default, using
    # win_perf_counters over WMI is recommended)
  description: A set of plugins for monitoring your Windows system.
---
# Dashboard "Windows System".
#
# Every query reads from v.bucket, is restricted to the dashboard time range
# (v.timeRangeStart .. v.timeRangeStop) and to the host chosen in
# v.windows_host.
#
# Cell positions (xPos/yPos, width x height):
#   yPos 0: Markdown note (12x1)
#   yPos 1: System Uptime (0, 3x1), Processor Queue Length (3, 2x1),
#           System Calls / Sec (5, 2x1), Available Memory (7, 2x1),
#           Available Memory (bytes) (9, 3x4)
#   yPos 2: Disk Usage (0, 3x3), CPU Usage (3, 3x3), System Load (6, 3x3)
#   yPos 5: Disk IO (0, 4x3), Network (4, 4x3), Swap (8, 4x3)
apiVersion: influxdata.com/v2alpha1
kind: Dashboard
metadata:
  name: windows-system
spec:
  name: Windows System
  associations:
    - kind: Label
      name: windows-system-template
  charts:
    # Introductory note shown across the top of the dashboard.
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: >-
        This dashboard gives you an overview of your Windows System metrics
        and will work with the default Windows Telegraf config. See the
        [Telegraf Win Perf Counters Documentation](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/win_perf_counters)
        for more information.
      queries: null
      width: 12

    # Latest "System Up Time" value divided by 86400 and shown as days.
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      height: 1
      kind: Single_Stat
      name: System Uptime
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_system")
              |> filter(fn: (r) => r._field == "System_Up_Time")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> last()
              |> map(fn: (r) => ({r with _value: r._value / 86400.0}))
              |> yield(name: "last")
      suffix: ' days'
      width: 3
      yPos: 1

    # Used disk space in percent, computed as 100 - "% Free Space".
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Disk Usage
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_disk")
              |> filter(fn: (r) => r._field == "Percent_Free_Space")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
              |> map(fn: (r) => ({ r with _value: 100.0 - r._value }))
              |> set(key: "_field", value: "Percent Used Space")
      width: 3
      xCol: _time
      yCol: _value
      yPos: 2

    # Disk read/write throughput. The *_Bytes_persec fields are already
    # per-second rates, so they are only window-averaged; no derivative()
    # is applied.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          label: Bytes
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Disk IO (bytes/sec)
      position: overlaid
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_diskio")
              |> filter(fn: (r) => r._field =~ /_Bytes_persec$/)
              |> filter(fn: (r) => r.host == v.windows_host)
              |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
              |> yield(name: "mean")
      width: 4
      xCol: _time
      yCol: _value
      yPos: 5

    # Latest "Processor Queue Length" value.
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Processor Queue Length
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_system")
              |> filter(fn: (r) => r._field == "Processor_Queue_Length")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> last()
      width: 2
      xPos: 3
      yPos: 1

    # All "Percent_*" processor counters for the _Total instance.
    - axes:
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
        - base: "10"
          name: x
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: CPU Usage
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_cpu")
              |> filter(fn: (r) => r._field =~ /^Percent_/)
              |> filter(fn: (r) => r.instance == "_Total")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
              |> drop(columns: ["objectname"])
      width: 3
      xCol: _time
      xPos: 3
      yCol: _value
      yPos: 2

    # Network bytes received/sent per second. The fields are already
    # per-second rates, so they are only window-averaged; no derivative()
    # is applied.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          label: Bytes
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Network
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_net")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> filter(fn: (r) => r._field =~ /^Bytes_(Received|Sent)_persec$/)
              |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
              |> yield(name: "mean")
      width: 4
      xCol: _time
      xPos: 4
      yCol: _value
      yPos: 5

    # Latest "System Calls/sec" value.
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: System Calls / Sec
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_system")
              |> filter(fn: (r) => r._field == "System_Calls_persec")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> last()
      width: 2
      xPos: 5
      yPos: 1

    # Every per-second counter of the "win_system" measurement.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          label: Load
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: System Load
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_system")
              |> filter(fn: (r) => r._field =~ /_persec$/)
              |> filter(fn: (r) => r.host == v.windows_host)
              |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
              |> yield(name: "mean")
      width: 3
      xCol: _time
      xPos: 6
      yCol: _value
      yPos: 2

    # Latest "Available Bytes" value divided by 1024^3 and shown as GB.
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Available Memory
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_mem")
              |> filter(fn: (r) => r._field == "Available_Bytes")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> last()
              |> map(fn: (r) => ({r with _value: float(v: r._value) / 1024.0 / 1024.0 / 1024.0}))
              |> yield(name: "last")
      suffix: ' GB'
      width: 2
      xPos: 7
      yPos: 1

    # Paging file "% Usage" (_Total instance); the y axis is a percentage.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Swap
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_swap")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
              |> yield(name: "mean")
      width: 4
      xCol: _time
      xPos: 8
      yCol: _value
      yPos: 5

    # "Available Bytes" over time plus its latest value. The series is in
    # bytes, so the y axis is labelled Bytes and carries no '%' suffix.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          label: Bytes
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      colors:
        - hex: '#00C9FF'
          name: laser
          type: text
        - hex: '#8F8AF4'
          name: Do Androids Dream of Electric Sheep?
          type: scale
        - hex: '#A51414'
          name: Do Androids Dream of Electric Sheep?
          type: scale
        - hex: '#F4CF31'
          name: Do Androids Dream of Electric Sheep?
          type: scale
      decimalPlaces: 1
      height: 4
      kind: Single_Stat_Plus_Line
      name: Available Memory (bytes)
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "win_mem")
              |> filter(fn: (r) => r._field == "Available_Bytes")
              |> filter(fn: (r) => r.host == v.windows_host)
              |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
              |> yield(name: "mean")
      width: 3
      xCol: _time
      xPos: 9
      yCol: _value
      yPos: 1
  description: A collection of useful visualizations for monitoring your Windows system stats