apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: linux-system-template
spec:
    name: Linux System Template
    color: '#7A65F2'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-cpu
spec:
    name: inputs.cpu
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-disk
spec:
    name: inputs.disk
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-diskio
spec:
    name: inputs.diskio
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-kernel
spec:
    name: inputs.kernel
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-mem
spec:
    name: inputs.mem
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-net
spec:
    name: inputs.net
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-processes
spec:
    name: inputs.processes
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-swap
spec:
    name: inputs.swap
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: inputs-system
spec:
    name: inputs.system
    color: '#326BBA'
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
    name: outputs-influxdb-v2
spec:
    name: outputs.influxdb_v2
    color: '#108174'
---
apiVersion: influxdata.com/v2alpha1
kind: Bucket
metadata:
    name: telegraf
spec:
    name: telegraf
    retentionRules:
      - everySeconds: 604800
        type: expire
---
apiVersion: influxdata.com/v2alpha1
kind: Variable
metadata:
    name: bucket
spec:
    name: bucket
    associations:
      - kind: Label
        name: linux-system-template
    language: flux
    query: |-
        buckets()
          |> filter(fn: (r) => r.name !~ /^_/)
          |> rename(columns: {name: "_value"})
          |> keep(columns: ["_value"])
    type: query
---
apiVersion: influxdata.com/v2alpha1
kind: Variable
metadata:
    name: linux-host
spec:
    name: linux_host
    associations:
      - kind: Label
        name: linux-system-template
    language: flux
    query: |-
        import "influxdata/influxdb/v1"
        v1.measurementTagValues(bucket: v.bucket, measurement: "cpu", tag: "host")
    type: query
---
apiVersion: influxdata.com/v2alpha1
kind: Telegraf
metadata:
    name: linux-system-monitoring
spec:
    name: Linux System Monitoring
    associations:
      - kind: Label
        name: inputs-cpu
      - kind: Label
        name: inputs-system
      - kind: Label
        name: inputs-swap
      - kind: Label
        name: inputs-processes
      - kind: Label
        name: inputs-net
      - kind: Label
        name: inputs-mem
      - kind: Label
        name: inputs-kernel
      - kind: Label
        name: inputs-disk
      - kind: Label
        name: inputs-diskio
      - kind: Label
        name: outputs-influxdb-v2
      - kind: Label
        name: linux-system-template
    config: |
        # Telegraf Configuration
        #
        # Telegraf is entirely plugin driven. All metrics are gathered from the
        # declared inputs, and sent to the declared outputs.
        #
        # Plugins must be declared in here to be active.
        # To deactivate a plugin, comment out the name and any variables.
        #
        # Use 'telegraf -config telegraf.conf -test' to see what metrics a config
        # file would generate.
        #
        # Environment variables can be used anywhere in this config file, simply surround
        # them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"),
        # for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR})


        # Global tags can be specified here in key="value" format.
        [global_tags]
          # dc = "us-east-1" # will tag all metrics with dc=us-east-1
          # rack = "1a"
          ## Environment variables can be used as tags, and throughout the config file
          # user = "$USER"


        # Configuration for telegraf agent
        [agent]
          ## Default data collection interval for all inputs
          interval = "10s"
          ## Rounds collection interval to 'interval'
          ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
          round_interval = true

          ## Telegraf will send metrics to outputs in batches of at most
          ## metric_batch_size metrics.
          ## This controls the size of writes that Telegraf sends to output plugins.
          metric_batch_size = 1000

          ## Maximum number of unwritten metrics per output.  Increasing this value
          ## allows for longer periods of output downtime without dropping metrics at the
          ## cost of higher maximum memory usage.
          metric_buffer_limit = 10000

          ## Collection jitter is used to jitter the collection by a random amount.
          ## Each plugin will sleep for a random time within jitter before collecting.
          ## This can be used to avoid many plugins querying things like sysfs at the
          ## same time, which can have a measurable effect on the system.
          collection_jitter = "0s"

          ## Default flushing interval for all outputs. Maximum flush_interval will be
          ## flush_interval + flush_jitter
          flush_interval = "10s"
          ## Jitter the flush interval by a random amount. This is primarily to avoid
          ## large write spikes for users running a large number of telegraf instances.
          ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
          flush_jitter = "0s"

          ## By default or when set to "0s", precision will be set to the same
          ## timestamp order as the collection interval, with the maximum being 1s.
          ##   ie, when interval = "10s", precision will be "1s"
          ##       when interval = "250ms", precision will be "1ms"
          ## Precision will NOT be used for service inputs. It is up to each individual
          ## service input to set the timestamp at the appropriate precision.
          ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
          precision = ""

          ## Log at debug level.
          # debug = false
          ## Log only error level messages.
          # quiet = false

          ## Log target controls the destination for logs and can be one of "file",
          ## "stderr" or, on Windows, "eventlog".  When set to "file", the output file
          ## is determined by the "logfile" setting.
          # logtarget = "file"

          ## Name of the file to be logged to when using the "file" logtarget.  If set to
          ## the empty string then logs are written to stderr.
          # logfile = ""

          ## The logfile will be rotated after the time interval specified.  When set
          ## to 0 no time based rotation is performed.  Logs are rotated only when
          ## written to, if there is no log activity rotation may be delayed.
          # logfile_rotation_interval = "0d"

          ## The logfile will be rotated when it becomes larger than the specified
          ## size.  When set to 0 no size based rotation is performed.
          # logfile_rotation_max_size = "0MB"

          ## Maximum number of rotated archives to keep, any older logs are deleted.
          ## If set to -1, no archives are removed.
          # logfile_rotation_max_archives = 5

          ## Override default hostname, if empty use os.Hostname()
          hostname = ""
          ## If set to true, do no set the "host" tag in the telegraf agent.
          omit_hostname = false


        ###############################################################################
        #                            OUTPUT PLUGINS                                   #
        ###############################################################################

        # Configuration for sending metrics to InfluxDB
        [[outputs.influxdb_v2]]
          ## The URLs of the InfluxDB cluster nodes.
          ##
          ## Multiple URLs can be specified for a single cluster, only ONE of the
          ## urls will be written to each interval.
          ##   ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"]
          urls = ["$INFLUX_HOST"]
          ## Token for authentication.
          token = "$INFLUX_TOKEN"
          ## Organization is the name of the organization you wish to write to; must exist.
          organization = "$INFLUX_ORG"
          ## Destination bucket to write into.
          bucket = "telegraf"
          ## The value of this tag will be used to determine the bucket.  If this
          ## tag is not set the 'bucket' option is used as the default.
          # bucket_tag = ""
          ## If true, the bucket tag will not be added to the metric.
          # exclude_bucket_tag = false
          ## Timeout for HTTP messages.
          # timeout = "5s"
          ## Additional HTTP headers
          # http_headers = {"X-Special-Header" = "Special-Value"}
          ## HTTP Proxy override, if unset values the standard proxy environment
          ## variables are consulted to determine which proxy, if any, should be used.
          # http_proxy = "http://corporate.proxy:3128"
          ## HTTP User-Agent
          # user_agent = "telegraf"
          ## Content-Encoding for write request body, can be set to "gzip" to
          ## compress body or "identity" to apply no encoding.
          # content_encoding = "gzip"
          ## Enable or disable uint support for writing uints influxdb 2.0.
          # influx_uint_support = false
          ## Optional TLS Config for use on HTTP connections.
          # tls_ca = "/etc/telegraf/ca.pem"
          # tls_cert = "/etc/telegraf/cert.pem"
          # tls_key = "/etc/telegraf/key.pem"
          ## Use TLS but skip chain & host verification
          # insecure_skip_verify = false

        ###############################################################################
        #                            INPUT PLUGINS                                    #
        ###############################################################################


        # Read metrics about cpu usage
        [[inputs.cpu]]
          ## Whether to report per-cpu stats or not
          percpu = true
          ## Whether to report total system cpu stats or not
          totalcpu = true
          ## If true, collect raw CPU time metrics.
          collect_cpu_time = false
          ## If true, compute and report the sum of all non-idle CPU states.
          report_active = false


        # Read metrics about disk usage by mount point
        [[inputs.disk]]
          ## By default stats will be gathered for all mount points.
          ## Set mount_points will restrict the stats to only the specified mount points.
          # mount_points = ["/"]

          ## Ignore mount points by filesystem type.
          ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]


        # Read metrics about disk IO by device
        [[inputs.diskio]]
          ## By default, telegraf will gather stats for all devices including
          ## disk partitions.
          ## Setting devices will restrict the stats to the specified devices.
          # devices = ["sda", "sdb", "vd*"]
          ## Uncomment the following line if you need disk serial numbers.
          # skip_serial_number = false
          #
          ## On systems which support it, device metadata can be added in the form of
          ## tags.
          ## Currently only Linux is supported via udev properties. You can view
          ## available properties for a device by running:
          ## 'udevadm info -q property -n /dev/sda'
          ## Note: Most, but not all, udev properties can be accessed this way. Properties
          ## that are currently inaccessible include DEVTYPE, DEVNAME, and DEVPATH.
          # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"]
          #
          ## Using the same metadata source as device_tags, you can also customize the
          ## name of the device via templates.
          ## The 'name_templates' parameter is a list of templates to try and apply to
          ## the device. The template may contain variables in the form of '$PROPERTY' or
          ## '${PROPERTY}'. The first template which does not contain any variables not
          ## present for the device is used as the device name tag.
          ## The typical use case is for LVM volumes, to get the VG/LV name instead of
          ## the near-meaningless DM-0 name.
          # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"]


        # Get kernel statistics from /proc/stat
        [[inputs.kernel]]
          # no configuration


        # Read metrics about memory usage
        [[inputs.mem]]
          # no configuration

        # Read metrics about network interface usage
        [[inputs.net]]
          ## By default, telegraf gathers stats from any up interface (excluding loopback)
          ## Setting interfaces will tell it to gather these explicit interfaces,
          ## regardless of status.
          ##
          # interfaces = ["eth0"]
          ##
          ## On linux systems telegraf also collects protocol stats.
          ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics.
          ##
          # ignore_protocol_stats = false
          ##

        # Get the number of processes and group them by status
        [[inputs.processes]]
          # no configuration


        # Read metrics about swap memory usage
        [[inputs.swap]]
          # no configuration


        # Read metrics about system load & uptime
        [[inputs.system]]
          ## Uncomment to remove deprecated metrics.
          # fielddrop = ["uptime_format"]
    description: A set of plugins for monitoring your Linux system.
---
apiVersion: influxdata.com/v2alpha1
kind: Dashboard
metadata:
    name: linux-system
spec:
    name: Linux System
    associations:
      - kind: Label
        name: linux-system-template
    charts:
      - height: 1
        kind: Markdown
        name: Name this Cell
        note: This dashboard gives you an overview of Linux System metrics. See the
            [Telegraf Documentation](https://v2.docs.influxdata.com/v2.0/reference/telegraf-plugins/)
            for help configuring these plugins.
        queries: null
        width: 12
      - colors:
          - hex: '#00C9FF'
            name: laser
            type: text
        height: 1
        kind: Single_Stat
        name: System Uptime
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "system")
                  |> filter(fn: (r) => r._field == "uptime")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> last()
                  |> map(fn: (r) => ({ _value: float(v: r._value) / 86400.00 }))
        suffix: ' days'
        width: 3
        yPos: 1
      - axes:
          - base: "10"
            name: x
            scale: linear
          - base: "10"
            name: y
            scale: linear
            suffix: '%'
          - base: "10"
            name: y2
            scale: linear
        geom: line
        height: 3
        kind: Xy
        name: Disk Usage
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "disk")
                  |> filter(fn: (r) => r._field == "used_percent")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
        width: 3
        yPos: 2
      - axes:
          - base: "10"
            name: y2
            scale: linear
          - base: "10"
            name: x
            scale: linear
          - base: "2"
            label: Bytes
            name: y
            scale: linear
        geom: line
        height: 3
        kind: Xy
        name: Disk IO
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "diskio")
                  |> filter(fn: (r) => r._field == "read_bytes" or r._field == "write_bytes")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> derivative(unit: v.windowPeriod, nonNegative: false)
        width: 3
        yPos: 5
      - colors:
          - hex: '#00C9FF'
            name: laser
            type: text
        decimalPlaces: 2
        height: 1
        kind: Single_Stat
        name: nCPUs
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "system")
                  |> filter(fn: (r) => r._field == "n_cpus")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> last()
        suffix: ' cpus'
        width: 2
        xPos: 3
        yPos: 1
      - axes:
          - base: "10"
            name: y
            scale: linear
            suffix: '%'
          - base: "10"
            name: y2
            scale: linear
          - base: "10"
            name: x
            scale: linear
        geom: line
        height: 3
        kind: Xy
        name: CPU Usage
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "cpu")
                  |> filter(fn: (r) => r._field == "usage_user" or r._field == "usage_system" or r._field == "usage_idle")
                  |> filter(fn: (r) => r.cpu == "cpu-total")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
        width: 3
        xPos: 3
        yPos: 2
      - axes:
          - base: "10"
            name: x
            scale: linear
          - base: "2"
            label: Bytes
            name: y
            scale: linear
          - base: "10"
            name: y2
            scale: linear
        geom: line
        height: 3
        kind: Xy
        name: Network
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "net")
                  |> filter(fn: (r) => r._field == "bytes_recv" or r._field == "bytes_sent")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> derivative(unit: v.windowPeriod, nonNegative: false)
        width: 3
        xCol: _time
        xPos: 3
        yCol: _value
        yPos: 5
      - colors:
          - hex: '#00C9FF'
            name: laser
            type: text
        decimalPlaces: 2
        height: 1
        kind: Single_Stat
        name: System Load
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "system")
                  |> filter(fn: (r) => r._field == "load1")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> last()
        width: 2
        xPos: 5
        yPos: 1
      - axes:
          - base: "10"
            name: x
            scale: linear
          - base: "10"
            label: Load
            name: y
            scale: linear
          - base: "10"
            name: y2
            scale: linear
        geom: line
        height: 3
        kind: Xy
        name: System Load
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "system")
                  |> filter(fn: (r) => r._field == "load1" or r._field == "load5" or r._field == "load15")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
        width: 3
        xPos: 6
        yPos: 2
      - axes:
          - base: "10"
            name: x
            scale: linear
          - base: "10"
            name: y
            scale: linear
          - base: "10"
            name: y2
            scale: linear
        geom: line
        height: 3
        kind: Xy
        name: Processes
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "processes")
                  |> filter(fn: (r) => r._field == "running" or r._field == "blocked" or r._field == "idle" or r._field == "unknown")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> aggregateWindow(every: v.windowPeriod, fn: max)
        width: 3
        xPos: 6
        yPos: 5
      - colors:
          - hex: '#00C9FF'
            name: laser
            type: text
        decimalPlaces: 2
        height: 1
        kind: Single_Stat
        name: Total Memory
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "mem")  
                  |> filter(fn: (r) => r._field == "total")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> last()  
                  |> map(fn: (r) => ({r with _value: float(v: r._value) / 1024.0 / 1024.0 / 1024.0}))
        suffix: ' GB'
        width: 2
        xPos: 7
        yPos: 1
      - axes:
          - base: "10"
            name: x
            scale: linear
          - base: "10"
            name: y
            scale: linear
            suffix: '%'
          - base: "10"
            name: y2
            scale: linear
        colors:
          - hex: '#00C9FF'
            name: laser
            type: text
          - hex: '#8F8AF4'
            name: Do Androids Dream of Electric Sheep?
            type: scale
          - hex: '#A51414'
            name: Do Androids Dream of Electric Sheep?
            type: scale
          - hex: '#F4CF31'
            name: Do Androids Dream of Electric Sheep?
            type: scale
        decimalPlaces: 1
        height: 4
        kind: Single_Stat_Plus_Line
        name: Memory Usage
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "mem")
                  |> filter(fn: (r) => r._field == "used_percent")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
        suffix: '%'
        width: 3
        xPos: 9
        yPos: 1
      - axes:
          - base: "10"
            name: x
            scale: linear
          - base: "2"
            name: y
            scale: linear
          - base: "10"
            name: y2
            scale: linear
        geom: line
        height: 3
        kind: Xy
        name: Swap
        queries:
          - query: |-
                from(bucket: v.bucket)
                  |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
                  |> filter(fn: (r) => r._measurement == "swap")
                  |> filter(fn: (r) => r._field == "total" or r._field == "used")
                  |> filter(fn: (r) => r.host == v.linux_host)
                  |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
        width: 3
        xPos: 9
        yPos: 5
    description: A collection of useful visualizations for monitoring your Linux system