# InfluxDB template: Docker monitoring.
#
# Resources defined below, in order:
#   - Labels used to tag the dashboard and the Telegraf configuration
#   - Bucket "docker" (7-day retention)
#   - Threshold checks for container memory, disk, exit code and CPU
#   - HTTP notification endpoint and a CRIT notification rule
#   - Dashboard "Docker"
#   - Telegraf configuration "Docker Monitoring"
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: adoring-pasteur-d4d00d
spec:
  color: '#326BBA'
  name: inputs.net
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: agreeing-goldberg-d4d013
spec:
  color: '#326BBA'
  name: inputs.kernel
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: dreamy-heyrovsky-d4d011
spec:
  color: '#326BBA'
  name: inputs.disk
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: elastic-morse-d4d001
spec:
  color: '#326BBA'
  name: inputs.swap
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: elegant-yonath-d4d015
spec:
  color: '#326BBA'
  name: inputs.system
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: gallant-sanderson-d4d003
spec:
  color: '#326BBA'
  name: inputs.processes
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: goofy-yalow-d4d005
spec:
  color: '#326BBA'
  name: inputs.diskio
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: inspiring-goodall-d4d00b
spec:
  color: '#326BBA'
  name: inputs.docker
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: nice-kilby-d4d009
spec:
  color: '#108174'
  name: outputs.influxdb_v2
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: unruffled-benz-d4d007
spec:
  color: '#326BBA'
  name: inputs.cpu
---
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: vigilant-chatelet-d4d00f
spec:
  color: '#326BBA'
  name: inputs.mem
---
# Bucket that every query and the Telegraf output below refer to.
apiVersion: influxdata.com/v2alpha1
kind: Bucket
metadata:
  name: heuristic-sinoussi-d4d017
spec:
  name: docker
  retentionRules:
    # 604800 s = 7 days
    - everySeconds: 604800
      type: expire
---
# Check: mean container memory usage percentage over 15-minute windows.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: crumbling-meitner-14d002
spec:
  description: Container mem usage is above 80% for 15 minutes
  every: 15m0s
  name: Container Mem Usage Pct
  query: |-
    from(bucket: "docker")
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_mem")
      |> filter(fn: (r) => r["_field"] == "usage_percent")
      |> aggregateWindow(every: 15m, fn: mean)
      |> yield(name: "mean")
  status: active
  # Every placeholder needs the "${ ... }" form; a bare "{...}" is emitted
  # literally instead of being replaced with the column value.
  statusMessageTemplate: 'Check: ${ r._check_name } for container ${ r.container_name } is: ${ r._level }'
  thresholds:
    - level: WARN
      type: greater
      value: 80
    - level: CRIT
      type: greater
      value: 90
    - level: OK
      type: lesser
      value: 80
---
# Check: mean "usage_percent" of docker_container_blkio over 15-minute windows.
# NOTE(review): Telegraf's docker input may not emit a "usage_percent" field
# for the docker_container_blkio measurement; confirm that this check can
# actually match data before relying on it.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: ecstatic-mendeleev-d4d01b
spec:
  description: Container disk usage is above 80%
  every: 15m0s
  name: Container Disk Usage
  query: |-
    from(bucket: "docker")
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_blkio")
      |> filter(fn: (r) => r["_field"] == "usage_percent")
      |> aggregateWindow(every: 15m, fn: mean)
      |> yield(name: "mean")
  status: active
  statusMessageTemplate: 'Check: ${ r._check_name } is: ${ r._level }'
  thresholds:
    - level: WARN
      type: greater
      value: 80
    - level: CRIT
      type: greater
      value: 90
    - level: OK
      type: lesser
      value: 80
---
# Check: goes CRIT when a container reports an exit code other than 0.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: frosty-greider-14d004
spec:
  description: Container exits with a non zero exit status
  every: 5m0s
  name: Container Non Zero Exit Code
  query: |-
    from(bucket: "docker")
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_status")
      |> filter(fn: (r) => r["_field"] == "exitcode")
      |> unique()
      |> yield(name: "unique")
  status: active
  statusMessageTemplate: 'Check: ${ r._check_name } for container ${ r.container_name } has exited with a non 0 exit code'
  thresholds:
    # min == max == 0, so any value other than 0 is outside the range.
    - level: CRIT
      max: 0
      min: 0
      type: outside_range
---
# Check: mean container CPU usage percentage over 15-minute windows.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: gracious-bose-14d000
spec:
  description: Container cpu usage is above 80% for 15 minutes
  every: 15m0s
  name: Container CPU Usage Pct
  query: |-
    from(bucket: "docker")
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_cpu")
      |> filter(fn: (r) => r["_field"] == "usage_percent")
      |> aggregateWindow(every: 15m, fn: mean)
      |> yield(name: "mean")
  status: active
  statusMessageTemplate: 'Check: ${ r._check_name } for container ${ r.container_name } is: ${ r._level }'
  thresholds:
    - level: WARN
      type: greater
      value: 80
    - level: CRIT
      type: greater
      value: 90
    - level: OK
      type: lesser
      value: 80
---
# HTTP endpoint that receives notifications via unauthenticated POST.
# NOTE(review): the localhost URL looks like a placeholder; point it at the
# real alert receiver after installing the template.
apiVersion: influxdata.com/v2alpha1
kind: NotificationEndpointHTTP
metadata:
  name: earnest-ishizaka-14d006
spec:
  method: POST
  name: HTTP POST
  status: active
  type: none
  url: http://localhost:25378/alert
---
# Rule: notify the HTTP endpoint when a check transitions into CRIT.
apiVersion: influxdata.com/v2alpha1
kind: NotificationRule
metadata:
  name: gifted-haslett-14d008
spec:
  endpointName: earnest-ishizaka-14d006
  every: 5m0s
  name: Crit Notifier
  statusRules:
    - currentLevel: CRIT
      previousLevel: OK
    # The CPU and memory checks define a WARN band (80-90), so a rising value
    # usually passes through WARN first; without this rule that escalation to
    # CRIT would never notify.
    - currentLevel: CRIT
      previousLevel: WARN
---
# Dashboard "Docker": daemon, host system and per-container cells.
apiVersion: influxdata.com/v2alpha1
kind: Dashboard
metadata:
  name: earning-boyd-94d001
spec:
  associations:
    - kind: Label
      name: inspiring-goodall-d4d00b
    - kind: Label
      name: elegant-yonath-d4d015
  charts:
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: >-
        This dashboard gives you an overview of [Docker](https://docker.com)
        metrics. See the
        [Telegraf Documentation](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/docker)
        for help configuring these plugins.
      width: 12
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: '# Docker Daemon'
      width: 12
      yPos: 1
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Num Images
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "docker")
              |> filter(fn: (r) => r["_field"] == "n_images")
      width: 2
      yPos: 2
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: '# System Stats'
      width: 12
      yPos: 3
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      height: 1
      kind: Single_Stat
      name: System Uptime
      queries:
        # Divides the uptime value by 86400 to match the " days" suffix.
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "system")
              |> filter(fn: (r) => r["_field"] == "uptime")
              |> map(fn: (r) => ({r with _value: float(v: r._value) / 86400.0}))
      suffix: ' days'
      width: 2
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 2
      kind: Xy
      name: Swap
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "swap")
              |> filter(fn: (r) => r._field == "total" or r._field == "used")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      yPos: 5
    - axes:
        - base: "10"
          name: y2
          scale: linear
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          label: Bytes
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Network TX traffic per container / sec
      queries:
        # tx_bytes is a cumulative counter; nonNegative: true keeps a counter
        # reset (e.g. container restart) from showing up as a negative rate.
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_net")
              |> filter(fn: (r) => r._field == "tx_bytes")
              |> derivative(unit: 1s, nonNegative: true)
              |> window(period: v.windowPeriod)
              |> mean()
              |> keep(columns: ["_measurement", "container_name", "host", "_value", "_field", "_stop"])
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
      width: 6
      xCol: _stop
      yCol: _value
      yPos: 7
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: '# Container Stats'
      width: 12
      yPos: 10
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Disk Usage
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "disk")
              |> filter(fn: (r) => r._field == "used_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      yPos: 11
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Total Num Containers
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "docker")
              |> filter(fn: (r) => r["_field"] == "n_containers")
      width: 2
      xPos: 2
      yPos: 2
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: System Load
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "system")
              |> filter(fn: (r) => r._field == "load1")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 1
      xPos: 2
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          label: Load
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: System Load
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "system")
              |> filter(fn: (r) => r._field == "load1" or r._field == "load5" or r._field == "load15")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      xPos: 3
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Memory usage % per container
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_mem")
              |> filter(fn: (r) => r._field == "usage_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> keep(columns: ["_measurement", "container_name", "host", "_value", "_field", "_stop"])
              |> yield(name: "mean")
      width: 3
      xPos: 3
      yPos: 11
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Number of Running Docker containers
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "docker")
              |> filter(fn: (r) => r["_field"] == "n_containers_running")
              |> group(columns: ["engine_host"])
      width: 2
      xPos: 4
      yPos: 2
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Docker Daemon nCPUs
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker")
              |> filter(fn: (r) => r._field == "n_cpus")
              |> window(period: v.windowPeriod)
              |> last()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "last")
      suffix: ' cpus'
      width: 2
      xPos: 6
      yPos: 2
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: CPU Usage
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "cpu")
              |> filter(fn: (r) => r._field == "usage_user" or r._field == "usage_system" or r._field == "usage_idle")
              |> filter(fn: (r) => r.cpu == "cpu-total")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      xPos: 6
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          label: Bytes
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      colors:
        - hex: '#FDC44F'
          name: Cthulhu
          type: scale
        - hex: '#007C76'
          name: Cthulhu
          type: scale
        - hex: '#8983FF'
          name: Cthulhu
          type: scale
      geom: line
      height: 3
      kind: Xy
      name: Network RX traffic per container / sec
      queries:
        # rx_bytes is a cumulative counter; nonNegative: true keeps a counter
        # reset (e.g. container restart) from showing up as a negative rate.
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_net")
              |> filter(fn: (r) => r._field == "rx_bytes")
              |> derivative(unit: 1s, nonNegative: true)
              |> window(period: v.windowPeriod)
              |> mean()
              |> keep(columns: ["_measurement", "container_name", "host", "_value", "_field", "_stop"])
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
      width: 6
      xCol: _stop
      xPos: 6
      yCol: _value
      yPos: 7
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Memory usage per container
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_mem")
              |> filter(fn: (r) => r._field == "usage")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> keep(columns: ["_measurement", "container_name", "host", "_value", "_field", "_stop"])
              |> yield(name: "mean")
      width: 3
      xPos: 6
      yPos: 11
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Docker Daemon Total Mem
      queries:
        # Divides memory_total by 1024 three times to match the " GB" suffix.
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker")
              |> filter(fn: (r) => r._field == "memory_total")
              |> window(period: v.windowPeriod)
              |> last()
              |> map(fn: (r) => ({r with _value: float(v: r._value) / 1024.0 / 1024.0 / 1024.0}))
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "last")
      suffix: ' GB'
      width: 2
      xPos: 8
      yPos: 2
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      colors:
        - hex: '#00C9FF'
          name: laser
          type: text
        - hex: '#8F8AF4'
          name: 'Do Androids Dream of Electric Sheep?'
          type: scale
        - hex: '#A51414'
          name: 'Do Androids Dream of Electric Sheep?'
          type: scale
        - hex: '#F4CF31'
          name: 'Do Androids Dream of Electric Sheep?'
          type: scale
      decimalPlaces: 1
      height: 3
      kind: Single_Stat_Plus_Line
      name: System Memory Usage
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "mem")
              |> filter(fn: (r) => r._field == "used_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      suffix: '%'
      width: 3
      xCol: _stop
      xPos: 9
      yCol: _value
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: CPU usage per container
      queries:
        - query: |-
            from(bucket: "docker")
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "cpu" or r._measurement == "docker_container_cpu")
              |> filter(fn: (r) => r._field == "usage_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> keep(columns: ["_measurement", "container_name", "host", "_value", "_field", "_stop"])
              |> yield(name: "mean")
      width: 3
      xPos: 9
      yPos: 11
  description: A collection of useful visualizations for monitoring Docker stats
  name: Docker
---
# Telegraf configuration: collects host and Docker metrics and writes them to
# the "docker" bucket. $INFLUX_HOST, $INFLUX_TOKEN and $INFLUX_ORG are read
# from the environment of the Telegraf process.
apiVersion: influxdata.com/v2alpha1
kind: Telegraf
metadata:
  name: lucid-goodall-d4d017
spec:
  associations:
    - kind: Label
      name: agreeing-goldberg-d4d013
    - kind: Label
      name: vigilant-chatelet-d4d00f
    - kind: Label
      name: elastic-morse-d4d001
    - kind: Label
      name: gallant-sanderson-d4d003
    - kind: Label
      name: goofy-yalow-d4d005
    - kind: Label
      name: unruffled-benz-d4d007
    - kind: Label
      name: adoring-pasteur-d4d00d
    - kind: Label
      name: dreamy-heyrovsky-d4d011
    - kind: Label
      name: inspiring-goodall-d4d00b
    - kind: Label
      name: elegant-yonath-d4d015
    - kind: Label
      name: nice-kilby-d4d009
  config: |
    # Global tags can be specified here in key="value" format.
    [global_tags]
      # dc = "us-east-1" # will tag all metrics with dc=us-east-1
      # rack = "1a"
      ## Environment variables can be used as tags, and throughout the config file
      # user = "$USER"

    # Configuration for telegraf agent
    [agent]
      interval = "10s"
      round_interval = true
      metric_batch_size = 1000
      metric_buffer_limit = 10000
      collection_jitter = "0s"
      flush_interval = "10s"
      flush_jitter = "0s"
      precision = ""
      hostname = ""
      omit_hostname = false

    ###############################################################################
    #                            OUTPUT PLUGINS                                   #
    ###############################################################################

    # Configuration for sending metrics to InfluxDB
    [[outputs.influxdb_v2]]
      urls = ["$INFLUX_HOST"]
      token = "$INFLUX_TOKEN"
      organization = "$INFLUX_ORG"
      ## Destination bucket to write into.
      bucket = "docker"

    ###############################################################################
    #                            INPUT PLUGINS                                    #
    ###############################################################################

    [[inputs.cpu]]
      percpu = true
      totalcpu = true
      collect_cpu_time = false
      report_active = false
    [[inputs.disk]]
      #ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
    [[inputs.diskio]]
    [[inputs.kernel]]
    [[inputs.mem]]
    [[inputs.net]]
    [[inputs.processes]]
    [[inputs.swap]]
    [[inputs.system]]
    [[inputs.docker]]
      endpoint = "unix:///var/run/docker.sock"
      gather_services = false
      container_names = []
      source_tag = false
      container_name_include = []
      container_name_exclude = []
      timeout = "5s"
      perdevice = true
      total = true
      docker_label_include = []
      docker_label_exclude = []
      tag_env = []
  description: A set of plugins for monitoring your System and Docker.
  name: Docker Monitoring