# Prometheus Operator manifests for monitoring a Kafka cluster:
#   - two ServiceMonitors (Kafka brokers, Cruise Control)
#   - one PrometheusRule with the Kafka alerts
#   - RBAC (ServiceAccount, ClusterRole, ClusterRoleBinding) for Prometheus
#   - the Prometheus instance itself
---
# Scrapes Services labelled app=kafka / kafka_cr=kafka in the "kafka"
# namespace on their "metrics" port every 10s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kafka-servicemonitor
  labels:
    # These labels are what the Prometheus serviceMonitorSelector below matches.
    app: kafka
    kafka_cr: kafka
spec:
  selector:
    matchLabels:
      app: kafka
      kafka_cr: kafka
  namespaceSelector:
    matchNames:
      - kafka
  endpoints:
    - interval: 10s
      port: metrics
      path: /metrics
      honorLabels: true
      relabelings:
        # Copy the pod IP onto every scraped series as "pod_ip".
        - sourceLabels: [__meta_kubernetes_pod_ip]
          separator: ';'
          regex: '(.*)'
          targetLabel: pod_ip
          replacement: '$1'
          action: replace
        # Copy the pod's "brokerId" label onto every scraped series.
        - sourceLabels: [__meta_kubernetes_pod_label_brokerId]
          separator: ';'
          regex: '(.*)'
          targetLabel: brokerId
          replacement: '$1'
          action: replace
        # Copy the pod's "kafka_cr" label onto every scraped series.
        - sourceLabels: [__meta_kubernetes_pod_label_kafka_cr]
          separator: ';'
          regex: '(.*)'
          targetLabel: kafka_cr
          replacement: '$1'
          action: replace
---
# Scrapes Services labelled app=cruisecontrol in the "kafka" namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: cruisecontrol-servicemonitor
  labels:
    # Labelled like the Kafka monitor so the same Prometheus selects it.
    app: kafka
    kafka_cr: kafka
spec:
  selector:
    matchLabels:
      app: cruisecontrol
  namespaceSelector:
    matchNames:
      - kafka
  endpoints:
    - interval: 10s
      port: metrics
      path: /metrics
      honorLabels: true
---
# Alerting rules for the Kafka cluster.
#
# NOTE(review): besides "description" and "summary", the annotations carry
# storageClass / mountPath / diskSize / image / command. Presumably these are
# read by the Kafka operator's alert handler to drive scaling actions --
# confirm against the operator before changing them.
#
# NOTE(review): every description references {{ $labels.brokerId }}, but
# several expressions aggregate that label away (avg() with no "by" clause,
# or "by (namespace, kafka_cr)"), so the rendered text likely has an empty
# broker id for those alerts. Expressions are left unchanged here.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  creationTimestamp: null
  labels:
    # Matched by the Prometheus ruleSelector below.
    prometheus: kafka-rules
  name: kafka-alerts
spec:
  groups:
    - name: KafkaAlerts
      rules:
        - alert: BrokerOverLoaded
          expr: avg(sum by(brokerId, kafka_cr, namespace) (rate(kafka_network_requestmetrics_requests_total[15m]))) > 30
          for: 5m
          labels:
            severity: alert
          annotations:
            description: 'broker {{ $labels.brokerId }} overloaded (current value is: {{ $value }})'
            summary: 'broker overloaded'
            # If brokerConfigGroup is defined it will override scaled broker config
            # brokerConfigGroup: 'default_group'
            storageClass: 'standard'
            mountPath: '/kafkalog'
            diskSize: '2G'
            image: 'wurstmeister/kafka:2.12-2.3.0'
            command: 'upScale'
        - alert: BrokerUnderReplicated
          expr: kafka_server_replicamanager_underreplicatedpartitions > 0
          for: 5m
          labels:
            severity: alert
          annotations:
            description: 'broker {{ $labels.brokerId }} underreplicated'
            summary: 'broker underreplicated'
            storageClass: 'standard'
            mountPath: '/kafkalog'
            diskSize: '2G'
            image: 'wurstmeister/kafka:2.12-2.3.0'
            command: 'upScale'
        - alert: PartitionCountHigh
          expr: max(kafka_server_replicamanager_partitioncount) by (namespace, kafka_cr) > 100
          for: 3m
          labels:
            severity: alert
          annotations:
            description: 'broker {{ $labels.brokerId }} has high partition count'
            summary: 'high partition count'
            storageClass: 'standard'
            mountPath: '/kafkalog'
            diskSize: '2G'
            image: 'wurstmeister/kafka:2.12-2.3.0'
            command: 'upScale'
        - alert: PartitionCountLow
          expr: min(kafka_server_replicamanager_partitioncount) by (namespace, kafka_cr) < 40
          for: 3m
          labels:
            severity: alert
          annotations:
            description: 'broker {{ $labels.brokerId }} has low partition count'
            summary: 'low partition count'
            command: 'downScale'
        - alert: RemainingDiskSpaceLow
          # Fires when a PVC named kafka-* has fewer than 1e9 bytes available.
          expr: 'kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"kafka-.*"} < 1 * 1000 * 1000 * 1000'
          for: 2m
          labels:
            severity: alert
          annotations:
            description: 'broker {{ $labels.brokerId }} has low disk space'
            summary: 'low diskspace'
            storageClass: 'standard'
            mountPath: '/kafkalog'
            diskSize: '2G'
            command: 'addPVC'
---
# Identity the Prometheus pods run as (see spec.serviceAccountName below).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
---
# Read-only access Prometheus needs for service discovery and scraping.
# rbac.authorization.k8s.io/v1beta1 is no longer served as of Kubernetes
# v1.22; v1 has been available since v1.8 with the same schema.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - configmaps
    verbs: ["get"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# Grants the ClusterRole above to the "prometheus" ServiceAccount.
# NOTE(review): the subject namespace is hard-coded to "default"; it must be
# the namespace the ServiceAccount above is actually created in -- confirm.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: default
---
# Prometheus instance that loads the ServiceMonitors and rules defined above
# and forwards alerts to the "kafka-operator-alertmanager" endpoint.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: kafka-prometheus
spec:
  serviceAccountName: prometheus
  alerting:
    alertmanagers:
      - namespace: kafka
        name: kafka-operator-alertmanager
        port: alerts
  # Selects ServiceMonitors labelled app=kafka, kafka_cr=kafka.
  serviceMonitorSelector:
    matchLabels:
      app: kafka
      kafka_cr: kafka
  # Selects PrometheusRules labelled prometheus=kafka-rules.
  ruleSelector:
    matchLabels:
      prometheus: kafka-rules
  resources:
    requests:
      memory: 400Mi
  enableAdminAPI: false