---
# Alerting rules evaluated by the Thanos ruler StatefulSet in this file.
# The data key must keep its `.rules.yaml` suffix: the ruler is started with
# --rule-file=/etc/thanos-ruler/*.rules.yaml and this ConfigMap is mounted at
# /etc/thanos-ruler, so only keys matching that glob are loaded.
apiVersion: v1
kind: ConfigMap
metadata:
  name: thanos-ruler-rules
  namespace: prometheus-ha
data:
  alert_down_services.rules.yaml: |
    groups:
      - name: metaprometheus-ha
        rules:
          - alert: PrometheusReplicaDown
            annotations:
              message: Prometheus replica in cluster {{$labels.cluster}} has disappeared from Prometheus target discovery.
            expr: |
              sum(up{cluster="prometheus-ha", instance=~".*:9090", job="kubernetes-service-endpoints"}) by (job,cluster) < 3
            for: 15s
            labels:
              severity: critical
---
# Thanos ruler: runs `thanos rule` against the rule files from the
# thanos-ruler-rules ConfigMap, queries thanos-querier:9090 and sends alerts
# to http://alertmanager:9093.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app: thanos-ruler
  name: thanos-ruler
  namespace: prometheus-ha
spec:
  replicas: 1
  selector:
    matchLabels:
      app: thanos-ruler
  serviceName: thanos-ruler
  template:
    metadata:
      labels:
        app: thanos-ruler
        thanos-store-api: "true"
    spec:
      containers:
        - name: thanos
          image: quay.io/thanos/thanos:v0.8.0
          args:
            - rule
            - --log.level=debug
            # Must match the mountPath of the `data` volume below.
            - --data-dir=/data
            - --eval-interval=15s
            # Must match the mountPath of the `rules` volume below.
            - --rule-file=/etc/thanos-ruler/*.rules.yaml
            - --alertmanagers.url=http://alertmanager:9093
            - --query=thanos-querier:9090
            - --objstore.config-file=/config/thanos.yaml
            # The inner double quotes are part of the flag value; the outer
            # single quotes keep them literal for the YAML parser.
            - '--label=ruler_cluster="prometheus-ha"'
            # $(POD_NAME) is expanded by Kubernetes from the env var below.
            - '--label=replica="$(POD_NAME)"'
          env:
            # NOTE(review): no volume in this manifest is mounted under
            # /etc/secret - confirm this file is provided some other way or
            # that the variable is still needed.
            - name: AZURE_APPLICATION_CREDENTIALS
              value: /etc/secret/thanos-azure-credentials.yaml
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          ports:
            - name: http
              containerPort: 10902
            - name: grpc
              containerPort: 10901
          livenessProbe:
            httpGet:
              port: http
              path: /-/healthy
          readinessProbe:
            httpGet:
              port: http
              path: /-/ready
          volumeMounts:
            # Object-store configuration read via --objstore.config-file.
            - name: config
              mountPath: /config/
              readOnly: true
            # Mounted at /data so that --data-dir actually lands on this
            # volume (it was previously mounted at /var/thanos/store).
            - name: data
              mountPath: /data
            # Rule files from the thanos-ruler-rules ConfigMap; without this
            # mount the --rule-file glob matches nothing.
            - name: rules
              mountPath: /etc/thanos-ruler
              readOnly: true
      volumes:
        - name: data
          emptyDir: {}
        - name: config
          secret:
            secretName: thanos-objstore-config
        - name: rules
          configMap:
            name: thanos-ruler-rules
---
# Exposes the ruler's HTTP endpoint: service port 9090 forwards to the
# container port named `http` (10902).
apiVersion: v1
kind: Service
metadata:
  labels:
    app: thanos-ruler
  name: thanos-ruler
  namespace: prometheus-ha
spec:
  ports:
    - port: 9090
      protocol: TCP
      targetPort: http
      name: http
  selector:
    app: thanos-ruler