apiVersion: template.openshift.io/v1
kind: Template
metadata:
  name: prometheus
  annotations:
    "openshift.io/display-name": Prometheus
    description: |
      A monitoring solution for an OpenShift cluster - collects metrics and alerts from nodes, services, and the infrastructure. This is a tech preview feature.
    iconClass: icon-cogs
    tags: "monitoring,prometheus,alertmanager,time-series"
parameters:
- description: The namespace to instantiate prometheus under. Defaults to 'kube-system'.
  name: NAMESPACE
  required: true
  # value: kube-system
- description: The location of the proxy image
  name: IMAGE_PROXY
  value: openshift/oauth-proxy:v1.0.0
- description: The location of the prometheus image
  name: IMAGE_PROMETHEUS
  value: openshift/prometheus:v2.0.0
- description: The location of the alertmanager image
  name: IMAGE_ALERTMANAGER
  value: openshift/prometheus-alertmanager:v0.9.1
- description: The location of the alert-buffer image
  name: IMAGE_ALERT_BUFFER
  value: openshift/prometheus-alert-buffer:v0.0.2
- description: The session secret for the proxy
  name: SESSION_SECRET
  generate: expression
  from: "[a-zA-Z0-9]{43}"
objects:
# Authorize the prometheus service account to read data about the cluster
- apiVersion: v1
  kind: ServiceAccount
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
    annotations:
      serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
      serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
- apiVersion: authorization.openshift.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: prometheus-cluster-reader
  roleRef:
    name: system:auth-delegator
  subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: "${NAMESPACE}"
- apiVersion: authorization.openshift.io/v1
  kind: RoleBinding
  metadata:
    name: prometheus-viewer
  roleRef:
    name: view
  subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: "${NAMESPACE}"
# Create a fully end-to-end TLS connection to the prometheus proxy
- apiVersion: route.openshift.io/v1
  kind: Route
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
  spec:
    to:
      name: prometheus
    port:
      targetPort: prometheus-https
    tls:
      termination: Reencrypt
      insecureEdgeTerminationPolicy: Redirect
- apiVersion: v1
  kind: Service
  metadata:
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/scheme: https
      service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls
    labels:
      name: prometheus
    name: prometheus
    namespace: "${NAMESPACE}"
  spec:
    ports:
    - name: prometheus-https
      port: 443
      protocol: TCP
      targetPort: 8443
    # - name: prometheus-http
    #   port: 9090
    #   protocol: TCP
    #   targetPort: 9090
    selector:
      app: prometheus
- apiVersion: v1
  kind: Secret
  metadata:
    name: prometheus-proxy
    namespace: "${NAMESPACE}"
  stringData:
    session_secret: "${SESSION_SECRET}="
- apiVersion: apps/v1beta1
  kind: StatefulSet
  metadata:
    labels:
      app: prometheus
    name: prometheus
    namespace: "${NAMESPACE}"
  spec:
    updateStrategy:
      type: RollingUpdate
    podManagementPolicy: Parallel
    selector:
      matchLabels:
        app: prometheus
    template:
      metadata:
        labels:
          app: prometheus
        name: prometheus
      spec:
        serviceAccountName: prometheus
        containers:
        # Deploy Prometheus behind an oauth proxy
        - name: prom-proxy
          image: ${IMAGE_PROXY}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "100m"
              memory: "200Mi"
          ports:
          - containerPort: 8443
            name: web
          args:
          - -provider=openshift
          - -https-address=:8443
          - -http-address=
          - -email-domain=*
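          # Authenticated requests are forwarded to the Prometheus listener
          # on localhost:9090; the -openshift-sar and -openshift-delegate-urls
          # args below limit access to users who can "get" this namespace.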
          - -upstream=http://localhost:9090
          - -client-id=system:serviceaccount:${NAMESPACE}:prometheus
          - -openshift-ca=/etc/pki/tls/cert.pem
          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
          - -tls-cert=/etc/tls/private/tls.crt
          - -tls-key=/etc/tls/private/tls.key
          - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
          - -cookie-secret-file=/etc/proxy/secrets/session_secret
          - -skip-auth-regex=^/metrics
          volumeMounts:
          - mountPath: /etc/tls/private
            name: prometheus-tls
          - mountPath: /etc/proxy/secrets
            name: prometheus-secrets
          - mountPath: /prometheus
            name: prometheus-data
        - name: prometheus
          resources:
            limits:
              cpu: "400m"
              memory: "1024Mi"
          # ports:
          # - containerPort: 9090
          #   name: prometheus
          args:
          - --storage.tsdb.retention=6h
          - --storage.tsdb.min-block-duration=2m
          - --config.file=/etc/prometheus/prometheus.yml
          - --web.listen-address=localhost:9090
          - --web.enable-lifecycle
          image: ${IMAGE_PROMETHEUS}
          imagePullPolicy: IfNotPresent
          volumeMounts:
          - mountPath: /etc/prometheus
            name: prometheus-config
          - mountPath: /prometheus
            name: prometheus-data
        # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
        # listens on https port 9443 (instead of 8443) so it does not clash with prom-proxy
        - name: alerts-proxy
          image: ${IMAGE_PROXY}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "100m"
              memory: "200Mi"
          ports:
          - containerPort: 9443
            name: web
          args:
          - -provider=openshift
          - -https-address=:9443
          - -http-address=
          - -email-domain=*
          - -upstream=http://localhost:9099
          - -client-id=system:serviceaccount:${NAMESPACE}:prometheus
          - -openshift-ca=/etc/pki/tls/cert.pem
          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
          - -tls-cert=/etc/tls/private/tls.crt
          - -tls-key=/etc/tls/private/tls.key
          - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
          - -cookie-secret-file=/etc/proxy/secrets/session_secret
          - -skip-auth-regex=^/metrics
          - -ssl-insecure-skip-verify=true
          volumeMounts:
          - mountPath: /etc/tls/private
            name: alerts-tls
          - mountPath: /etc/proxy/secrets
            name: alerts-secrets
        - name: alert-buffer
          args:
          - --storage-path=/alert-buffer/messages.db
          image: ${IMAGE_ALERT_BUFFER}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "100m"
              memory: "200Mi"
          volumeMounts:
          - mountPath: /alert-buffer
            name: alert-buffer-data
          ports:
          - containerPort: 9099
            name: alert-buf
        - name: alertmanager
          args:
          - -config.file=/etc/alertmanager/alertmanager.yml
          image: ${IMAGE_ALERTMANAGER}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "200m"
              memory: "200Mi"
          ports:
          - containerPort: 9093
            name: web
          volumeMounts:
          - mountPath: /etc/alertmanager
            name: alertmanager-config
          - mountPath: /alertmanager
            name: alertmanager-data
        restartPolicy: Always
        volumes:
        - name: prometheus-config
          configMap:
            defaultMode: 420
            name: prometheus
        - name: prometheus-secrets
          secret:
            secretName: prometheus-proxy
        - name: prometheus-tls
          secret:
            secretName: prometheus-tls
        - name: prometheus-data
          emptyDir: {}
        - name: alertmanager-config
          configMap:
            defaultMode: 420
            name: prometheus-alerts
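        # Session secret and serving certs for the alerts proxy; the
        # prometheus-alerts-tls secret is populated by the serving-cert
        # annotation on the "alerts" service defined later in this template.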
        - name: alerts-secrets
          secret:
            secretName: alerts-proxy
        - name: alerts-tls
          secret:
            secretName: prometheus-alerts-tls
        - name: alertmanager-data
          emptyDir: {}
        - name: alert-buffer-data #TODO: make persistent
          emptyDir: {}
- apiVersion: v1
  kind: ConfigMap
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
  data:
    prometheus.rules: |
      groups:
      - name: example-rules
        interval: 30s # defaults to global interval
        rules:
        - alert: Node_Down
          expr: up{job="kubernetes-nodes"} == 0
          annotations:
            miqTarget: "ContainerNode"
            severity: "HIGH"
            message: "{{$labels.instance}} is down"
    prometheus.yml: |
      rule_files:
      - 'prometheus.rules'

      # A scrape configuration for running Prometheus on a Kubernetes cluster.
      # This uses separate scrape configs for cluster components (i.e. API server, node)
      # and services to allow each to use different authentication configs.
      #
      # Kubernetes labels will be added as Prometheus labels on metrics via the
      # `labelmap` relabeling action.

      # Scrape config for API servers.
      #
      # Kubernetes exposes API servers as endpoints to the default/kubernetes
      # service so this uses `endpoints` role and uses relabelling to only keep
      # the endpoints associated with the default/kubernetes service using the
      # default named port `https`. This works for single API server deployments as
      # well as HA API server deployments.
      scrape_configs:
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
        - role: pod
          namespaces:
            names:
            - ${NAMESPACE}
        relabel_configs:
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
          action: replace
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
          target_label: __address__
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: kubernetes_pod_name

      # Scrape config for service endpoints.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      #   to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: If the metrics are exposed on a different port to the
      #   service then set this appropriately.
      - job_name: 'kubernetes-service-endpoints'

        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # TODO: this should be per target
          insecure_skip_verify: true

        kubernetes_sd_configs:
        - role: endpoints
          namespaces:
            names:
            - ${NAMESPACE}

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
          action: replace
          target_label: __scheme__
          regex: (https?)
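        # The next two relabelings rewrite the scrape path and the target
        # address from the prometheus.io/path and prometheus.io/port
        # annotations described above.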
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
          action: replace
          target_label: __address__
          regex: (.+)(?::\d+);(\d+)
          replacement: $1:$2
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
          action: replace
          target_label: __basic_auth_username__
          regex: (.+)
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
          action: replace
          target_label: __basic_auth_password__
          regex: (.+)
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: kubernetes_name

      # Example scrape config for probing services via the Blackbox Exporter.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/probe`: Only probe services that have a value of `true`
      - job_name: 'kubernetes-services'

        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # TODO: this should be per target
          insecure_skip_verify: true

        metrics_path: /probe
        params:
          module: [http_2xx]

        kubernetes_sd_configs:
        - role: service
          namespaces:
            names:
            - ${NAMESPACE}

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
          action: keep
          regex: true
        - source_labels: [__address__]
          target_label: __param_target
        - target_label: __address__
          replacement: blackbox-exporter.example.com:9115
        - source_labels: [__param_target]
          target_label: instance
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          target_label: kubernetes_name

      # - job_name: 'openshift-template-service-broker'
      #
      #   scheme: https
      #   tls_config:
      #     ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      #     server_name: apiserver.openshift-template-service-broker.svc
      #   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      #
      #   kubernetes_sd_configs:
      #   - role: endpoints
      #
      #   relabel_configs:
      #   - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      #     action: keep
      #     regex: openshift-template-service-broker;apiserver;https
      #
      # alerting:
      #   alertmanagers:
      #   - scheme: http
      #     static_configs:
      #     - targets:
      #       - "localhost:9093"

# Create a fully end-to-end TLS connection to the alert proxy
- apiVersion: route.openshift.io/v1
  kind: Route
  metadata:
    name: alerts
    namespace: "${NAMESPACE}"
  spec:
    to:
      name: alerts
    tls:
      termination: Reencrypt
      insecureEdgeTerminationPolicy: Redirect
- apiVersion: v1
  kind: Service
  metadata:
    annotations:
      service.alpha.openshift.io/serving-cert-secret-name: prometheus-alerts-tls
    labels:
      name: alerts
    name: alerts
    namespace: "${NAMESPACE}"
  spec:
    ports:
    - name: alerts
      port: 443
      protocol: TCP
      targetPort: 9443
    selector:
      app: prometheus
- apiVersion: v1
  kind: Secret
  metadata:
    name: alerts-proxy
    namespace: "${NAMESPACE}"
  stringData:
    session_secret: "${SESSION_SECRET}="
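# Alertmanager configuration: every alert is routed to the alert-buffer
# webhook receiver, which stores alerts so they can be read back through
# the alerts proxy above.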
- apiVersion: v1
  kind: ConfigMap
  metadata:
    name: prometheus-alerts
    namespace: "${NAMESPACE}"
  data:
    alertmanager.yml: |
      global:

      # The root route on which each incoming alert enters.
      route:
        # default route if none match
        receiver: alert-buffer-wh

        # The labels by which incoming alerts are grouped together. For example,
        # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
        # be batched into a single group.
        # TODO:
        group_by: []

        # All the above attributes are inherited by all child routes and can
        # be overwritten on each.

      receivers:
      - name: alert-buffer-wh
        webhook_configs:
        - url: http://localhost:9099/topics/alerts
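# To instantiate this template (a typical invocation - adjust the file name
# and the target namespace for your environment):
#
#   oc new-app -f prometheus.yaml -p NAMESPACE=kube-system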