apiVersion: template.openshift.io/v1
kind: Template
metadata:
  name: prometheus
  annotations:
    "openshift.io/display-name": Prometheus
    description: |
      A monitoring solution for an OpenShift cluster - collects metrics and alerts from nodes, services, and the infrastructure. This is a tech preview feature.
    iconClass: icon-cogs
    tags: "monitoring,prometheus,alertmanager,time-series"
parameters:
- description: The namespace to instantiate prometheus under. Defaults to 'kube-system'.
  name: NAMESPACE
  required: true
  # value: kube-system
- description: The location of the proxy image
  name: IMAGE_PROXY
  value: openshift/oauth-proxy:v1.0.0
- description: The location of the prometheus image
  name: IMAGE_PROMETHEUS
  value: openshift/prometheus:v2.0.0
- description: The location of the alertmanager image
  name: IMAGE_ALERTMANAGER
  value: openshift/prometheus-alertmanager:v0.9.1
- description: The location of the alert-buffer image
  name: IMAGE_ALERT_BUFFER
  value: openshift/prometheus-alert-buffer:v0.0.2
- description: The session secret for the proxy
  name: SESSION_SECRET
  generate: expression
  from: "[a-zA-Z0-9]{43}"
objects:
# Authorize the prometheus service account to read data about the cluster
- apiVersion: v1
  kind: ServiceAccount
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
    annotations:
      serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
      serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
- apiVersion: authorization.openshift.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: prometheus-cluster-reader
  roleRef:
    name: system:auth-delegator
  subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: "${NAMESPACE}"
- apiVersion: authorization.openshift.io/v1
  kind: RoleBinding
  metadata:
    name: prometheus-viewer
  roleRef:
    name: view
  subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: "${NAMESPACE}"
# Create a fully end-to-end TLS connection to the prometheus proxy
- apiVersion: route.openshift.io/v1
  kind: Route
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
  spec:
    to:
      name: prometheus
    port:
      targetPort: prometheus-https
    tls:
      termination: Reencrypt
      insecureEdgeTerminationPolicy: Redirect
- apiVersion: v1
  kind: Service
  metadata:
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/scheme: https
      service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls
    labels:
      name: prometheus
    name: prometheus
    namespace: "${NAMESPACE}"
  spec:
    ports:
    - name: prometheus-https
      port: 443
      protocol: TCP
      targetPort: 8443
    # - name: prometheus-http
    #   port: 9090
    #   protocol: TCP
    #   targetPort: 9090
    selector:
      app: prometheus
- apiVersion: v1
  kind: Secret
  metadata:
    name: prometheus-proxy
    namespace: "${NAMESPACE}"
  stringData:
    session_secret: "${SESSION_SECRET}="
- apiVersion: apps/v1beta1
  kind: StatefulSet
  metadata:
    labels:
      app: prometheus
    name: prometheus
    namespace: "${NAMESPACE}"
  spec:
    updateStrategy:
      type: RollingUpdate
    podManagementPolicy: Parallel
    selector:
      matchLabels:
        app: prometheus
    template:
      metadata:
        labels:
          app: prometheus
        name: prometheus
      spec:
        serviceAccountName: prometheus
        containers:
        # Deploy Prometheus behind an oauth proxy
        - name: prom-proxy
          image: ${IMAGE_PROXY}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "100m"
              memory: "200Mi"
          ports:
          - containerPort: 8443
            name: web
          args:
          - -provider=openshift
          - -https-address=:8443
          - -http-address=
          - -email-domain=*
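          # Authenticated requests are forwarded to the Prometheus listener
          # on localhost:9090; the -openshift-sar and -openshift-delegate-urls
          # args below limit access to users who can "get" this namespace.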
          - -upstream=http://localhost:9090
          - -client-id=system:serviceaccount:${NAMESPACE}:prometheus
          - -openshift-ca=/etc/pki/tls/cert.pem
          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
          - -tls-cert=/etc/tls/private/tls.crt
          - -tls-key=/etc/tls/private/tls.key
          - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
          - -cookie-secret-file=/etc/proxy/secrets/session_secret
          - -skip-auth-regex=^/metrics
          volumeMounts:
          - mountPath: /etc/tls/private
            name: prometheus-tls
          - mountPath: /etc/proxy/secrets
            name: prometheus-secrets
          - mountPath: /prometheus
            name: prometheus-data
        - name: prometheus
          resources:
            limits:
              cpu: "400m"
              memory: "1024Mi"
          # ports:
          # - containerPort: 9090
          #   name: prometheus
          args:
          - --storage.tsdb.retention=6h
          - --storage.tsdb.min-block-duration=2m
          - --config.file=/etc/prometheus/prometheus.yml
          - --web.listen-address=localhost:9090
          - --web.enable-lifecycle
          image: ${IMAGE_PROMETHEUS}
          imagePullPolicy: IfNotPresent
          volumeMounts:
          - mountPath: /etc/prometheus
            name: prometheus-config
          - mountPath: /prometheus
            name: prometheus-data
        # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
        # listens on https port 9443 (instead of 8443) so it does not clash with prom-proxy
        - name: alerts-proxy
          image: ${IMAGE_PROXY}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "100m"
              memory: "200Mi"
          ports:
          - containerPort: 9443
            name: web
          args:
          - -provider=openshift
          - -https-address=:9443
          - -http-address=
          - -email-domain=*
          - -upstream=http://localhost:9099
          - -client-id=system:serviceaccount:${NAMESPACE}:prometheus
          - -openshift-ca=/etc/pki/tls/cert.pem
          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}'
          - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}'
          - -tls-cert=/etc/tls/private/tls.crt
          - -tls-key=/etc/tls/private/tls.key
          - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
          - -cookie-secret-file=/etc/proxy/secrets/session_secret
          - -skip-auth-regex=^/metrics
          - -ssl-insecure-skip-verify=true
          volumeMounts:
          - mountPath: /etc/tls/private
            name: alerts-tls
          - mountPath: /etc/proxy/secrets
            name: alerts-secrets
        - name: alert-buffer
          args:
          - --storage-path=/alert-buffer/messages.db
          image: ${IMAGE_ALERT_BUFFER}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "100m"
              memory: "200Mi"
          volumeMounts:
          - mountPath: /alert-buffer
            name: alert-buffer-data
          ports:
          - containerPort: 9099
            name: alert-buf
        - name: alertmanager
          args:
          - -config.file=/etc/alertmanager/alertmanager.yml
          image: ${IMAGE_ALERTMANAGER}
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: "200m"
              memory: "200Mi"
          ports:
          - containerPort: 9093
            name: web
          volumeMounts:
          - mountPath: /etc/alertmanager
            name: alertmanager-config
          - mountPath: /alertmanager
            name: alertmanager-data
        restartPolicy: Always
        volumes:
        - name: prometheus-config
          configMap:
            defaultMode: 420
            name: prometheus
        - name: prometheus-secrets
          secret:
            secretName: prometheus-proxy
        - name: prometheus-tls
          secret:
            secretName: prometheus-tls
        - name: prometheus-data
          emptyDir: {}
        - name: alertmanager-config
          configMap:
            defaultMode: 420
            name: prometheus-alerts
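        # Session secret and serving certs for the alerts proxy; the
        # prometheus-alerts-tls secret is populated by the serving-cert
        # annotation on the "alerts" service defined later in this template.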
        - name: alerts-secrets
          secret:
            secretName: alerts-proxy
        - name: alerts-tls
          secret:
            secretName: prometheus-alerts-tls
        - name: alertmanager-data
          emptyDir: {}
        - name: alert-buffer-data #TODO: make persistent
          emptyDir: {}
- apiVersion: v1
  kind: ConfigMap
  metadata:
    name: prometheus
    namespace: "${NAMESPACE}"
  data:
    prometheus.rules: |
      groups:
      - name: example-rules
        interval: 30s # defaults to global interval
        rules:
        - alert: Node_Down
          expr: up{job="kubernetes-nodes"} == 0
          annotations:
            miqTarget: "ContainerNode"
            severity: "HIGH"
            message: "{{$labels.instance}} is down"
    prometheus.yml: |
      rule_files:
      - 'prometheus.rules'

      # A scrape configuration for running Prometheus on a Kubernetes cluster.
      # This uses separate scrape configs for cluster components (i.e. API server, node)
      # and services to allow each to use different authentication configs.
      #
      # Kubernetes labels will be added as Prometheus labels on metrics via the
      # `labelmap` relabeling action.

      # Scrape config for API servers.
      #
      # Kubernetes exposes API servers as endpoints to the default/kubernetes
      # service so this uses `endpoints` role and uses relabelling to only keep
      # the endpoints associated with the default/kubernetes service using the
      # default named port `https`. This works for single API server deployments as
      # well as HA API server deployments.
      scrape_configs:
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
        - role: pod
          namespaces:
            names:
            - ${NAMESPACE}
        relabel_configs:
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
          action: replace
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
          target_label: __address__
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: kubernetes_pod_name

      # Scrape config for service endpoints.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
      # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
      #   to set this to `https` & most likely set the `tls_config` of the scrape config.
      # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
      # * `prometheus.io/port`: If the metrics are exposed on a different port to the
      #   service then set this appropriately.
      - job_name: 'kubernetes-service-endpoints'

        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # TODO: this should be per target
          insecure_skip_verify: true

        kubernetes_sd_configs:
        - role: endpoints
          namespaces:
            names:
            - ${NAMESPACE}

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
          action: replace
          target_label: __scheme__
          regex: (https?)
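        # The next two relabelings rewrite the scrape path and the target
        # address from the prometheus.io/path and prometheus.io/port
        # annotations described above.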
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
          action: replace
          target_label: __address__
          regex: (.+)(?::\d+);(\d+)
          replacement: $1:$2
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
          action: replace
          target_label: __basic_auth_username__
          regex: (.+)
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
          action: replace
          target_label: __basic_auth_password__
          regex: (.+)
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: kubernetes_name

      # Example scrape config for probing services via the Blackbox Exporter.
      #
      # The relabeling allows the actual service scrape endpoint to be configured
      # via the following annotations:
      #
      # * `prometheus.io/probe`: Only probe services that have a value of `true`
      - job_name: 'kubernetes-services'

        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          # TODO: this should be per target
          insecure_skip_verify: true

        metrics_path: /probe
        params:
          module: [http_2xx]

        kubernetes_sd_configs:
        - role: service
          namespaces:
            names:
            - ${NAMESPACE}

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
          action: keep
          regex: true
        - source_labels: [__address__]
          target_label: __param_target
        - target_label: __address__
          replacement: blackbox-exporter.example.com:9115
        - source_labels: [__param_target]
          target_label: instance
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          target_label: kubernetes_name

      # - job_name: 'openshift-template-service-broker'
      #
      #   scheme: https
      #   tls_config:
      #     ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      #     server_name: apiserver.openshift-template-service-broker.svc
      #   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      #
      #   kubernetes_sd_configs:
      #   - role: endpoints
      #
      #   relabel_configs:
      #   - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      #     action: keep
      #     regex: openshift-template-service-broker;apiserver;https
      #
      # alerting:
      #   alertmanagers:
      #   - scheme: http
      #     static_configs:
      #     - targets:
      #       - "localhost:9093"

# Create a fully end-to-end TLS connection to the alert proxy
- apiVersion: route.openshift.io/v1
  kind: Route
  metadata:
    name: alerts
    namespace: "${NAMESPACE}"
  spec:
    to:
      name: alerts
    tls:
      termination: Reencrypt
      insecureEdgeTerminationPolicy: Redirect
- apiVersion: v1
  kind: Service
  metadata:
    annotations:
      service.alpha.openshift.io/serving-cert-secret-name: prometheus-alerts-tls
    labels:
      name: alerts
    name: alerts
    namespace: "${NAMESPACE}"
  spec:
    ports:
    - name: alerts
      port: 443
      protocol: TCP
      targetPort: 9443
    selector:
      app: prometheus
- apiVersion: v1
  kind: Secret
  metadata:
    name: alerts-proxy
    namespace: "${NAMESPACE}"
  stringData:
    session_secret: "${SESSION_SECRET}="
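# Alertmanager configuration: every alert is routed to the alert-buffer
# webhook receiver, which stores alerts so they can be read back through
# the alerts proxy above.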
- apiVersion: v1
  kind: ConfigMap
  metadata:
    name: prometheus-alerts
    namespace: "${NAMESPACE}"
  data:
    alertmanager.yml: |
      global:

      # The root route on which each incoming alert enters.
      route:
        # default route if none match
        receiver: alert-buffer-wh

        # The labels by which incoming alerts are grouped together. For example,
        # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
        # be batched into a single group.
        # TODO:
        group_by: []

        # All the above attributes are inherited by all child routes and can
        # be overwritten on each.

      receivers:
      - name: alert-buffer-wh
        webhook_configs:
        - url: http://localhost:9099/topics/alerts
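# To instantiate this template (a typical invocation - adjust the file name
# and the target namespace for your environment):
#
#   oc new-app -f prometheus.yaml -p NAMESPACE=kube-system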