# create amazon-cloudwatch namespace
apiVersion: v1
kind: Namespace
metadata:
  name: amazon-cloudwatch
  labels:
    name: amazon-cloudwatch

---
# create configmap for prometheus cwagent config
apiVersion: v1
data:
  # cwagent json config
  cwagentconfig.json: |
    {
      "logs": {
        "metrics_collected": {
          "prometheus": {
            "prometheus_config_path": "/etc/prometheusconfig/prometheus.yaml",
            "emf_processor": {
              "metric_declaration": [
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*nginx.*",
                  "dimensions": [["Service","Namespace","ClusterName"]],
                  "metric_selectors": [
                    "^nginx_ingress_controller_(requests|success)$",
                    "^nginx_ingress_controller_nginx_process_connections$",
                    "^nginx_ingress_controller_nginx_process_connections_total$",
                    "^nginx_ingress_controller_nginx_process_resident_memory_bytes$",
                    "^nginx_ingress_controller_nginx_process_cpu_seconds_total$",
                    "^nginx_ingress_controller_config_last_reload_successful$"
                  ]
                },
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*nginx.*",
                  "dimensions": [["Service","Namespace","ClusterName","ingress"],["Service","Namespace","ClusterName","status"]],
                  "metric_selectors": ["^nginx_ingress_controller_requests$"]
                },
                {
                  "source_labels": ["Service", "frontend"],
                  "label_matcher": ".*haproxy-ingress-.*metrics;(httpfront-shared-frontend|httpfront-default-backend|httpsfront|_front_http)",
                  "dimensions": [["Service","Namespace","ClusterName","frontend","code"]],
                  "metric_selectors": [
                    "^haproxy_frontend_http_responses_total$"
                  ]
                },
                {
                  "source_labels": ["Service", "backend"],
                  "label_matcher": ".*haproxy-ingress-.*metrics",
                  "dimensions": [["Service","Namespace","ClusterName","backend","code"]],
                  "metric_selectors": [
                    "^haproxy_backend_http_responses_total$"
                  ]
                },
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*haproxy-ingress-.*metrics",
                  "dimensions": [["Service","Namespace","ClusterName"]],
                  "metric_selectors": [
                    "^haproxy_backend_up$",
                    "^haproxy_backend_status$",
                    "^haproxy_backend_bytes_(in|out)_total$",
                    "^haproxy_backend_connections_total$",
                    "^haproxy_backend_connection_errors_total$",
                    "^haproxy_backend_current_sessions$",
                    "^haproxy_frontend_bytes_(in|out)_total$",
                    "^haproxy_frontend_connections_total$",
                    "^haproxy_frontend_http_requests_total$",
                    "^haproxy_frontend_request_errors_total$",
                    "^haproxy_frontend_requests_denied_total$",
                    "^haproxy_frontend_current_sessions$"
                  ]
                },
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*memcached.*",
                  "dimensions": [["Service","Namespace","ClusterName"]],
                  "metric_selectors": [
                    "^memcached_current_(bytes|items|connections)$",
                    "^memcached_items_(reclaimed|evicted)_total$",
                    "^memcached_(written|read)_bytes_total$",
                    "^memcached_limit_bytes$",
                    "^memcached_commands_total$"
                  ]
                },
                {
                  "source_labels": ["Service", "status", "command"],
                  "label_matcher": ".*memcached.*;hit;get",
                  "dimensions": [["Service","Namespace","ClusterName","status","command"]],
                  "metric_selectors": [
                    "^memcached_commands_total$"
                  ]
                },
                {
                  "source_labels": ["Service", "command"],
                  "label_matcher": ".*memcached.*;(get|set)",
                  "dimensions": [["Service","Namespace","ClusterName","command"]],
                  "metric_selectors": [
                    "^memcached_commands_total$"
                  ]
                },
                {
                  "source_labels": ["container_name"],
                  "label_matcher": "^envoy$",
                  "dimensions": [["ClusterName","Namespace"]],
                  "metric_selectors": [
                    "^envoy_http_downstream_rq_(total|xx)$",
                    "^envoy_cluster_upstream_cx_(r|t)x_bytes_total$",
                    "^envoy_cluster_membership_(healthy|total)$",
                    "^envoy_server_memory_(allocated|heap_size)$",
                    "^envoy_cluster_upstream_cx_(connect_timeout|destroy_local_with_active_rq)$",
                    "^envoy_cluster_upstream_rq_(pending_failure_eject|pending_overflow|timeout|per_try_timeout|rx_reset|maintenance_mode)$",
                    "^envoy_http_downstream_cx_destroy_remote_active_rq$",
                    "^envoy_cluster_upstream_flow_control_(paused_reading_total|resumed_reading_total|backed_up_total|drained_total)$",
                    "^envoy_cluster_upstream_rq_retry$",
                    "^envoy_cluster_upstream_rq_retry_(success|overflow)$",
                    "^envoy_server_(version|uptime|live)$"
                  ]
                },
                {
                  "source_labels": ["container_name"],
                  "label_matcher": "^envoy$",
                  "dimensions": [["ClusterName","Namespace","envoy_http_conn_manager_prefix","envoy_response_code_class"]],
                  "metric_selectors": [
                    "^envoy_http_downstream_rq_xx$"
                  ]
                },
                {
                  "source_labels": ["container_name"],
                  "label_matcher": "^fluent-bit.*$",
                  "dimensions": [["ClusterName","Namespace","NodeName"]],
                  "metric_selectors": [
                    "^fluentbit_output_errors_total$",
                    "^fluentbit_input_bytes_total$",
                    "^fluentbit_output_proc_bytes_total$",
                    "^fluentbit_input_records_total$",
                    "^fluentbit_output_proc_records_total$",
                    "^fluentbit_output_retries_(total|failed_total)$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^kubernetes-pod-jmx$",
                  "dimensions": [["ClusterName","Namespace"]],
                  "metric_selectors": [
                    "^jvm_threads_(current|daemon)$",
                    "^jvm_classes_loaded$",
                    "^java_lang_operatingsystem_(freephysicalmemorysize|totalphysicalmemorysize|freeswapspacesize|totalswapspacesize|systemcpuload|processcpuload|availableprocessors|openfiledescriptorcount)$",
                    "^catalina_manager_(rejectedsessions|activesessions)$",
                    "^jvm_gc_collection_seconds_(count|sum)$",
                    "^catalina_globalrequestprocessor_(bytesreceived|bytessent|requestcount|errorcount|processingtime)$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^kubernetes-pod-jmx$",
                  "dimensions": [["ClusterName","Namespace","area"]],
                  "metric_selectors": [
                    "^jvm_memory_bytes_used$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^kubernetes-pod-jmx$",
                  "dimensions": [["ClusterName","Namespace","pool"]],
                  "metric_selectors": [
                    "^jvm_memory_pool_bytes_used$"
                  ]
                }
              ]
            }
          }
        },
        "force_flush_interval": 5
      }
    }
kind: ConfigMap
metadata:
  name: prometheus-cwagentconfig
  namespace: amazon-cloudwatch

---
# create configmap for prometheus scrape config
apiVersion: v1
data:
  # prometheus config
  prometheus.yaml: |
    global:
      scrape_interval: 1m
      scrape_timeout: 10s
    scrape_configs:
    - job_name: 'kubernetes-pod-appmesh-envoy'
      sample_limit: 10000
      metrics_path: /stats/prometheus
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_container_name]
        action: keep
        regex: '^envoy$'
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: ${1}:9901
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: Namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_container_name
        target_label: container_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_controller_name
        target_label: pod_controller_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_controller_kind
        target_label: pod_controller_kind
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_phase
        target_label: pod_phase

    - job_name: 'kubernetes-pod-fluentbit-plugin'
      sample_limit: 10000
      metrics_path: /api/v1/metrics/prometheus
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_container_name]
        action: keep
        regex: '^fluent-bit.*$'
      - source_labels: [__address__]
        action: replace
        regex: ([^:]+)(?::\d+)?
        replacement: ${1}:2020
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: Namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_container_name
        target_label: container_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_controller_name
        target_label: pod_controller_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_controller_kind
        target_label: pod_controller_kind
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_phase
        target_label: pod_phase
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_node_name
        target_label: NodeName

    - job_name: kubernetes-service-endpoints
      sample_limit: 10000
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape
      - action: replace
        regex: (https?)
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scheme
        target_label: __scheme__
      - action: replace
        regex: (.+)
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_path
        target_label: __metrics_path__
      - action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: Namespace
      - action: replace
        source_labels:
        - __meta_kubernetes_service_name
        target_label: Service
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_node_name
        target_label: kubernetes_node
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_name
        target_label: pod_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_container_name
        target_label: container_name
      metric_relabel_configs:
      - source_labels: [__name__]
        regex: 'go_gc_duration_seconds.*'
        action: drop
      - source_labels: [__name__, proxy]
        regex: "haproxy_frontend.+;(.+)"
        target_label: frontend
        replacement: "$1"
      - source_labels: [__name__, proxy]
        regex: "haproxy_server.+;(.+)"
        target_label: backend
        replacement: "$1"
      - source_labels: [__name__, proxy]
        regex: "haproxy_backend.+;(.+)"
        target_label: backend
        replacement: "$1"
      - regex: proxy
        action: labeldrop

    - job_name: 'kubernetes-pod-jmx'
      sample_limit: 10000
      metrics_path: /metrics
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__address__]
        action: keep
        regex: '.*:9404$'
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: Namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_container_name
        target_label: container_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_controller_name
        target_label: pod_controller_name
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_controller_kind
        target_label: pod_controller_kind
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_phase
        target_label: pod_phase
      metric_relabel_configs:
      - source_labels: [__name__]
        regex: 'jvm_gc_collection_seconds.*'
        action: drop

kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: amazon-cloudwatch

---
# create cwagent service account and role binding
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cwagent-prometheus
  namespace: amazon-cloudwatch

---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cwagent-prometheus-role
rules:
  - apiGroups: [""]
    resources:
    - nodes
    - nodes/proxy
    - services
    - endpoints
    - pods
    verbs: ["get", "list", "watch"]
  - apiGroups:
    - extensions
    resources:
    - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]

---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cwagent-prometheus-role-binding
subjects:
  - kind: ServiceAccount
    name: cwagent-prometheus
    namespace: amazon-cloudwatch
roleRef:
  kind: ClusterRole
  name: cwagent-prometheus-role
  apiGroup: rbac.authorization.k8s.io

---
# Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cwagent-prometheus
  namespace: amazon-cloudwatch
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cwagent-prometheus
  template:
    metadata:
      labels:
        app: cwagent-prometheus
    spec:
      containers:
        - name: cloudwatch-agent
          image: public.ecr.aws/cloudwatch-agent/cloudwatch-agent:1.300052.0b1024
          imagePullPolicy: Always
          resources:
            limits:
              cpu:  1000m
              memory: 1000Mi
            requests:
              cpu: 200m
              memory: 200Mi
          # Please don't change below envs
          env:
            - name: CI_VERSION
              value: "k8s/1.3.31"
          # Please don't change the mountPath
          volumeMounts:
            - name: prometheus-cwagentconfig
              mountPath: /etc/cwagentconfig
            - name: prometheus-config
              mountPath: /etc/prometheusconfig

      volumes:
        - name: prometheus-cwagentconfig
          configMap:
            name: prometheus-cwagentconfig
        - name: prometheus-config
          configMap:
            name: prometheus-config
      terminationGracePeriodSeconds: 60
      serviceAccountName: cwagent-prometheus