# Copyright 2024 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

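# This daemonset enables the containerd NRI (Node Resource Interface) plugin
# on GKE nodes with NVIDIA H100 accelerators (restarting containerd when its
# config had to be changed) and then runs the nri-device-injector plugin with
# access to the host's /dev and the NRI socket directory.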

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: device-injector
  namespace: kube-system
  labels:
    k8s-app: device-injector
spec:
  updateStrategy:
    type: RollingUpdate
  # Selects the pods created from the template below via their k8s-app label.
  selector:
    matchLabels:
      k8s-app: device-injector
  template:
    metadata:
      labels:
        k8s-app: device-injector
        name: device-injector
    spec:
      priorityClassName: system-node-critical
      # The pod shares the node's network and PID namespaces.
      hostNetwork: true
      hostPID: true
      # A toleration with only `operator: Exists` matches every taint, so the
      # pod is not kept off a node by any taint.
      tolerations:
        - operator: Exists
      # Schedule only onto nodes whose GKE accelerator label is one of the
      # H100 types listed here.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: cloud.google.com/gke-accelerator
                    operator: In
                    values:
                      - nvidia-h100-80gb
                      - nvidia-h100-mega-80gb
      initContainers:
        # One-shot node setup: makes sure the containerd config contains an
        # enabled NRI plugin section before the main container starts.
        # NOTE(review): ":latest" is a mutable tag; consider pinning this image
        # by digest, as the device-injector container image already is.
        - image: "gke.gcr.io/gke-distroless/bash:latest"
          name: enable-nri
          securityContext:
            privileged: true
          volumeMounts:
            # The "root" volume (hostPath /) is mounted over the container's
            # own root, so the paths and binaries used by the script below
            # presumably resolve on the host -- confirm with the runtime in use.
            - name: root
              mountPath: /
          command:
            - '/bin/bash'
            - '-c'
            - |
              # Abort on the first failure so containerd is never restarted
              # after a failed config write.
              set -eu
              config=/etc/containerd/config.toml
              # Match the full plugin ID instead of the bare substring "nri",
              # which could also occur in unrelated settings or comments.
              # NOTE(review): an existing NRI section with "disable = true" is
              # left untouched by this check.
              if ! grep -qF 'io.containerd.nri.v1.nri' "${config}"; then
                # The leading empty line keeps the table header on its own line
                # even when the file does not end with a newline.
                printf '%s\n' \
                  '' \
                  '[plugins."io.containerd.nri.v1.nri"]' \
                  '   disable = false' \
                  '   disable_connections = false' \
                  '   plugin_config_path = "/etc/nri/conf.d"' \
                  '   plugin_path = "/home/kubernetes/nri/plugins"' \
                  '   plugin_registration_timeout = "5s"' \
                  '   plugin_request_timeout = "5s"' \
                  '   socket_path = "/var/run/nri/nri.sock"' \
                  >> "${config}"
                # Restart so containerd picks up the new NRI settings.
                systemctl restart containerd.service
              fi
      containers:
        # Long-running NRI device injector; the image is pinned by digest.
        - name: device-injector
          image: 'gcr.io/gke-release/nri-device-injector@sha256:7704e2bd74b8edbb76b6913c7904cc2362f1fa887c4d4aba7b19778ea353537c'
          securityContext:
            privileged: true
          resources:
            requests:
              cpu: 150m
          volumeMounts:
            # Host device nodes.
            - name: dev
              mountPath: /dev
            # Directory that holds the NRI socket configured for containerd
            # (socket_path /var/run/nri/nri.sock).
            - name: nri
              mountPath: /var/run/nri
      # hostPath volumes exposing parts of the node's filesystem to the pod.
      volumes:
        # Entire host filesystem; mounted by the enable-nri init container.
        - name: root
          hostPath:
            path: /
        # Directory containing the NRI socket; mounted by the device-injector
        # container.
        - name: nri
          hostPath:
            path: /var/run/nri
        # Host device nodes; mounted by the device-injector container.
        - name: dev
          hostPath:
            path: /dev