# Copyright 2024 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This DaemonSet runs on nodes labelled with the nvidia-h100-80gb or
# nvidia-h100-mega-80gb GKE accelerator type. On each such node it:
#   1. (init container) appends an NRI plugin section to the host's
#      /etc/containerd/config.toml if the file does not mention "nri" yet,
#      then restarts containerd.service;
#   2. (main container) runs the nri-device-injector image with the host's
#      /dev and /var/run/nri directories mounted.
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: device-injector
  namespace: gpudirect-system
  labels:
    k8s-app: device-injector
spec:
  selector:
    matchLabels:
      k8s-app: device-injector
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: device-injector
        k8s-app: device-injector
    spec:
      # Schedule only onto nodes whose accelerator label is one of the
      # H100 variants listed below.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: cloud.google.com/gke-accelerator
                    operator: In
                    values:
                      - nvidia-h100-80gb
                      - nvidia-h100-mega-80gb
      # A bare "Exists" toleration matches every taint.
      tolerations:
        - operator: "Exists"
      hostNetwork: true
      hostPID: true
      initContainers:
        # NOTE(review): ":latest" is a mutable tag, unlike the digest-pinned
        # image of the main container below; consider pinning a digest.
        - image: "gke.gcr.io/gke-distroless/bash:latest"
          name: enable-nri
          resources:
            limits:
              cpu: 150m
              memory: 160Mi
              ephemeral-storage: 10Mi
          securityContext:
            privileged: true
          # The host root filesystem is mounted at "/", so the absolute paths
          # used by the script below are host paths.
          # NOTE(review): systemctl presumably reaches the host's systemd
          # through this mount together with hostPID -- confirm.
          volumeMounts:
            - name: root
              mountPath: /
          command:
            - '/bin/bash'
            - '-c'
            - |
              # Skip everything when the config already mentions "nri", so
              # pod restarts do not append the section a second time.
              if ! grep -q nri /etc/containerd/config.toml; then
                # The leading blank line keeps the table header on its own
                # line even if config.toml has no trailing newline.
                echo "
                [plugins.\"io.containerd.nri.v1.nri\"]
                  disable = false
                  disable_connections = false
                  plugin_config_path = \"/etc/nri/conf.d\"
                  plugin_path = \"/home/kubernetes/nri/plugins\"
                  plugin_registration_timeout = \"5s\"
                  plugin_request_timeout = \"5s\"
                  socket_path = \"/var/run/nri/nri.sock\"" >> /etc/containerd/config.toml
                # containerd only reads config.toml at start-up.
                systemctl restart containerd.service
              fi
      containers:
        - image: "gcr.io/gke-release/nri-device-injector@sha256:7704e2bd74b8edbb76b6913c7904cc2362f1fa887c4d4aba7b19778ea353537c"
          name: device-injector
          resources:
            limits:
              cpu: 150m
              memory: 160Mi
              ephemeral-storage: 10Mi
          securityContext:
            privileged: true
          volumeMounts:
            - name: dev
              mountPath: /dev
            # Same directory as the socket_path configured by the init
            # container (/var/run/nri/nri.sock).
            - name: nri
              mountPath: /var/run/nri
      volumes:
        - name: root
          hostPath:
            path: /
        - name: nri
          hostPath:
            path: /var/run/nri
        - name: dev
          hostPath:
            path: /dev