{{- /*
ConfigMap "<hami-vgpu.scheduler>-device", labelled as a hami-scheduler component.

Rendered only when .Values.dra.enabled is false.

The single data key, device-config.yaml, is filled in one of two ways:
  * if the chart bundles files/device-config.yaml, that file is embedded verbatim;
  * otherwise the built-in per-vendor defaults below are rendered from chart values.

Quoting convention: templated resource-name strings are piped through `quote`, to match
the hard-coded (already quoted) resource names in this file. Numeric settings, and
settings whose type is not evident from this template (e.g. gpuCorePolicy,
sgpuTopologyAware), are deliberately left bare so their rendered type is unchanged.
*/ -}}
{{- if not .Values.dra.enabled -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "hami-vgpu.scheduler" . }}-device
  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
data:
  device-config.yaml: |-
    {{- if .Files.Glob "files/device-config.yaml" }}
    {{- .Files.Get "files/device-config.yaml" | nindent 4 }}
    {{- else }}
    nvidia:
      resourceCountName: {{ .Values.resourceName | quote }}
      resourceMemoryName: {{ .Values.resourceMem | quote }}
      resourceMemoryPercentageName: {{ .Values.resourceMemPercentage | quote }}
      resourceCoreName: {{ .Values.resourceCores | quote }}
      resourcePriorityName: {{ .Values.resourcePriority | quote }}
      overwriteEnv: false
      defaultMemory: 0
      defaultCores: 0
      defaultGPUNum: 1
      preConfiguredDeviceMemory: {{ .Values.devicePlugin.preConfiguredDeviceMemory | default 0 }}
      memoryFactor: 1
      deviceSplitCount: {{ .Values.devicePlugin.deviceSplitCount }}
      deviceMemoryScaling: {{ .Values.devicePlugin.deviceMemoryScaling }}
      deviceCoreScaling: {{ .Values.devicePlugin.deviceCoreScaling }}
      gpuCorePolicy: {{ .Values.devices.nvidia.gpuCorePolicy }}
      libCudaLogLevel: {{ .Values.devices.nvidia.libCudaLogLevel }}
      runtimeClassName: "{{ .Values.devicePlugin.runtimeClassName }}"
      knownMigGeometries:
        - models: ["A30"]
          allowedGeometries:
            - - name: 1g.6gb
                core: 25
                memory: 6144
                count: 4
            - - name: 2g.12gb
                core: 50
                memory: 12288
                count: 2
            - - name: 4g.24gb
                core: 100
                memory: 24576
                count: 1
        - models: ["A100-SXM4-40GB", "A100-40GB-PCIe", "A100-PCIE-40GB"]
          allowedGeometries:
            - - name: 1g.5gb
                core: 14
                memory: 5120
                count: 7
            - - name: 1g.5gb
                core: 14
                memory: 5120
                count: 1
              - name: 2g.10gb
                core: 28
                memory: 10240
                count: 3
            - - name: 3g.20gb
                core: 42
                memory: 20480
                count: 2
            - - name: 7g.40gb
                core: 100
                memory: 40960
                count: 1
        - models: ["A100-SXM4-80GB", "A100-80GB-PCIe", "A100-PCIE-80GB"]
          allowedGeometries:
            - - name: 1g.10gb
                core: 14
                memory: 10240
                count: 7
            - - name: 1g.10gb
                core: 14
                memory: 10240
                count: 1
              - name: 2g.20gb
                core: 28
                memory: 20480
                count: 3
            - - name: 3g.40gb
                core: 42
                memory: 40960
                count: 2
            - - name: 7g.79gb
                core: 100
                memory: 80896
                count: 1
        - models: ["H100-PCIE-80GB", "H100-SXM5-80GB"]
          allowedGeometries:
            - - name: 1g.10gb
                core: 14
                memory: 10240
                count: 7
            - - name: 1g.10gb
                core: 14
                memory: 10240
                count: 1
              - name: 2g.20gb
                core: 28
                memory: 20480
                count: 3
            - - name: 3g.40gb
                core: 42
                memory: 40960
                count: 2
            - - name: 7g.80gb
                core: 100
                memory: 81920
                count: 1
        - models: ["H100-PCIE-94GB", "H100-SXM5-94GB"]
          allowedGeometries:
            - - name: 1g.12gb
                core: 14
                memory: 12288
                count: 7
            - - name: 1g.12gb
                core: 14
                memory: 12288
                count: 1
              - name: 2g.24gb
                core: 28
                memory: 24576
                count: 3
            - - name: 3g.47gb
                core: 42
                memory: 48128
                count: 2
            - - name: 7g.94gb
                core: 100
                memory: 96256
                count: 1
        - models: ["H20", "H100 on GH200"]
          allowedGeometries:
            - - name: 1g.12gb
                core: 14
                memory: 12288
                count: 7
            - - name: 1g.12gb
                core: 14
                memory: 12288
                count: 1
              - name: 2g.24gb
                core: 28
                memory: 24576
                count: 3
            - - name: 3g.48gb
                core: 42
                memory: 49152
                count: 2
            - - name: 7g.96gb
                core: 100
                memory: 98304
                count: 1
        - models: ["H200 NVL", "H200-SXM5"]
          allowedGeometries:
            - - name: 1g.18gb
                core: 14
                memory: 18432
                count: 7
            - - name: 1g.18gb
                core: 14
                memory: 18432
                count: 1
              - name: 2g.35gb
                core: 28
                memory: 35840
                count: 3
            - - name: 3g.71gb
                core: 42
                memory: 72704
                count: 2
            - - name: 7g.141gb
                core: 100
                memory: 144384
                count: 1
        - models: ["B200"]
          allowedGeometries:
            - - name: 1g.23gb
                core: 14
                memory: 23552
                count: 7
            - - name: 1g.23gb
                core: 14
                memory: 23552
                count: 1
              - name: 2g.45gb
                core: 28
                memory: 46080
                count: 3
            - - name: 3g.90gb
                core: 42
                memory: 92160
                count: 2
            - - name: 7g.180gb
                core: 100
                memory: 184320
                count: 1
    cambricon:
      resourceCountName: {{ .Values.mluResourceName | quote }}
      resourceMemoryName: {{ .Values.mluResourceMem | quote }}
      resourceCoreName: {{ .Values.mluResourceCores | quote }}
    hygon:
      resourceCountName: {{ .Values.dcuResourceName | quote }}
      resourceMemoryName: {{ .Values.dcuResourceMem | quote }}
      resourceCoreName: {{ .Values.dcuResourceCores | quote }}
      memoryFactor: 1
    metax:
      resourceCountName: "metax-tech.com/gpu"
      resourceVCountName: {{ .Values.metaxResourceName | quote }}
      resourceVMemoryName: {{ .Values.metaxResourceMem | quote }}
      resourceVCoreName: {{ .Values.metaxResourceCore | quote }}
      sgpuTopologyAware: {{ .Values.metaxsGPUTopologyAware }}
    enflame:
      resourceNameGCU: "enflame.com/gcu"
      resourceNameVGCU: {{ .Values.enflameResourceNameVGCU | quote }}
      resourceNameVGCUPercentage: {{ .Values.enflameResourceNameVGCUPercentage | quote }}
    mthreads:
      resourceCountName: "mthreads.com/vgpu"
      resourceMemoryName: "mthreads.com/sgpu-memory"
      resourceCoreName: "mthreads.com/sgpu-core"
    iluvatars:
      - chipName: MR-V100
        commonWord: MR-V100
        resourceCountName: iluvatar.ai/MR-V100-vgpu
        resourceMemoryName: iluvatar.ai/MR-V100.vMem
        resourceCoreName: iluvatar.ai/MR-V100.vCore
      - chipName: MR-V50
        commonWord: MR-V50
        resourceCountName: iluvatar.ai/MR-V50-vgpu
        resourceMemoryName: iluvatar.ai/MR-V50.vMem
        resourceCoreName: iluvatar.ai/MR-V50.vCore
      - chipName: BI-V150
        commonWord: BI-V150
        resourceCountName: iluvatar.ai/BI-V150-vgpu
        resourceMemoryName: iluvatar.ai/BI-V150.vMem
        resourceCoreName: iluvatar.ai/BI-V150.vCore
      - chipName: BI-V100
        commonWord: BI-V100
        resourceCountName: iluvatar.ai/BI-V100-vgpu
        resourceMemoryName: iluvatar.ai/BI-V100.vMem
        resourceCoreName: iluvatar.ai/BI-V100.vCore
    kunlun:
      resourceCountName: {{ .Values.kunlunResourceName | quote }}
      resourceVCountName: {{ .Values.kunlunResourceVCountName | quote }}
      resourceVMemoryName: {{ .Values.kunlunResourceVMemoryName | quote }}
    awsneuron:
      resourceCountName: "aws.amazon.com/neuron"
      resourceCoreName: "aws.amazon.com/neuroncore"
    amd:
      resourceCountName: "amd.com/gpu"
    vastai:
      resourceCountName: {{ .Values.vastaiResourceName | quote }}
    vnpus:
      - chipName: 910A
        commonWord: Ascend910A
        resourceName: huawei.com/Ascend910A
        resourceMemoryName: huawei.com/Ascend910A-memory
        memoryAllocatable: 32768
        memoryCapacity: 32768
        memoryFactor: 1
        aiCore: 30
        templates:
          - name: vir02
            memory: 2184
            aiCore: 2
          - name: vir04
            memory: 4369
            aiCore: 4
          - name: vir08
            memory: 8738
            aiCore: 8
          - name: vir16
            memory: 17476
            aiCore: 16
      - chipName: 910B2
        commonWord: Ascend910B2
        resourceName: huawei.com/Ascend910B2
        resourceMemoryName: huawei.com/Ascend910B2-memory
        memoryAllocatable: 65536
        memoryCapacity: 65536
        memoryFactor: 1
        aiCore: 24
        aiCPU: 6
        templates:
          - name: vir03_1c_8g
            memory: 8192
            aiCore: 3
            aiCPU: 1
          - name: vir06_1c_16g
            memory: 16384
            aiCore: 6
            aiCPU: 1
          - name: vir12_3c_32g
            memory: 32768
            aiCore: 12
            aiCPU: 3
      - chipName: 910B3
        commonWord: Ascend910B3
        resourceName: huawei.com/Ascend910B3
        resourceMemoryName: huawei.com/Ascend910B3-memory
        memoryAllocatable: 65536
        memoryCapacity: 65536
        memoryFactor: 1
        aiCore: 20
        aiCPU: 7
        templates:
          - name: vir05_1c_16g
            memory: 16384
            aiCore: 5
            aiCPU: 1
          - name: vir10_3c_32g
            memory: 32768
            aiCore: 10
            aiCPU: 3
      - chipName: 910B4-1
        commonWord: Ascend910B4-1
        resourceName: huawei.com/Ascend910B4-1
        resourceMemoryName: huawei.com/Ascend910B4-1-memory
        memoryAllocatable: 65536
        memoryCapacity: 65536
        memoryFactor: 1
        aiCore: 20
        aiCPU: 7
        templates:
          # NOTE: The vNPU template names for 910B4-1 are fixed by the Ascend runtime and must not be changed.
          # The memory value is used for scheduling, so it must reflect the real amount provided.
          # Template vir05_1c_8g actually provides 16GB of memory.
          - name: vir05_1c_8g
            memory: 16384
            aiCore: 5
            aiCPU: 1
          # Template vir10_3c_16g actually provides 32GB of memory.
          - name: vir10_3c_16g
            memory: 32768
            aiCore: 10
            aiCPU: 3
      - chipName: 910B4
        commonWord: Ascend910B4
        resourceName: huawei.com/Ascend910B4
        resourceMemoryName: huawei.com/Ascend910B4-memory
        memoryAllocatable: 32768
        memoryCapacity: 32768
        memoryFactor: 1
        aiCore: 20
        aiCPU: 7
        templates:
          - name: vir05_1c_8g
            memory: 8192
            aiCore: 5
            aiCPU: 1
          - name: vir10_3c_16g
            memory: 16384
            aiCore: 10
            aiCPU: 3
      - chipName: 310P3
        commonWord: Ascend310P
        resourceName: huawei.com/Ascend310P
        resourceMemoryName: huawei.com/Ascend310P-memory
        memoryAllocatable: 21527
        memoryCapacity: 24576
        memoryFactor: 1
        aiCore: 8
        aiCPU: 7
        templates:
          - name: vir01
            memory: 3072
            aiCore: 1
            aiCPU: 1
          - name: vir02
            memory: 6144
            aiCore: 2
            aiCPU: 2
          - name: vir04
            memory: 12288
            aiCore: 4
            aiCPU: 4
      - chipName: Ascend910
        commonWord: Ascend910C
        resourceName: huawei.com/Ascend910C
        resourceMemoryName: huawei.com/Ascend910C-memory
        memoryAllocatable: 65536
        memoryCapacity: 65536
        aiCore: 20
        aiCPU: 7
    {{- end }}
{{- end }}