---
# Katib Experiment "grid": searches over learning rate and momentum for the
# PyTorch MNIST training script using the `grid` algorithm, minimizing `loss`.
apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  namespace: kubeflow
  name: grid
spec:
  # Objective: minimize the metric named `loss`, with a target value of 0.001.
  objective:
    type: minimize
    goal: 0.001
    objectiveMetricName: loss
  algorithm:
    algorithmName: grid
  # Trial budget: 3 trials in parallel, 12 trials in total, 3 failures allowed.
  # NOTE(review): assuming inclusive bounds, the grid below has
  # 9 (lr) x 5 (momentum) = 45 points, which is more than maxTrialCount.
  # Confirm the cap of 12 is intentional.
  parallelTrialCount: 3
  maxTrialCount: 12
  maxFailedTrialCount: 3
  # Search space. min/step/max are kept as quoted strings, exactly as in the
  # original manifest.
  parameters:
    - name: lr
      parameterType: double
      feasibleSpace:
        min: "0.01"
        step: "0.005"
        max: "0.05"
    - name: momentum
      parameterType: double
      feasibleSpace:
        min: "0.5"
        step: "0.1"
        max: "0.9"
  trialTemplate:
    # Must match the container name in trialSpec below.
    primaryContainerName: training-container
    # Maps each search parameter (`reference`) to the placeholder name used
    # as ${trialParameters.<name>} inside trialSpec.
    trialParameters:
      - name: learningRate
        description: Learning rate for the training model
        reference: lr
      - name: momentum
        description: Momentum for the training model
        reference: momentum
    # Job template for one trial.
    trialSpec:
      apiVersion: batch/v1
      kind: Job
      spec:
        template:
          spec:
            containers:
              - name: training-container
                # NOTE(review): `latest` is a floating tag; consider pinning
                # a specific version for reproducible trials.
                image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
                command:
                  - "python3"
                  - "/opt/pytorch-mnist/mnist.py"
                  - "--epochs=1"
                  - "--batch-size=16"
                  - "--lr=${trialParameters.learningRate}"
                  - "--momentum=${trialParameters.momentum}"
            restartPolicy: Never