---
# Argo Workflow "Train on Synthetic + Real World Dataset".
#
# The annotations below record that this manifest was compiled by the
# Kubeflow Pipelines SDK (kfp_sdk_version 1.0.1). The entrypoint is a DAG
# with three tasks:
#   pvc      -> creates a PersistentVolumeClaim named '<workflow name>-pvc'
#   download -> runs `datasetinsights download` with /data mounted from the PVC
#   train    -> runs `datasetinsights train` against /data on a GPU node
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: train-on-synthetic-real-world-dataset-
  annotations:
    # Quoted so the version is always read as a string.
    pipelines.kubeflow.org/kfp_sdk_version: '1.0.1'
    pipelines.kubeflow.org/pipeline_compilation_time: '2021-02-03T14:58:28.155041'
    # JSON description of the pipeline inputs and their defaults. The scalar
    # is folded across lines; each line break reads back as a single space.
    pipelines.kubeflow.org/pipeline_spec: '{"description": "Train on Synthetic + Real World Dataset",
      "inputs": [{"default": "unitytechnologies/datasetinsights:latest",
      "name": "docker", "optional": true, "type": "String"},
      {"default": "https://storage.googleapis.com/datasetinsights/data/groceries/v3.zip",
      "name": "source_uri", "optional": true, "type": "String"},
      {"default": "datasetinsights/configs/faster_rcnn_fine_tune.yaml",
      "name": "config", "optional": true, "type": "String"},
      {"default": "", "name": "override", "optional": true, "type": "String"},
      {"default": "https://storage.googleapis.com/datasetinsights/models/Synthetic/FasterRCNN.estimator",
      "name": "checkpoint_file", "optional": true, "type": "String"},
      {"default": "gs:///runs/yyyymmdd-hhmm", "name": "tb_log_dir", "optional": true, "type": "String"},
      {"default": "gs:///checkpoints/yyyymmdd-hhmm", "name": "checkpoint_dir", "optional": true, "type": "String"},
      {"default": "100Gi", "name": "volume_size", "optional": true, "type": "String"}],
      "name": "Train on Synthetic + Real World Dataset"}'
  labels:
    pipelines.kubeflow.org/kfp_sdk_version: '1.0.1'
spec:
  entrypoint: train-on-synthetic-real-world-dataset
  templates:
    # ------------------------------------------------------------------
    # download: runs `datasetinsights download` for source_uri with
    # --output=/data, where /data is the mount point of the shared PVC.
    # ------------------------------------------------------------------
    - name: download
      container:
        args:
          - '--source-uri={{inputs.parameters.source_uri}}'
          - '--output=/data'
          - '--include-binary'
        command:
          - datasetinsights
          - download
        env:
          # Both variables point at the service-account key mounted from
          # the user-gcp-sa secret (see volumeMounts / volumes below).
          - name: GOOGLE_APPLICATION_CREDENTIALS
            value: /secret/gcp-credentials/user-gcp-sa.json
          - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE
            value: /secret/gcp-credentials/user-gcp-sa.json
        image: '{{inputs.parameters.docker}}'
        resources:
          limits:
            memory: 64Gi
          requests:
            memory: 64Gi
        volumeMounts:
          - mountPath: /data
            name: pvc
          - mountPath: /secret/gcp-credentials
            name: gcp-credentials-user-gcp-sa
      inputs:
        parameters:
          - name: docker
          - name: pvc-name
          - name: source_uri
      volumes:
        - name: gcp-credentials-user-gcp-sa
          secret:
            secretName: user-gcp-sa
        - name: pvc
          persistentVolumeClaim:
            claimName: '{{inputs.parameters.pvc-name}}'

    # ------------------------------------------------------------------
    # pvc: resource template that creates the PersistentVolumeClaim and
    # exposes its manifest, name and size as output parameters.
    # NOTE(review): nothing in this workflow deletes the claim afterwards.
    # ------------------------------------------------------------------
    - name: pvc
      resource:
        action: create
        manifest: |
          apiVersion: v1
          kind: PersistentVolumeClaim
          metadata:
            name: '{{workflow.name}}-pvc'
          spec:
            accessModes:
            - ReadWriteOnce
            resources:
              requests:
                storage: '{{inputs.parameters.volume_size}}'
      inputs:
        parameters:
          - name: volume_size
      outputs:
        parameters:
          - name: pvc-manifest
            valueFrom:
              jsonPath: '{}'
          - name: pvc-name
            valueFrom:
              jsonPath: '{.metadata.name}'
          - name: pvc-size
            valueFrom:
              jsonPath: '{.status.capacity.storage}'

    # ------------------------------------------------------------------
    # train: runs `datasetinsights train` with /data as both the training
    # and validation data directory, on a node selected by the
    # nvidia-tesla-v100 accelerator label with one GPU requested.
    # ------------------------------------------------------------------
    - name: train
      container:
        args:
          - '--config={{inputs.parameters.config}}'
          - '--train-data=/data'
          - '--val-data=/data'
          - '--tb-log-dir={{inputs.parameters.tb_log_dir}}'
          - '--kfp-log-dir=/kfp_logs'
          - '--kfp-ui-metadata-filename=kfp_ui_metadata.json'
          - '--checkpoint-dir={{inputs.parameters.checkpoint_dir}}'
          - '--checkpoint-file={{inputs.parameters.checkpoint_file}}'
          - '--override={{inputs.parameters.override}}'
        command:
          - datasetinsights
          - train
        env:
          # MLflow identifiers are read from the dev-mlflow-secret secret.
          - name: MLFLOW_HOST_ID
            valueFrom:
              secretKeyRef:
                name: dev-mlflow-secret
                key: MLFLOW_HOST_ID
          - name: MLFLOW_CLIENT_ID
            valueFrom:
              secretKeyRef:
                name: dev-mlflow-secret
                key: MLFLOW_CLIENT_ID
          - name: GOOGLE_APPLICATION_CREDENTIALS
            value: /secret/gcp-credentials/user-gcp-sa.json
          - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE
            value: /secret/gcp-credentials/user-gcp-sa.json
        image: '{{inputs.parameters.docker}}'
        resources:
          limits:
            nvidia.com/gpu: 1
            memory: 64Gi
          requests:
            memory: 64Gi
        volumeMounts:
          - mountPath: /data
            name: pvc
          - mountPath: /secret/gcp-credentials
            name: gcp-credentials-user-gcp-sa
      inputs:
        parameters:
          - name: checkpoint_dir
          - name: checkpoint_file
          - name: config
          - name: docker
          - name: override
          - name: pvc-name
          - name: tb_log_dir
      outputs:
        artifacts:
          # Path is --kfp-log-dir joined with --kfp-ui-metadata-filename
          # from the container args above.
          - name: mlpipeline-ui-metadata
            path: /kfp_logs/kfp_ui_metadata.json
      nodeSelector:
        cloud.google.com/gke-accelerator: nvidia-tesla-v100
      volumes:
        - name: gcp-credentials-user-gcp-sa
          secret:
            secretName: user-gcp-sa
        - name: pvc
          persistentVolumeClaim:
            claimName: '{{inputs.parameters.pvc-name}}'

    # ------------------------------------------------------------------
    # Entrypoint DAG: pvc -> download -> train.
    # ------------------------------------------------------------------
    - name: train-on-synthetic-real-world-dataset
      inputs:
        parameters:
          - name: checkpoint_dir
          - name: checkpoint_file
          - name: config
          - name: docker
          - name: override
          - name: source_uri
          - name: tb_log_dir
          - name: volume_size
      dag:
        tasks:
          - name: download
            template: download
            # Needs the claim name produced by the pvc task.
            dependencies:
              - pvc
            arguments:
              parameters:
                - name: docker
                  value: '{{inputs.parameters.docker}}'
                - name: pvc-name
                  value: '{{tasks.pvc.outputs.parameters.pvc-name}}'
                - name: source_uri
                  value: '{{inputs.parameters.source_uri}}'
          - name: pvc
            template: pvc
            arguments:
              parameters:
                - name: volume_size
                  value: '{{inputs.parameters.volume_size}}'
          - name: train
            template: train
            # Runs only after the download task and the pvc task complete.
            dependencies:
              - download
              - pvc
            arguments:
              parameters:
                - name: checkpoint_dir
                  value: '{{inputs.parameters.checkpoint_dir}}'
                - name: checkpoint_file
                  value: '{{inputs.parameters.checkpoint_file}}'
                - name: config
                  value: '{{inputs.parameters.config}}'
                - name: docker
                  value: '{{inputs.parameters.docker}}'
                - name: override
                  value: '{{inputs.parameters.override}}'
                - name: pvc-name
                  value: '{{tasks.pvc.outputs.parameters.pvc-name}}'
                - name: tb_log_dir
                  value: '{{inputs.parameters.tb_log_dir}}'
  # Workflow-level defaults; they mirror the "default" fields of the
  # pipeline_spec annotation above.
  arguments:
    parameters:
      - name: docker
        value: 'unitytechnologies/datasetinsights:latest'
      - name: source_uri
        value: 'https://storage.googleapis.com/datasetinsights/data/groceries/v3.zip'
      - name: config
        value: datasetinsights/configs/faster_rcnn_fine_tune.yaml
      # Explicit empty string, matching the "" default declared in
      # pipeline_spec, so the workflow can be submitted without passing
      # this parameter.
      - name: override
        value: ''
      - name: checkpoint_file
        value: 'https://storage.googleapis.com/datasetinsights/models/Synthetic/FasterRCNN.estimator'
      # NOTE(review): the two gs:// defaults below have an empty bucket and a
      # literal 'yyyymmdd-hhmm' segment; they look like placeholders that are
      # meant to be overridden at submit time - confirm.
      - name: tb_log_dir
        value: 'gs:///runs/yyyymmdd-hhmm'
      - name: checkpoint_dir
        value: 'gs:///checkpoints/yyyymmdd-hhmm'
      - name: volume_size
        value: 100Gi
  serviceAccountName: pipeline-runner