# Copyright 2018 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component specification: submits a Cloud ML Engine training job from a
# pipeline step. Every entry under `inputs` is forwarded to the container as a
# `--<input name> <value>` pair in `implementation.container.args`; the two
# string outputs and the UI metadata output are passed as output file paths.

name: Submitting a Cloud ML training job as a pipeline step
description: |
  A Kubeflow Pipeline component to submit a Cloud Machine Learning (Cloud ML)
  Engine training job as a step in a pipeline.
metadata:
  labels:
    # Quoted so the label value stays a string rather than a YAML boolean.
    # NOTE(review): presumably this label is what requests the pod environment
    # (see KFP_POD_NAME under `env`) - confirm against the KFP SDK.
    add-pod-env: 'true'
inputs:
  # The only input without a default value.
  - name: project_id
    description: 'Required. The ID of the parent project of the job.'
    type: GCPProjectID
  - name: python_module
    description: 'The Python module name to run after installing the packages.'
    default: ''
    type: String
  - name: package_uris
    description: >-
      The Cloud Storage location of the packages (that contain the training
      program and any additional dependencies). The maximum number of package
      URIs is 100.
    default: ''
    type: List
  - name: region
    description: 'The Compute Engine region in which the training job is run.'
    default: ''
    type: GCPRegion
  - name: args
    description: 'The command line arguments to pass to the program.'
    default: ''
    type: List
  - name: job_dir
    description: >-
      A Cloud Storage path in which to store the training outputs and other
      data needed for training. This path is passed to your TensorFlow program
      as the `job-dir` command-line argument. The benefit of specifying this
      field is that Cloud ML validates the path for use in training.
    default: ''
    type: GCSPath
  - name: python_version
    description: >-
      The version of Python used in training. If not set, the default version
      is `2.7`.
      Python `3.5` is available when runtimeVersion is set to `1.4` and above.
    default: ''
    type: String
  - name: runtime_version
    description: >-
      The Cloud ML Engine runtime version to use for training. If not set,
      Cloud ML Engine uses the default stable version, 1.0.
    default: ''
    type: String
  - name: master_image_uri
    description: >-
      The Docker image to run on the master replica. This image must be in
      Container Registry.
    default: ''
    type: GCRPath
  - name: worker_image_uri
    description: >-
      The Docker image to run on the worker replica. This image must be in
      Container Registry.
    default: ''
    type: GCRPath
  - name: training_input
    description: >-
      The input parameters to create a training job. It is the JSON payload of a
      [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput)
    default: ''
    type: Dict
  - name: job_id_prefix
    description: 'The prefix of the generated job id.'
    default: ''
    type: String
  - name: job_id
    description: >-
      The ID of the job to create, takes precedence over generated job id if
      set.
    default: ''
    type: String
  - name: wait_interval
    description: >-
      Optional. A time-interval to wait for between calls to get the job status.
      Defaults to 30.
    # Kept as a quoted string, like every other default in this file.
    default: '30'
    type: Integer
outputs:
  - name: job_id
    description: 'The ID of the created job.'
    type: String
  - name: job_dir
    description: >-
      The output path in Cloud Storage of the training job, which contains the
      trained model files.
    type: GCSPath
  # Referenced by name from the --ui_metadata_path argument below.
  - name: MLPipeline UI metadata
    type: UI metadata
implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-gcp:1.7.0-rc.3
    command: ["python", -u, -m, "kfp_component.launcher"]
    # Argument order: launcher option (--ui_metadata_path), then the module
    # and function to run (kfp_component.google.ml_engine, train), then one
    # `--flag, placeholder` pair per line. {inputValue: X} and {outputPath: X}
    # refer to the entries named X under `inputs` and `outputs` above.
    args: [
      --ui_metadata_path, {outputPath: MLPipeline UI metadata},
      kfp_component.google.ml_engine, train,
      --project_id, {inputValue: project_id},
      --python_module, {inputValue: python_module},
      --package_uris, {inputValue: package_uris},
      --region, {inputValue: region},
      --args, {inputValue: args},
      --job_dir, {inputValue: job_dir},
      --python_version, {inputValue: python_version},
      --runtime_version, {inputValue: runtime_version},
      --master_image_uri, {inputValue: master_image_uri},
      --worker_image_uri, {inputValue: worker_image_uri},
      --training_input, {inputValue: training_input},
      --job_id_prefix, {inputValue: job_id_prefix},
      --job_id, {inputValue: job_id},
      --wait_interval, {inputValue: wait_interval},
      --job_id_output_path, {outputPath: job_id},
      --job_dir_output_path, {outputPath: job_dir},
    ]
    env:
      # Double-quoted so the leading `{` is not parsed as a YAML flow mapping.
      # NOTE(review): `{{pod.name}}` looks like a workflow-engine template
      # variable substituted at run time - confirm.
      KFP_POD_NAME: "{{pod.name}}"