---
# Component specification for launching a SageMaker hyperparameter tuning job.
# The layout (inputs / outputs / implementation.container with inputValue and
# outputPath placeholders) appears to follow the Kubeflow Pipelines component
# format -- confirm against the consuming tool.
#
# Every default is written as a quoted string on purpose: values such as
# 'False', 'True', 'Off' and '86400' would otherwise be retyped by the YAML
# parser (boolean / integer) before they reach the component.
name: SageMaker - Hyperparameter Tuning
description: Hyperparameter Tuning Jobs in SageMaker

inputs:
  # Spot training and run-time limits.
  - name: spot_instance
    type: Bool
    description: Use managed spot training.
    default: 'False'
  - name: max_wait_time
    type: Integer
    description: >-
      The maximum time in seconds you are willing to wait for a managed spot
      training job to complete.
    default: '86400'
  - name: max_run_time
    type: Integer
    description: The maximum run time in seconds for the training job.
    default: '86400'
  - name: checkpoint_config
    type: JsonObject
    description: >-
      Dictionary of information about the output location for managed spot
      training checkpoint data.
    default: '{}'

  # AWS connection and resource tagging.
  - name: region
    type: String
    description: The region for the SageMaker resource.
  - name: endpoint_url
    type: String
    description: The URL to use when communicating with the SageMaker service.
    default: ''
  - name: assume_role
    type: String
    description: The ARN of an IAM role to assume when connecting to SageMaker.
    default: ''
  - name: tags
    type: JsonObject
    description: 'An array of key-value pairs, to categorize AWS resources.'
    default: '{}'

  # Tuning job identity and training algorithm.
  - name: job_name
    type: String
    description: >-
      The name of the tuning job. Must be unique within the same AWS account
      and AWS region.
    default: ''
  - name: role
    type: String
    description: >-
      The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform
      tasks on your behalf.
  - name: image
    type: String
    description: >-
      The registry path of the Docker image that contains the training
      algorithm.
    default: ''
  - name: algorithm_name
    type: String
    description: >-
      The name of the resource algorithm to use for the hyperparameter tuning
      job.
    default: ''
  - name: training_input_mode
    type: String
    description: The input mode that the algorithm supports. File or Pipe.
    default: File
  - name: metric_definitions
    type: JsonObject
    description: >-
      The dictionary of name-regex pairs specify the metrics that the
      algorithm emits.
    default: '{}'

  # Search strategy and objective metric.
  - name: strategy
    type: String
    description: >-
      How hyperparameter tuning chooses the combinations of hyperparameter
      values to use for the training job it launches.
    default: Bayesian
  - name: metric_name
    type: String
    description: The name of the metric to use for the objective metric.
  - name: metric_type
    type: String
    description: Whether to minimize or maximize the objective metric.
  - name: early_stopping_type
    type: String
    # Previously carried a copy of the metric_type description.
    description: >-
      Whether to use early stopping for training jobs launched by the
      hyperparameter tuning job.
    # Must stay quoted: a bare Off is a boolean under YAML 1.1.
    default: 'Off'

  # Hyperparameters: fixed values and search ranges.
  - name: static_parameters
    type: JsonObject
    description: >-
      The values of hyperparameters that do not change for the tuning job.
    default: '{}'
  - name: integer_parameters
    type: JsonArray
    description: >-
      The array of IntegerParameterRange objects that specify ranges of
      integer hyperparameters that you want to search.
    default: '[]'
  - name: continuous_parameters
    type: JsonArray
    description: >-
      The array of ContinuousParameterRange objects that specify ranges of
      continuous hyperparameters that you want to search.
    default: '[]'
  - name: categorical_parameters
    type: JsonArray
    description: >-
      The array of CategoricalParameterRange objects that specify ranges of
      categorical hyperparameters that you want to search.
    default: '[]'

  # Input data and output location.
  - name: channels
    type: JsonArray
    description: >-
      A list of dicts specifying the input channels. Must have at least one.
  - name: output_location
    type: String
    # Previously said "results of the transform job", which does not apply to
    # a tuning job.
    description: >-
      The Amazon S3 path where you want Amazon SageMaker to store the model
      artifacts.
  - name: output_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt the model
      artifacts.
    default: ''

  # Compute resources and job-count limits.
  - name: instance_type
    type: String
    description: The ML compute instance type.
    default: ml.m4.xlarge
  - name: instance_count
    type: Integer
    description: >-
      The number of ML compute instances to use in each training job.
    default: '1'
  - name: volume_size
    type: Integer
    description: The size of the ML storage volume that you want to provision.
    default: '30'
  - name: max_num_jobs
    type: Integer
    description: >-
      The maximum number of training jobs that a hyperparameter tuning job
      can launch.
  - name: max_parallel_jobs
    type: Integer
    description: >-
      The maximum number of concurrent training jobs that a hyperparameter
      tuning job can launch.
  - name: resource_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt data on the
      storage volume attached to the ML compute instance(s).
    default: ''

  # Networking and isolation.
  - name: vpc_security_group_ids
    type: String
    description: 'The VPC security group IDs, in the form sg-xxxxxxxx.'
    default: ''
  - name: vpc_subnets
    type: String
    description: >-
      The ID of the subnets in the VPC to which you want to connect your hpo
      job.
    default: ''
  - name: network_isolation
    type: Bool
    description: Isolates the training container.
    default: 'True'
  - name: traffic_encryption
    type: Bool
    description: >-
      Encrypts all communications between ML compute instances in distributed
      training.
    default: 'False'

  # Warm start from earlier tuning jobs.
  - name: warm_start_type
    type: String
    description: >-
      Specifies either "IdenticalDataAndAlgorithm" or "TransferLearning"
    default: ''
  - name: parent_hpo_jobs
    type: String
    description: >-
      List of previously completed or stopped hyperparameter tuning jobs to
      be used as a starting point.
    default: ''

outputs:
  - name: hpo_job_name
    description: The name of the hyper parameter tuning job.
  - name: model_artifact_url
    description: The output model artifacts S3 url.
  - name: best_job_name
    description: Best training job in the hyper parameter tuning job.
  - name: best_hyperparameters
    description: The resulting tuned hyperparameters.
  - name: training_image
    description: >-
      The registry path of the Docker image that contains the training
      algorithm.

implementation:
  container:
    image: public.ecr.aws/kubeflow-on-aws/aws-sagemaker-kfp-components:1.1.2
    command: [python3]
    # Argument order: the entry-point script, then one `--<input>` flag per
    # declared input followed by its value placeholder, then one
    # `--<output>_output_path` flag per declared output followed by its path
    # placeholder.
    args:
      - hyperparameter_tuning/src/sagemaker_tuning_component.py
      - --spot_instance
      - {inputValue: spot_instance}
      - --max_wait_time
      - {inputValue: max_wait_time}
      - --max_run_time
      - {inputValue: max_run_time}
      - --checkpoint_config
      - {inputValue: checkpoint_config}
      - --region
      - {inputValue: region}
      - --endpoint_url
      - {inputValue: endpoint_url}
      - --assume_role
      - {inputValue: assume_role}
      - --tags
      - {inputValue: tags}
      - --job_name
      - {inputValue: job_name}
      - --role
      - {inputValue: role}
      - --image
      - {inputValue: image}
      - --algorithm_name
      - {inputValue: algorithm_name}
      - --training_input_mode
      - {inputValue: training_input_mode}
      - --metric_definitions
      - {inputValue: metric_definitions}
      - --strategy
      - {inputValue: strategy}
      - --metric_name
      - {inputValue: metric_name}
      - --metric_type
      - {inputValue: metric_type}
      - --early_stopping_type
      - {inputValue: early_stopping_type}
      - --static_parameters
      - {inputValue: static_parameters}
      - --integer_parameters
      - {inputValue: integer_parameters}
      - --continuous_parameters
      - {inputValue: continuous_parameters}
      - --categorical_parameters
      - {inputValue: categorical_parameters}
      - --channels
      - {inputValue: channels}
      - --output_location
      - {inputValue: output_location}
      - --output_encryption_key
      - {inputValue: output_encryption_key}
      - --instance_type
      - {inputValue: instance_type}
      - --instance_count
      - {inputValue: instance_count}
      - --volume_size
      - {inputValue: volume_size}
      - --max_num_jobs
      - {inputValue: max_num_jobs}
      - --max_parallel_jobs
      - {inputValue: max_parallel_jobs}
      - --resource_encryption_key
      - {inputValue: resource_encryption_key}
      - --vpc_security_group_ids
      - {inputValue: vpc_security_group_ids}
      - --vpc_subnets
      - {inputValue: vpc_subnets}
      - --network_isolation
      - {inputValue: network_isolation}
      - --traffic_encryption
      - {inputValue: traffic_encryption}
      - --warm_start_type
      - {inputValue: warm_start_type}
      - --parent_hpo_jobs
      - {inputValue: parent_hpo_jobs}
      - --hpo_job_name_output_path
      - {outputPath: hpo_job_name}
      - --model_artifact_url_output_path
      - {outputPath: model_artifact_url}
      - --best_job_name_output_path
      - {outputPath: best_job_name}
      - --best_hyperparameters_output_path
      - {outputPath: best_hyperparameters}
      - --training_image_output_path
      - {outputPath: training_image}