# Component definition for running a SageMaker Ground Truth labeling job.
#
# Layout: `inputs` declares the parameters, `outputs` declares the values the
# container writes back, and `implementation.container` describes the image,
# entry command, and the argument list that wires every input/output to a
# command-line flag of the entry script.
# NOTE(review): the inputValue/outputPath placeholders and the image name
# suggest this is a Kubeflow Pipelines component spec - confirm with the
# consumer before relying on that.
#
# Conventions used in this file:
#   * All defaults are quoted strings (including numeric and boolean ones).
#   * Inputs without a `default` key have no fallback value declared here.
#   * Long descriptions use folded scalars (`>-`); they parse to a single
#     line of text with no trailing newline.
name: SageMaker - Ground Truth
description: Ground Truth Jobs in SageMaker

inputs:
  # --- Connection / common settings ---------------------------------------
  - name: region
    type: String
    description: The region for the SageMaker resource.
  - name: endpoint_url
    type: String
    description: The URL to use when communicating with the SageMaker service.
    default: ''
  - name: assume_role
    type: String
    description: The ARN of an IAM role to assume when connecting to SageMaker.
    default: ''
  - name: tags
    type: JsonObject
    description: An array of key-value pairs, to categorize AWS resources.
    default: '{}'

  # --- Labeling job definition --------------------------------------------
  - name: role
    type: String
    description: >-
      The Amazon Resource Name (ARN) that Amazon SageMaker assumes to perform
      tasks on your behalf.
  - name: job_name
    type: String
    description: The name of the labeling job.
    default: ''
  - name: label_attribute_name
    type: String
    description: >-
      The attribute name to use for the label in the output manifest file.
      Default is the job name.
    default: ''
  - name: manifest_location
    type: String
    description: >-
      The Amazon S3 location of the manifest file that describes the input
      data objects.
  - name: output_location
    type: String
    description: The Amazon S3 location to write output data.
  - name: output_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt the model artifacts.
    default: ''
  - name: task_type
    type: String
    description: >-
      Built in image classification, bounding box, text classification, or
      semantic segmentation, or custom. If custom, please provide pre- and
      post-labeling task lambda functions.
  - name: worker_type
    type: String
    description: >-
      The workteam for data labeling, either public, private, or vendor.
  - name: workteam_arn
    type: String
    description: The ARN of the work team assigned to complete the tasks.
    default: ''
  - name: no_adult_content
    type: Bool
    description: If true, your data is free of adult content.
    default: 'False'
  - name: no_ppi
    type: Bool
    description: >-
      If true, your data is free of personally identifiable information.
    default: 'False'
  - name: label_category_config
    type: String
    description: >-
      The S3 URL of the JSON structured file that defines the categories used
      to label the data objects.
    default: ''

  # --- Stopping conditions and auto-labeling ------------------------------
  - name: max_human_labeled_objects
    type: Integer
    description: >-
      The maximum number of objects that can be labeled by human workers.
    default: '0'
  - name: max_percent_objects
    type: Integer
    description: >-
      The maximum percentage of input data objects that should be labeled.
    default: '0'
  - name: enable_auto_labeling
    type: Bool
    description: >-
      Enables auto-labeling, only for bounding box, text classification, and
      image classification.
    default: 'False'
  - name: initial_model_arn
    type: String
    description: >-
      The ARN of the final model used for a previous auto-labeling job.
    default: ''
  - name: resource_encryption_key
    type: String
    description: >-
      The AWS KMS key that Amazon SageMaker uses to encrypt data on the
      storage volume attached to the ML compute instance(s).
    default: ''

  # --- Human task configuration -------------------------------------------
  - name: ui_template
    type: String
    description: The Amazon S3 bucket location of the UI template.
  - name: pre_human_task_function
    type: String
    description: >-
      The ARN of a Lambda function that is run before a data object is sent to
      a human worker.
    default: ''
  - name: post_human_task_function
    type: String
    description: >-
      The ARN of a Lambda function that implements the logic for annotation
      consolidation.
    default: ''
  - name: task_keywords
    type: String
    description: >-
      Keywords used to describe the task so that workers on Amazon Mechanical
      Turk can discover the task.
    default: ''
  - name: title
    type: String
    description: A title for the task for your human workers.
  - name: description
    type: String
    description: A description of the task for your human workers.
  - name: num_workers_per_object
    type: Integer
    description: The number of human workers that will label an object.
  - name: time_limit
    type: Integer
    description: >-
      The amount of time that a worker has to complete a task in seconds
  # The spelling "availibility" is part of the public input name and of the
  # matching --task_availibility flag below; it is kept as-is so existing
  # callers keep working.
  - name: task_availibility
    type: Integer
    description: >-
      The length of time that a task remains available for labelling by human
      workers.
    default: '0'
  - name: max_concurrent_tasks
    type: Integer
    description: >-
      The maximum number of data objects that can be labeled by human workers
      at the same time.
    default: '0'
  # Declared as String (not a number) and quoted so the three decimal places
  # of the default are preserved verbatim.
  - name: workforce_task_price
    type: String
    description: >-
      The price that you pay for each task performed by a public worker in
      USD. Specify to the tenth fractions of a cent. Format as '0.000'.
    default: '0.000'

outputs:
  - name: output_manifest_location
    description: >-
      The Amazon S3 bucket location of the manifest file for labeled data.
  - name: active_learning_model_arn
    description: >-
      The ARN for the most recent Amazon SageMaker model trained as part of
      automated data labeling.

implementation:
  container:
    image: amazon/aws-sagemaker-kfp-components:1.1.1
    command: [python3]
    # First argument is the script handed to python3. After it, every
    # `--flag` is immediately followed by the placeholder that supplies its
    # value; each flag is named after the input it carries.
    args:
      - ground_truth/src/sagemaker_ground_truth_component.py
      - --region
      - {inputValue: region}
      - --endpoint_url
      - {inputValue: endpoint_url}
      - --assume_role
      - {inputValue: assume_role}
      - --tags
      - {inputValue: tags}
      - --role
      - {inputValue: role}
      - --job_name
      - {inputValue: job_name}
      - --label_attribute_name
      - {inputValue: label_attribute_name}
      - --manifest_location
      - {inputValue: manifest_location}
      - --output_location
      - {inputValue: output_location}
      - --output_encryption_key
      - {inputValue: output_encryption_key}
      - --task_type
      - {inputValue: task_type}
      - --worker_type
      - {inputValue: worker_type}
      - --workteam_arn
      - {inputValue: workteam_arn}
      - --no_adult_content
      - {inputValue: no_adult_content}
      - --no_ppi
      - {inputValue: no_ppi}
      - --label_category_config
      - {inputValue: label_category_config}
      - --max_human_labeled_objects
      - {inputValue: max_human_labeled_objects}
      - --max_percent_objects
      - {inputValue: max_percent_objects}
      - --enable_auto_labeling
      - {inputValue: enable_auto_labeling}
      - --initial_model_arn
      - {inputValue: initial_model_arn}
      - --resource_encryption_key
      - {inputValue: resource_encryption_key}
      - --ui_template
      - {inputValue: ui_template}
      - --pre_human_task_function
      - {inputValue: pre_human_task_function}
      - --post_human_task_function
      - {inputValue: post_human_task_function}
      - --task_keywords
      - {inputValue: task_keywords}
      - --title
      - {inputValue: title}
      - --description
      - {inputValue: description}
      - --num_workers_per_object
      - {inputValue: num_workers_per_object}
      - --time_limit
      - {inputValue: time_limit}
      - --task_availibility
      - {inputValue: task_availibility}
      - --max_concurrent_tasks
      - {inputValue: max_concurrent_tasks}
      - --workforce_task_price
      - {inputValue: workforce_task_price}
      # Output placeholders: the script receives a file path for each
      # declared output rather than a value.
      - --output_manifest_location_output_path
      - {outputPath: output_manifest_location}
      - --active_learning_model_arn_output_path
      - {outputPath: active_learning_model_arn}