{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "#/components/schemas/StartMLDataProcessingJobInput",
  "title": "StartMLDataProcessingJobInput",
  "description": "Request parameters for starting a Neptune ML data processing job.",
  "type": "object",
  "required": [
    "inputDataS3Location",
    "processedDataS3Location"
  ],
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique identifier for the job (auto-generated if omitted)."
    },
    "inputDataS3Location": {
      "type": "string",
      "description": "S3 URI for input data."
    },
    "processedDataS3Location": {
      "type": "string",
      "description": "S3 URI for output results."
    },
    "previousDataProcessingJobId": {
      "type": "string",
      "description": "Job ID of a previous job for incremental processing."
    },
    "sagemakerIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for SageMaker execution."
    },
    "neptuneIamRoleArn": {
      "type": "string",
      "description": "IAM role ARN for Neptune access."
    },
    "processingInstanceType": {
      "type": "string",
      "description": "ML instance type (default auto-selected ml.r5 type)."
    },
    "processingInstanceVolumeSizeInGB": {
      "type": "integer",
      "description": "Disk volume size in GB (default 0 = auto-selected).",
      "minimum": 0,
      "default": 0
    },
    "processingTimeOutInSeconds": {
      "type": "integer",
      "description": "Timeout in seconds (default 86400).",
      "default": 86400
    },
    "modelType": {
      "type": "string",
      "description": "Model type selection.",
      "enum": [
        "heterogeneous",
        "kge"
      ]
    },
    "configFileName": {
      "type": "string",
      "description": "Data specification file name.",
      "default": "training-data-configuration.json"
    },
    "subnets": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Subnet IDs in Neptune VPC."
    },
    "securityGroupIds": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "VPC security group IDs."
    },
    "volumeEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key used to encrypt the storage volume attached to the ML instances that run the processing job."
    },
    "s3OutputEncryptionKMSKey": {
      "type": "string",
      "description": "KMS key used to encrypt the output of the processing job."
    },
    "enableInterContainerTrafficEncryption": {
      "type": "boolean",
      "description": "Whether to encrypt traffic between containers.",
      "default": true
    }
  }
}