{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/google-cloud-dataproc/refs/heads/main/json-schema/cluster-schema.json",
  "title": "Google Cloud Dataproc Cluster",
  "description": "A Dataproc cluster resource representing a managed Apache Hadoop and Spark cluster on Google Cloud.",
  "type": "object",
  "properties": {
    "projectId": {
      "type": "string",
      "description": "The Google Cloud Platform project ID that the cluster belongs to."
    },
    "clusterName": {
      "type": "string",
      "description": "The cluster name, which must be unique within a project. It must start with a lowercase letter, may contain lowercase letters, digits, and hyphens, and must not end with a hyphen.",
      "pattern": "^[a-z](?:[-a-z0-9]{0,52}[a-z0-9])?$"
    },
    "config": {
      "type": "object",
      "description": "The cluster config.",
      "properties": {
        "configBucket": {
          "type": "string",
          "description": "Cloud Storage bucket used for staging dependencies and config files."
        },
        "tempBucket": {
          "type": "string",
          "description": "Cloud Storage bucket used for temporary data."
        },
        "gceClusterConfig": {
          "type": "object",
          "properties": {
            "zoneUri": {
              "type": "string"
            },
            "networkUri": {
              "type": "string"
            },
            "subnetworkUri": {
              "type": "string"
            },
            "internalIpOnly": {
              "type": "boolean"
            },
            "serviceAccountScopes": {
              "type": "array",
              "items": {
                "type": "string"
              }
            },
            "tags": {
              "type": "array",
              "items": {
                "type": "string"
              }
            }
          }
        },
        "masterConfig": {
          "$ref": "#/$defs/InstanceGroupConfig"
        },
        "workerConfig": {
          "$ref": "#/$defs/InstanceGroupConfig"
        },
        "secondaryWorkerConfig": {
          "$ref": "#/$defs/InstanceGroupConfig"
        },
        "softwareConfig": {
          "type": "object",
          "properties": {
            "imageVersion": {
              "type": "string",
              "description": "The version of software inside the cluster (e.g., 2.1-debian11)."
            },
            "properties": {
              "type": "object",
              "additionalProperties": {
                "type": "string"
              }
            },
            "optionalComponents": {
              "type": "array",
              "items": {
                "type": "string",
                "examples": ["JUPYTER", "ZEPPELIN", "DOCKER", "FLINK"]
              }
            }
          }
        },
        "initializationActions": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "executableFile": {
                "type": "string"
              },
              "executionTimeout": {
                "type": "string"
              }
            },
            "required": ["executableFile"]
          }
        }
      }
    },
    "labels": {
      "type": "object",
      "additionalProperties": {
        "type": "string"
      },
      "description": "The labels to associate with this cluster."
    },
    "status": {
      "type": "object",
      "properties": {
        "state": {
          "type": "string",
          "enum": [
            "UNKNOWN",
            "CREATING",
            "RUNNING",
            "ERROR",
            "ERROR_DUE_TO_UPDATE",
            "DELETING",
            "UPDATING",
            "STOPPING",
            "STOPPED",
            "STARTING",
            "REPAIRING"
          ]
        },
        "stateStartTime": {
          "type": "string",
          "format": "date-time"
        },
        "detail": {
          "type": "string"
        }
      }
    },
    "clusterUuid": {
      "type": "string",
      "description": "A cluster UUID generated by the Dataproc service."
    }
  },
  "required": ["projectId", "clusterName", "config"],
  "$defs": {
    "InstanceGroupConfig": {
      "type": "object",
      "description": "Instance group settings shared by the masterConfig, workerConfig, and secondaryWorkerConfig properties of the cluster config.",
      "properties": {
        "numInstances": {
          "type": "integer",
          "minimum": 0,
          "description": "The number of VM instances in the instance group."
        },
        "machineTypeUri": {
          "type": "string",
          "description": "The Compute Engine machine type (e.g., n1-standard-4)."
        },
        "diskConfig": {
          "type": "object",
          "properties": {
            "bootDiskType": {
              "type": "string",
              "enum": ["pd-standard", "pd-ssd", "pd-balanced"]
            },
            "bootDiskSizeGb": {
              "type": "integer",
              "minimum": 10
            },
            "numLocalSsds": {
              "type": "integer",
              "minimum": 0
            }
          }
        },
        "imageUri": {
          "type": "string"
        },
        "preemptibility": {
          "type": "string",
          "enum": ["PREEMPTIBILITY_UNSPECIFIED", "NON_PREEMPTIBLE", "PREEMPTIBLE", "SPOT"]
        }
      }
    }
  }
}