---
# OpenAPI 3.1 description of the Google Cloud Dataproc v1 REST surface.
# Covers cluster, job, and workflow-template operations and the schemas
# they exchange. Every operation below is secured with the `oauth2`
# scheme declared under components.securitySchemes.
openapi: 3.1.0
info:
  title: Google Cloud Dataproc API
  description: >-
    The Cloud Dataproc API manages Hadoop-based clusters and jobs on Google
    Cloud Platform. It provides programmatic access to create and manage
    clusters, submit and monitor Apache Spark, Apache Hadoop, Apache Hive,
    and Apache Pig jobs, and manage workflow templates for orchestrating
    multi-step data processing pipelines.
  version: v1
  contact:
    name: Google Cloud
    url: https://cloud.google.com/dataproc/docs/reference/rest
  license:
    name: Apache 2.0
    url: https://www.apache.org/licenses/LICENSE-2.0

servers:
  - url: https://dataproc.googleapis.com/v1
    description: Cloud Dataproc API v1

tags:
  - name: Clusters
    description: Operations on Dataproc clusters
  - name: Jobs
    description: Operations on Dataproc jobs
  - name: WorkflowTemplates
    description: Operations on workflow templates

paths:
  # Cluster collection: list and create.
  /projects/{project}/regions/{region}/clusters:
    get:
      tags:
        - Clusters
      summary: Google Cloud Dataproc List clusters
      description: Lists all regions/{region}/clusters in a project.
      operationId: listClusters
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: filter
          in: query
          schema:
            type: string
        - name: pageSize
          in: query
          schema:
            type: integer
        - name: pageToken
          in: query
          schema:
            type: string
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListClustersResponse'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform
    post:
      tags:
        - Clusters
      summary: Google Cloud Dataproc Create a cluster
      description: >-
        Creates a cluster in a project. The returned Operation.metadata will
        be ClusterOperationMetadata.
      operationId: createCluster
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Cluster'
      responses:
        '200':
          description: Cluster creation initiated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Operation'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform

  # Single cluster: get, update (patch), delete.
  /projects/{project}/regions/{region}/clusters/{cluster}:
    get:
      tags:
        - Clusters
      summary: Google Cloud Dataproc Get a cluster
      description: Gets the resource representation for a cluster in a project.
      operationId: getCluster
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: cluster
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Cluster'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform
    patch:
      tags:
        - Clusters
      summary: Google Cloud Dataproc Update a cluster
      description: >-
        Updates a cluster in a project. The returned Operation.metadata will
        be ClusterOperationMetadata.
      operationId: updateCluster
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: cluster
          in: path
          required: true
          schema:
            type: string
        # The only query parameter in this spec that is marked required.
        - name: updateMask
          in: query
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Cluster'
      responses:
        '200':
          description: Cluster update initiated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Operation'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform
    delete:
      tags:
        - Clusters
      summary: Google Cloud Dataproc Delete a cluster
      description: >-
        Deletes a cluster in a project. The returned Operation.metadata will
        be ClusterOperationMetadata.
      operationId: deleteCluster
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: cluster
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Cluster deletion initiated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Operation'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform

  # Job collection: list.
  /projects/{project}/regions/{region}/jobs:
    get:
      tags:
        - Jobs
      summary: Google Cloud Dataproc List jobs
      description: Lists regions/{region}/jobs in a project.
      operationId: listJobs
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: pageSize
          in: query
          schema:
            type: integer
        - name: pageToken
          in: query
          schema:
            type: string
        - name: clusterName
          in: query
          schema:
            type: string
        - name: jobStateMatcher
          in: query
          schema:
            type: string
            enum:
              - ALL
              - ACTIVE
              - NON_ACTIVE
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListJobsResponse'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform

  # Custom-verb path: the ":submit" suffix is part of the literal path.
  /projects/{project}/regions/{region}/jobs:submit:
    post:
      tags:
        - Jobs
      summary: Google Cloud Dataproc Submit a job
      description: Submits a job to a cluster.
      operationId: submitJob
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SubmitJobRequest'
      responses:
        '200':
          description: Job submitted successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Job'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform

  # Single job: get, delete.
  /projects/{project}/regions/{region}/jobs/{jobId}:
    get:
      tags:
        - Jobs
      summary: Google Cloud Dataproc Get a job
      description: Gets the resource representation for a job in a project.
      operationId: getJob
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: jobId
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Job'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform
    delete:
      tags:
        - Jobs
      summary: Google Cloud Dataproc Delete a job
      description: >-
        Deletes the job from the project. After deletion, the job metadata is
        not retrievable.
      operationId: deleteJob
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: jobId
          in: path
          required: true
          schema:
            type: string
      # This response declares no content (no body schema).
      responses:
        '200':
          description: Job deleted successfully
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform

  # Custom-verb path: the ":cancel" suffix is part of the literal path.
  /projects/{project}/regions/{region}/jobs/{jobId}:cancel:
    post:
      tags:
        - Jobs
      summary: Google Cloud Dataproc Cancel a job
      description: Starts a job cancellation request.
      operationId: cancelJob
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: jobId
          in: path
          required: true
          schema:
            type: string
      responses:
        '200':
          description: Job cancellation initiated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Job'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform

  # Workflow-template collection: list and create.
  /projects/{project}/regions/{region}/workflowTemplates:
    get:
      tags:
        - WorkflowTemplates
      summary: Google Cloud Dataproc List workflow templates
      description: >-
        Lists workflows that match the specified filter in the request.
      operationId: listWorkflowTemplates
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
        - name: pageSize
          in: query
          schema:
            type: integer
        - name: pageToken
          in: query
          schema:
            type: string
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListWorkflowTemplatesResponse'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform
    post:
      tags:
        - WorkflowTemplates
      summary: Google Cloud Dataproc Create a workflow template
      description: Creates new workflow template.
      operationId: createWorkflowTemplate
      parameters:
        - name: project
          in: path
          required: true
          schema:
            type: string
        - name: region
          in: path
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/WorkflowTemplate'
      responses:
        '200':
          description: Workflow template created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/WorkflowTemplate'
      security:
        - oauth2:
            - https://www.googleapis.com/auth/cloud-platform

components:
  securitySchemes:
    # OAuth 2.0 authorization-code flow with a single scope.
    oauth2:
      type: oauth2
      flows:
        authorizationCode:
          authorizationUrl: https://accounts.google.com/o/oauth2/auth
          tokenUrl: https://oauth2.googleapis.com/token
          scopes:
            https://www.googleapis.com/auth/cloud-platform: >-
              Full access to Cloud Platform

  schemas:
    # Request body for create/update and response body for getCluster.
    Cluster:
      type: object
      properties:
        projectId:
          type: string
        clusterName:
          type: string
          description: The cluster name, unique within a project.
        config:
          $ref: '#/components/schemas/ClusterConfig'
        labels:
          type: object
          additionalProperties:
            type: string
        status:
          $ref: '#/components/schemas/ClusterStatus'
        statusHistory:
          type: array
          items:
            $ref: '#/components/schemas/ClusterStatus'
        clusterUuid:
          type: string

    # Referenced by Cluster.config and by
    # WorkflowTemplate.placement.managedCluster.config.
    ClusterConfig:
      type: object
      properties:
        configBucket:
          type: string
        tempBucket:
          type: string
        gceClusterConfig:
          type: object
          properties:
            zoneUri:
              type: string
            networkUri:
              type: string
            subnetworkUri:
              type: string
            internalIpOnly:
              type: boolean
            serviceAccountScopes:
              type: array
              items:
                type: string
            tags:
              type: array
              items:
                type: string
            metadata:
              type: object
              additionalProperties:
                type: string
        masterConfig:
          $ref: '#/components/schemas/InstanceGroupConfig'
        workerConfig:
          $ref: '#/components/schemas/InstanceGroupConfig'
        secondaryWorkerConfig:
          $ref: '#/components/schemas/InstanceGroupConfig'
        softwareConfig:
          type: object
          properties:
            imageVersion:
              type: string
            # A data field literally named "properties" (string-to-string
            # map), not the JSON Schema keyword.
            properties:
              type: object
              additionalProperties:
                type: string
            optionalComponents:
              type: array
              items:
                type: string
        initializationActions:
          type: array
          items:
            type: object
            properties:
              executableFile:
                type: string
              executionTimeout:
                type: string

    # Shared shape for the master, worker, and secondary-worker groups.
    InstanceGroupConfig:
      type: object
      properties:
        numInstances:
          type: integer
        machineTypeUri:
          type: string
        diskConfig:
          type: object
          properties:
            bootDiskType:
              type: string
            bootDiskSizeGb:
              type: integer
            numLocalSsds:
              type: integer
        imageUri:
          type: string
        preemptibility:
          type: string
          enum:
            - PREEMPTIBILITY_UNSPECIFIED
            - NON_PREEMPTIBLE
            - PREEMPTIBLE
            - SPOT

    # Used for both Cluster.status and each Cluster.statusHistory entry.
    ClusterStatus:
      type: object
      properties:
        state:
          type: string
          enum:
            - UNKNOWN
            - CREATING
            - RUNNING
            - ERROR
            - ERROR_DUE_TO_UPDATE
            - DELETING
            - UPDATING
            - STOPPING
            - STOPPED
            - STARTING
            - REPAIRING
        stateStartTime:
          type: string
          format: date-time
        detail:
          type: string

    # Returned by submitJob, getJob, and cancelJob; wrapped by
    # SubmitJobRequest.job and listed in ListJobsResponse.jobs.
    Job:
      type: object
      properties:
        reference:
          type: object
          properties:
            projectId:
              type: string
            jobId:
              type: string
        placement:
          type: object
          properties:
            clusterName:
              type: string
            clusterUuid:
              type: string
        hadoopJob:
          type: object
          properties:
            mainJarFileUri:
              type: string
            mainClass:
              type: string
            args:
              type: array
              items:
                type: string
            jarFileUris:
              type: array
              items:
                type: string
            # Data field named "properties", not the schema keyword.
            properties:
              type: object
              additionalProperties:
                type: string
        sparkJob:
          type: object
          properties:
            mainJarFileUri:
              type: string
            mainClass:
              type: string
            args:
              type: array
              items:
                type: string
            jarFileUris:
              type: array
              items:
                type: string
            # Data field named "properties", not the schema keyword.
            properties:
              type: object
              additionalProperties:
                type: string
        pysparkJob:
          type: object
          properties:
            mainPythonFileUri:
              type: string
            args:
              type: array
              items:
                type: string
            pythonFileUris:
              type: array
              items:
                type: string
            # Data field named "properties", not the schema keyword.
            properties:
              type: object
              additionalProperties:
                type: string
        hiveJob:
          type: object
          properties:
            queryFileUri:
              type: string
            queryList:
              type: object
              properties:
                queries:
                  type: array
                  items:
                    type: string
            continueOnFailure:
              type: boolean
            scriptVariables:
              type: object
              additionalProperties:
                type: string
            # Data field named "properties", not the schema keyword.
            properties:
              type: object
              additionalProperties:
                type: string
        pigJob:
          type: object
          properties:
            queryFileUri:
              type: string
            queryList:
              type: object
              properties:
                queries:
                  type: array
                  items:
                    type: string
            continueOnFailure:
              type: boolean
            scriptVariables:
              type: object
              additionalProperties:
                type: string
        status:
          type: object
          properties:
            state:
              type: string
              enum:
                - STATE_UNSPECIFIED
                - PENDING
                - SETUP_DONE
                - RUNNING
                - CANCEL_PENDING
                - CANCEL_STARTED
                - CANCELLED
                - DONE
                - ERROR
                - ATTEMPT_FAILURE
            stateStartTime:
              type: string
              format: date-time
            details:
              type: string
        labels:
          type: object
          additionalProperties:
            type: string
        driverOutputResourceUri:
          type: string
        driverControlFilesUri:
          type: string
        jobUuid:
          type: string

    # Request body of submitJob; `job` is the only required field.
    SubmitJobRequest:
      type: object
      properties:
        job:
          $ref: '#/components/schemas/Job'
        requestId:
          type: string
      required:
        - job

    # Request and response body of createWorkflowTemplate.
    WorkflowTemplate:
      type: object
      properties:
        id:
          type: string
        name:
          type: string
        version:
          type: integer
        createTime:
          type: string
          format: date-time
        updateTime:
          type: string
          format: date-time
        labels:
          type: object
          additionalProperties:
            type: string
        placement:
          type: object
          properties:
            managedCluster:
              type: object
              properties:
                clusterName:
                  type: string
                config:
                  $ref: '#/components/schemas/ClusterConfig'
            clusterSelector:
              type: object
              properties:
                zone:
                  type: string
                clusterLabels:
                  type: object
                  additionalProperties:
                    type: string
        jobs:
          type: array
          items:
            type: object
            properties:
              stepId:
                type: string
              # The per-engine job payloads are left as untyped objects here.
              hadoopJob:
                type: object
              sparkJob:
                type: object
              pysparkJob:
                type: object
              hiveJob:
                type: object
              pigJob:
                type: object
              prerequisiteStepIds:
                type: array
                items:
                  type: string

    # Paged list envelopes: one page of items plus a nextPageToken.
    ListClustersResponse:
      type: object
      properties:
        clusters:
          type: array
          items:
            $ref: '#/components/schemas/Cluster'
        nextPageToken:
          type: string

    ListJobsResponse:
      type: object
      properties:
        jobs:
          type: array
          items:
            $ref: '#/components/schemas/Job'
        nextPageToken:
          type: string

    ListWorkflowTemplatesResponse:
      type: object
      properties:
        templates:
          type: array
          items:
            $ref: '#/components/schemas/WorkflowTemplate'
        nextPageToken:
          type: string

    # Returned by the cluster create/update/delete operations.
    Operation:
      type: object
      properties:
        name:
          type: string
        done:
          type: boolean
        metadata:
          type: object
        error:
          type: object
          properties:
            code:
              type: integer
            message:
              type: string
        response:
          type: object