---
AWSTemplateFormatVersion: '2010-09-09'
Description: AWS CloudFormation template to create a Cloud9 environment setup.

Metadata:
  Author:
    Description: Christian Melendez
  License:
    Description: 'Copyright 2022 Amazon.com, Inc. and its affiliates. All Rights
      Reserved. Licensed under the Amazon Software License (the "License"). You
      may not use this file except in compliance with the License. A copy of the
      License is located at http://aws.amazon.com/asl/ or in the "license" file
      accompanying this file. This file is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.'

Parameters:
  C9InstanceType:
    Description: Example Cloud9 instance type
    Type: String
    Default: t3.micro
    AllowedValues:
      - t3.micro
      - m5.large
    ConstraintDescription: Must be a valid Cloud9 instance type

  # Used only by Event Engine; if you are self-deploying the stack leave the
  # default value of NONE.
  EETeamRoleArn:
    Description: "ARN of the Team Role"
    Default: NONE
    Type: String
    ConstraintDescription: This is ONLY used Event Engine, dont change this if you are self-deploying the stack

Conditions:
  # True when the stack is self-deployed (EETeamRoleArn left at its default).
  NotEventEngine: !Equals [!Ref EETeamRoleArn, NONE]

Resources:
  ################## PERMISSIONS AND ROLES #################

  # Admin role attached to the Cloud9 EC2 instance (self-deploy case only;
  # Event Engine supplies its own TeamRole instance profile).
  C9Role:
    Type: AWS::IAM::Role
    Condition: NotEventEngine
    Properties:
      Tags:
        - Key: Environment
          Value: AWS Example
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
                - ssm.amazonaws.com
            Action:
              - sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AdministratorAccess
      Path: "/"

  # Execution role for the bootstrap Lambda: CloudWatch Logs plus the EC2/IAM
  # calls needed to attach an instance profile to the Cloud9 instance.
  C9LambdaExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
      Path: "/"
      Policies:
        - PolicyName:
            Fn::Join:
              - ''
              - - C9LambdaPolicy-
                - Ref: AWS::Region
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogGroup
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Resource: arn:aws:logs:*:*:*
              - Effect: Allow
                Action:
                  - cloudformation:DescribeStacks
                  - cloudformation:DescribeStackEvents
                  - cloudformation:DescribeStackResource
                  - cloudformation:DescribeStackResources
                  - ec2:DescribeInstances
                  - ec2:AssociateIamInstanceProfile
                  - ec2:ModifyInstanceAttribute
                  - ec2:ReplaceIamInstanceProfileAssociation
                  - iam:ListInstanceProfiles
                  - iam:PassRole
                Resource: "*"

  ################## LAMBDA BOOTSTRAP FUNCTION ################

  # Custom resource that triggers the bootstrap Lambda once the Cloud9
  # environment exists, passing it the instance profile to attach.
  C9BootstrapInstanceLambda:
    Description: Bootstrap Cloud9 instance
    Type: Custom::C9BootstrapInstanceLambda
    DependsOn:
      - C9BootstrapInstanceLambdaFunction
      - C9Instance
      - C9LambdaExecutionRole
    Properties:
      Tags:
        - Key: Environment
          Value: AWS Example
      ServiceToken:
        Fn::GetAtt:
          - C9BootstrapInstanceLambdaFunction
          - Arn
      REGION:
        Ref: AWS::Region
      StackName:
        Ref: AWS::StackName
      EnvironmentId:
        Ref: C9Instance
      LabIdeInstanceProfileArn: !If [NotEventEngine, !GetAtt C9InstanceProfile.Arn, !Sub 'arn:aws:iam::${AWS::AccountId}:instance-profile/TeamRoleInstanceProfile']

  # Lambda that finds the Cloud9 EC2 instance (by its environment tag), waits
  # for it to be running, and attaches the requested IAM instance profile.
  C9BootstrapInstanceLambdaFunction:
    Type: AWS::Lambda::Function
    Properties:
      Tags:
        - Key: Environment
          Value: AWS Example
      Handler: index.lambda_handler
      Role:
        Fn::GetAtt:
          - C9LambdaExecutionRole
          - Arn
      Runtime: python3.9
      MemorySize: 256
      # FIX: Timeout is a number-typed property; was the quoted string '600'.
      Timeout: 600
      Code:
        ZipFile: |
          from __future__ import print_function
          import boto3
          import time
          import cfnresponse

          def lambda_handler(event, context):
              responseData = {}
              if event['RequestType'] == 'Create':
                  try:
                      ec2 = boto3.client('ec2')

                      # Locate the EC2 instance backing the Cloud9 environment
                      # via the tag Cloud9 puts on it.
                      instance = ec2.describe_instances(Filters=[{'Name': 'tag:aws:cloud9:environment','Values': [event['ResourceProperties']['EnvironmentId']]}])['Reservations'][0]['Instances'][0]

                      # IamInstanceProfile request object for the association call.
                      iam_instance_profile = {
                          'Arn': event['ResourceProperties']['LabIdeInstanceProfileArn']
                      }

                      # Wait for the instance to be running before attaching the
                      # profile. FIX: the original assigned the whole
                      # describe_instances response (a dict) to instance_state,
                      # so the comparison against 'running' could never succeed
                      # and the loop spun until the Lambda timed out.
                      instance_state = instance['State']['Name']
                      while instance_state != 'running':
                          time.sleep(5)
                          instance_state = ec2.describe_instances(InstanceIds=[instance['InstanceId']])['Reservations'][0]['Instances'][0]['State']['Name']

                      # Attach the instance profile.
                      response = ec2.associate_iam_instance_profile(IamInstanceProfile=iam_instance_profile, InstanceId=instance['InstanceId'])

                      responseData = {'Success': 'Started bootstrapping for instance: '+instance['InstanceId']}
                      cfnresponse.send(event, context, cfnresponse.SUCCESS, responseData, 'CustomResourcePhysicalID')
                  except Exception as e:
                      responseData = {'Error': str(e)}
                      cfnresponse.send(event, context, cfnresponse.FAILED, responseData, 'CustomResourcePhysicalID')

  # Bucket that receives Athena query results.
  AthenaResultsBucket:
    Type: AWS::S3::Bucket
    DeletionPolicy: Delete
    Properties:
      BucketName: !Join ["-", ["athena-results", Ref: "AWS::AccountId"]]

  SparkResultsWorkGroup:
    Type: AWS::Athena::WorkGroup
    Properties:
      Name: SparkResultsWorkGroup
      RecursiveDeleteOption: true
      WorkGroupConfiguration:
        PublishCloudWatchMetricsEnabled: true
        ResultConfiguration:
          OutputLocation: !Join ["", ["s3://", Ref: AthenaResultsBucket, "/"]]

  # Saved query creating the external table over the on-demand job results.
  SparkResultsTable:
    Type: AWS::Athena::NamedQuery
    Properties:
      Database: "default"
      Description: "Create table EMRWorkshopResults"
      Name: "EMRWorkshopResults"
      WorkGroup: !Ref SparkResultsWorkGroup
      QueryString: !Sub |
        CREATE EXTERNAL TABLE EMRWorkshopResults (
          words string,
          count bigint
        )
        stored as parquet
        LOCATION 's3://${SparkAppBucket}/results';

  # Saved query creating the external table over the Spot job results.
  SparkResultsSpotTable:
    Type: AWS::Athena::NamedQuery
    Properties:
      Database: "default"
      Description: "Create table EMRWorkshopResultsSpot"
      Name: "EMRWorkshopResultsSpot"
      WorkGroup: !Ref SparkResultsWorkGroup
      QueryString: !Sub |
        CREATE EXTERNAL TABLE EMRWorkshopResultsSpot (
          words string,
          count bigint
        )
        stored as parquet
        LOCATION 's3://${SparkAppBucket}/resultsspot';

  # Bucket holding the Spark application script and its output.
  SparkAppBucket:
    Type: AWS::S3::Bucket
    DeletionPolicy: Delete
    Properties:
      BucketName: !Join ["-", ["spark-app", Ref: "AWS::AccountId"]]

  ################## SSM BOOTSRAP HANDLER ###############

  # Bucket collecting the SSM run-command output from the bootstrap document.
  C9OutputBucket:
    Type: AWS::S3::Bucket
    DeletionPolicy: Delete

  # SSM Command document that provisions the Cloud9 instance: tooling, AWS CLI
  # v2, region/account env vars, an EC2 key pair, and the Spark script upload.
  # FIX: removed a duplicate 'Content: Yaml' key that preceded DocumentType —
  # duplicate mapping keys are invalid YAML and the stray value was silently
  # discarded by last-wins parsers.
  C9SSMDocument:
    Type: AWS::SSM::Document
    Properties:
      Tags:
        - Key: Environment
          Value: AWS Example
      DocumentType: Command
      Content:
        schemaVersion: '2.2'
        description: Bootstrap Cloud9 Instance
        mainSteps:
          - action: aws:runShellScript
            name: C9bootstrap
            inputs:
              runCommand:
                - "#!/bin/bash"
                - date
                - . /home/ec2-user/.bashrc
                - whoami
                - sudo -H -u ec2-user aws sts get-caller-identity
                - echo '=== Install JQ and envsubst ==='
                - sudo yum -y install jq gettext
                - echo '=== Update to the latest AWS CLI ==='
                - sudo -H -u ec2-user aws --version
                - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
                - unzip awscliv2.zip
                - sudo ./aws/install
                - . /home/ec2-user/.bash_profile
                - sudo -H -u ec2-user aws --version
                - echo '=== setup AWS configs ==='
                - rm -vf /home/ec2-user/.aws/credentials
                - export ACCOUNT_ID=$(aws sts get-caller-identity --output text --query Account)
                - export AWS_REGION=$(curl -s 169.254.169.254/latest/dynamic/instance-identity/document | jq -r '.region')
                - echo "export ACCOUNT_ID=${ACCOUNT_ID}" >> /home/ec2-user/.bash_profile
                - echo "export AWS_REGION=${AWS_REGION}" >> /home/ec2-user/.bash_profile
                - !Sub echo "export S3_BUCKET=${SparkAppBucket}" >> /home/ec2-user/.bash_profile
                - sudo -H -u ec2-user aws configure set default.region ${AWS_REGION}
                - sudo -H -u ec2-user aws configure get default.region
                - sudo -H -u ec2-user aws sts get-caller-identity
                - echo '=== Generate SSH key and import to aws ==='
                - sudo -H -u ec2-user aws ec2 create-key-pair --key-name emr-workshop-key-pair --query "KeyMaterial" --output text > /home/ec2-user/environment/emr-workshop-key-pair.pem
                - sudo -H -u ec2-user chmod 400 /home/ec2-user/environment/emr-workshop-key-pair.pem
                - echo '=== Upload Spark App to S3 ==='
                - curl "https://ec2spotworkshops.com/running_spark_apps_with_emr_on_spot_instances/launching_emr_cluster-1.files/script.py" -o "script.py"
                - !Sub sudo -H -u ec2-user aws s3 cp script.py s3://${SparkAppBucket}
                - . /home/ec2-user/.bash_profile

  # Runs the bootstrap document against any instance tagged SSMBootstrap=Active
  # (the tag placed on the Cloud9 instance below).
  C9BootstrapAssociation:
    Type: AWS::SSM::Association
    DependsOn:
      - C9OutputBucket
    Properties:
      Name: !Ref C9SSMDocument
      OutputLocation:
        S3Location:
          OutputS3BucketName: !Ref C9OutputBucket
          OutputS3KeyPrefix: bootstrapoutput
      Targets:
        - Key: tag:SSMBootstrap
          Values:
            - Active

  ################## INSTANCE #####################

  C9InstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Condition: NotEventEngine
    Properties:
      Path: "/"
      Roles:
        - Ref: C9Role

  C9Instance:
    Description: "-"
    DependsOn: C9BootstrapAssociation
    Type: AWS::Cloud9::EnvironmentEC2
    Properties:
      Description: AWS Cloud9 instance for Examples
      AutomaticStopTimeMinutes: 3600
      InstanceType:
        Ref: C9InstanceType
      Name:
        Ref: AWS::StackName
      # OwnerArn: !Sub 'arn:aws:sts::${AWS::AccountId}:assumed-role/TeamRole/MasterKey'
      OwnerArn: !If [NotEventEngine, !Ref AWS::NoValue, !Sub 'arn:aws:sts::${AWS::AccountId}:assumed-role/TeamRole/MasterKey']
      Tags:
        - Key: SSMBootstrap
          Value: Active
        - Key: Environment
          Value:
            Ref: AWS::StackName

Outputs:
  Cloud9IDE:
    Value:
      Fn::Join:
        - ''
        - - https://
          - Ref: AWS::Region
          - ".console.aws.amazon.com/cloud9/ide/"
          - Ref: C9Instance
          - "?region="
          - Ref: AWS::Region
  SparkAppBucket:
    Value: !Ref SparkAppBucket
    Description: Bucket for the Spark application
  AthenaResultsBucket:
    Value: !Ref AthenaResultsBucket
    Description: Bucket for storing the Athena results