---
# CloudFormation template that launches one EC2 instance, installs a Data
# Prepper release tarball on it, writes the pipeline configuration through
# cfn-init, starts Data Prepper in the background, and signals the stack.
AWSTemplateFormatVersion: "2010-09-09"
Description: "Template to install Data Prepper on a single EC2 instance"

Parameters:
  AmazonEsEndpoint:
    Description: Endpoint of the Amazon Elasticsearch Service domain (including https://)
    Type: String
    AllowedPattern: 'https:\/\/[a-z0-9-\.]+(es.amazonaws.com)'
    ConstraintDescription: >-
      must be a valid Amazon Elasticsearch Service domain endpoint
      starting with https://
  AmazonEsRegion:
    Description: Region of the Amazon Elasticsearch Service domain
    Type: String
    AllowedPattern: "[a-z]+-[a-z]+-[0-9]+"
    Default: us-east-1
    ConstraintDescription: must be a valid AWS region (e.g. us-west-2)
  AmazonEsSubnetId:
    Description: >-
      The subnet ID of the Amazon Elasticsearch Service domain
      (Leave blank if the domain is not in a VPC)
    Type: String
    # An empty default lets callers omit the parameter entirely; the
    # DomainIsPublic condition below treats "" as "not in a VPC".
    Default: ""
  DataPrepperVersion:
    Description: Version of Data Prepper to download and run
    Type: String
    AllowedPattern: "[0-9]+\\.[0-9]+\\.[0-9]+[a-z-]*"
    Default: 0.7.1-alpha
    ConstraintDescription: must be a valid release number
  IAMRole:
    Description: >-
      Pre-existing IAM Role to associate with the EC2 instance,
      to be used for authentication when calling Elasticsearch
    Type: String
    AllowedPattern: "[a-zA-Z0-9+=,\\.@\\-_]+"
    Default: DataPrepperRole
    ConstraintDescription: must be a valid IAM role name
  EC2InstanceType:
    Description: EC2 instance type
    Type: String
    AllowedPattern: "[a-z0-9]+\\.[a-z0-9]+"
    Default: t2.medium
    ConstraintDescription: cannot be empty
  EC2VolumeSize:
    Description: EC2 instance storage volume size (GB)
    Type: Number
    Default: 8
  KeyName:
    Description: Name of an existing EC2 KeyPair to enable SSH access to the instance
    Type: AWS::EC2::KeyPair::KeyName
    ConstraintDescription: cannot be empty
  LatestAmi:
    Description: AMI to deploy to EC2, defaults to Amazon Linux 2
    # The SSM parameter type must name the value type it resolves to; a bare
    # "AWS::SSM::Parameter::Value" is not a valid CloudFormation parameter type.
    Type: 'AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>'
    Default: "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"
  PipelineBatchSize:
    Description: Max number of records the Data Prepper internal buffer drains after each read
    Type: Number
    Default: 8
  PipelineBufferSize:
    Description: Max number of records the Data Prepper internal buffer accepts
    Type: Number
    Default: 512
  PipelineWorkerCount:
    Description: The number of worker threads dedicated to the Data Prepper pipeline
    Type: Number
    Default: 1
  HeapInMb:
    Description: >-
      The heap in MB that Data Prepper should use.
      This heap value will be used for both min and max.
    Type: Number
    Default: 256
  SSHLocation:
    Description: The IP address range that can be used to SSH to the EC2 instances
    Type: String
    MinLength: 9
    MaxLength: 18
    Default: 0.0.0.0/0
    AllowedPattern: '(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})'
    ConstraintDescription: must be a valid IP CIDR range of the form x.x.x.x/x.

Conditions:
  # The domain is considered public when no subnet ID was supplied.
  DomainIsPublic: !Equals [!Ref AmazonEsSubnetId, ""]
  DomainIsInVPC: !Not
    - Condition: DomainIsPublic

Resources:
  EC2Instance:
    Type: AWS::EC2::Instance
    Metadata:
      # Consumed by the cfn-init call in UserData (configset "default").
      AWS::CloudFormation::Init:
        configSets:
          default:
            - 01_config-data-prepper
        01_config-data-prepper:
          files:
            "/etc/data-prepper/config.yaml":
              content: !Sub |
                otel-trace-pipeline:
                  delay: 100
                  workers: ${PipelineWorkerCount}
                  buffer:
                    bounded_blocking:
                      buffer_size: ${PipelineBufferSize}
                      batch_size: ${PipelineBatchSize}
                  source:
                    otel_trace_source:
                  sink:
                    - pipeline:
                        name: "raw-pipeline"
                    - pipeline:
                        name: "service-map-pipeline"
                raw-pipeline:
                  workers: ${PipelineWorkerCount}
                  buffer:
                    bounded_blocking:
                      buffer_size: ${PipelineBufferSize}
                      batch_size: ${PipelineBatchSize}
                  source:
                    pipeline:
                      name: "otel-trace-pipeline"
                  processor:
                    - otel_trace_raw_processor:
                  sink:
                    - elasticsearch:
                        hosts: [ "${AmazonEsEndpoint}" ]
                        aws_sigv4: true
                        aws_region: "${AmazonEsRegion}"
                        trace_analytics_raw: true
                service-map-pipeline:
                  delay: 100
                  buffer:
                    bounded_blocking:
                      buffer_size: ${PipelineBufferSize}
                      batch_size: ${PipelineBatchSize}
                  source:
                    pipeline:
                      name: "otel-trace-pipeline"
                  processor:
                    - service_map_stateful:
                  sink:
                    - elasticsearch:
                        hosts: [ "${AmazonEsEndpoint}" ]
                        aws_sigv4: true
                        aws_region: "${AmazonEsRegion}"
                        trace_analytics_service_map: true
              mode: "000400"
              owner: root
              group: root
    Properties:
      BlockDeviceMappings:
        - DeviceName: /dev/xvda
          Ebs:
            VolumeSize: !Ref EC2VolumeSize
      # Only pin the instance to a subnet when the domain lives in a VPC.
      SubnetId: !If
        - DomainIsInVPC
        - !Ref AmazonEsSubnetId
        - !Ref "AWS::NoValue"
      InstanceType: !Ref EC2InstanceType
      # NOTE(review): the IAMRole parameter value is passed as the instance
      # profile name; this assumes an instance profile with the same name as
      # the role exists - confirm.
      IamInstanceProfile: !Ref IAMRole
      KeyName: !Ref KeyName
      ImageId: !Ref LatestAmi
      # The SSH security group is only created (and attached) for the
      # public-domain case; see InstanceSecurityGroup below.
      SecurityGroups: !If
        - DomainIsPublic
        - - !Ref InstanceSecurityGroup
        - !Ref "AWS::NoValue"
      UserData:
        # Script to download and run Data Prepper
        Fn::Base64: !Sub |
          #!/bin/bash
          export JAVA_OPTS="-Xms${HeapInMb}m -Xmx${HeapInMb}m"
          echo $JAVA_OPTS > /var/log/java_opts.txt
          export RELEASE=opendistroforelasticsearch-data-prepper-${DataPrepperVersion}-linux-x64
          yum install java-11-amazon-corretto-headless -y
          wget https://github.com/opendistro-for-elasticsearch/data-prepper/releases/download/v${DataPrepperVersion}/$RELEASE.tar.gz -O /tmp/$RELEASE.tar.gz
          tar -xzf /tmp/$RELEASE.tar.gz --directory /usr/local/bin
          /opt/aws/bin/cfn-init -v --stack ${AWS::StackId} --resource EC2Instance --region ${AWS::Region} --configsets default
          # Remember whether the config file was written so the stack is not told "success" after a failed cfn-init.
          INIT_STATUS=$?
          nohup /usr/local/bin/$RELEASE/data-prepper-tar-install.sh /etc/data-prepper/config.yaml > /var/log/data-prepper.out 2>&1&
          /opt/aws/bin/cfn-signal -e $INIT_STATUS --stack ${AWS::StackId} --resource EC2Instance --region ${AWS::Region}
    # Stack creation waits up to 15 minutes for the cfn-signal call above.
    CreationPolicy:
      ResourceSignal:
        Count: 1
        Timeout: "PT15M"

  InstanceSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Condition: DomainIsPublic
    Properties:
      GroupDescription: Enable SSH access via port 22
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 22
          ToPort: 22
          CidrIp: !Ref SSHLocation