---
# CloudFormation template header: format version and a one-line summary of
# what the stack provisions.
AWSTemplateFormatVersion: '2010-09-09'
Description: 'Template to install Data Prepper on a single EC2 instance'
Parameters:
  # Full HTTPS endpoint of the target domain; substituted into the `hosts`
  # entry of both Elasticsearch sinks in /etc/data-prepper/pipelines.yaml.
  AmazonEsEndpoint:
    Description: Endpoint of the AWS Elasticsearch Service domain (including https://)
    Type: String
    # The dots of the domain suffix are escaped so that they only match a
    # literal "."; unescaped they would accept any character in that position.
    # Single quotes keep the backslashes literal for the regex engine.
    AllowedPattern: 'https:\/\/[a-z0-9-\.]+(es\.amazonaws\.com)'
    ConstraintDescription: must be a valid Amazon Elasticsearch Service domain endpoint starting with https://
  # Region of the target domain; written to `aws_region` in the sink
  # configuration when SigV4 signing is used (NoMasterUser condition).
  AmazonEsRegion:
    Description: Region of the AWS Elasticsearch Service domain
    Type: String
    # One or more lowercase segments before the trailing number, so that
    # four-part region names such as us-gov-west-1 are accepted in addition to
    # the three-part form (us-east-1) the previous pattern was limited to.
    AllowedPattern: '[a-z]+(-[a-z]+)+-[0-9]+'
    Default: us-east-1
    ConstraintDescription: must be a valid AWS region (e.g. us-west-2)
  # Subnet in which to launch the instance. A blank value makes the
  # DomainIsPublic condition true; any other value makes DomainIsInVPC true.
  AmazonEsSubnetId:
    Description: The subnet ID of the AWS Elasticsearch Service domain (Leave blank if the domain is not in a VPC)
    Type: String
    # Explicit empty default so "leave blank" also works for CLI/API callers
    # that omit the parameter entirely.
    Default: ""
  # Basic-auth user name for the domain. Blank by default; when both this and
  # Password are blank the NoMasterUser condition is true.
  Username:
    Type: String
    Default: ''
    Description: >-
      The username of the AWS Elasticsearch Service domain
      (Leave blank if the domain is configured with IAM role)
  # Basic-auth password for the domain. Blank by default; when both this and
  # Username are blank the NoMasterUser condition is true.
  Password:
    Description: The password of the AWS Elasticsearch Service domain (Leave blank if the domain is configured with IAM role)
    Type: String
    Default: ""
    # Mask the secret in the CloudFormation console and API responses.
    # NOTE: NoEcho does not mask values placed in resource Metadata, and this
    # value is still substituted into the pipelines.yaml content under
    # AWS::CloudFormation::Init of EC2Instance.
    NoEcho: true
  # Release number used to build the tarball name and the GitHub download URL
  # in the EC2 instance's UserData script.
  DataPrepperVersion:
    Type: String
    Default: '1.0.0'
    Description: Version of Data Prepper to download and run
    # major.minor.patch followed by an optional lowercase/hyphen suffix.
    AllowedPattern: '[0-9]+\.[0-9]+\.[0-9]+[a-z-]*'
    ConstraintDescription: must be a valid release number
  # Name passed to the EC2 instance's IamInstanceProfile property. The pattern
  # accepts either an empty value or a string of IAM-name characters.
  IAMRole:
    Type: String
    Default: DataPrepperRole
    Description: >-
      Pre-existing IAM Role to associate with the EC2 instance,
      to be used for authentication when calling Elasticsearch
      (Leave blank if the domain is configured with HTTP basic authentication
      in Fine-grained access control)
    AllowedPattern: '^$|[a-zA-Z0-9+=,\.@\-_]+'
    ConstraintDescription: must be a valid IAM role name
  # EC2 instance type for the Data Prepper host, in "<family>.<size>" form.
  InstanceType:
    Type: String
    Default: t2.medium
    Description: EC2 instance type
    AllowedPattern: '[a-z0-9]+\.[a-z0-9]+'
    ConstraintDescription: cannot be empty
  # Key pair attached to the instance. No default is provided, so a value
  # must be supplied when the stack is created.
  KeyName:
    Type: 'AWS::EC2::KeyPair::KeyName'
    Description: Name of an existing EC2 KeyPair to enable SSH access to the instance
    ConstraintDescription: cannot be empty
  # AMI ID for the instance, resolved from an SSM parameter. The default path
  # is the public parameter for Amazon Linux 2 (HVM, x86_64, EBS).
  LatestAmi:
    Type: 'AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>'
    Default: '/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs'
    Description: AMI to deploy to EC2, defaults to Amazon Linux 2
  # Source CIDR allowed to reach port 22 through InstanceSecurityGroup.
  # NOTE(review): the default leaves SSH open to every address.
  SSHLocation:
    Type: String
    Default: '0.0.0.0/0'
    Description: The IP address range that can be used to SSH to the EC2 instances
    # Length bounds cover the shortest ("0.0.0.0/0") and the longest
    # ("255.255.255.255/32") dotted-quad CIDR strings.
    MinLength: '9'
    MaxLength: '18'
    AllowedPattern: '(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})'
    ConstraintDescription: must be a valid IP CIDR range of the form x.x.x.x/x.
Metadata:
  # Groups the template parameters into labelled sections for the
  # CloudFormation console.
  AWS::CloudFormation::Interface:
    ParameterGroups:
      # Settings shared by both authentication modes.
      - Label: {default: "Common"}
        Parameters:
          - IAMRole
      # Where the Elasticsearch domain lives and how to authenticate to it.
      - Label: {default: "Amazon Elasticsearch Service Domain"}
        Parameters:
          - AmazonEsEndpoint
          - AmazonEsRegion
          - AmazonEsSubnetId
          - Username
          - Password
      # What to install and on which kind of EC2 instance.
      - Label: {default: "Data-Prepper Configuration"}
        Parameters:
          - DataPrepperVersion
          - InstanceType
          - KeyName
          - LatestAmi
          - SSHLocation
Conditions:
  # True when no subnet ID was supplied; the instance is then launched with
  # InstanceSecurityGroup and without an explicit SubnetId.
  DomainIsPublic: !Equals [!Ref AmazonEsSubnetId, ""]
  # Inverse of DomainIsPublic: a subnet ID was supplied.
  DomainIsInVPC: !Not [Condition: DomainIsPublic]
  # True when both Username and Password are blank; the sinks are then
  # configured for SigV4 signing instead of HTTP basic authentication.
  NoMasterUser: !And
    - !Equals
      - !Ref Username
      - ""
    - !Equals
      - !Ref Password
      - ""
  # True when an IAM role name was supplied. The IAMRole parameter may be left
  # blank, in which case no instance profile must be passed to EC2.
  HasIAMRole: !Not [!Equals [!Ref IAMRole, ""]]
Resources:
  # Single EC2 instance that downloads Data Prepper, receives its two
  # configuration files through cfn-init, starts the process and then signals
  # CloudFormation.
  EC2Instance:
    Type: AWS::EC2::Instance
    Metadata:
      AWS::CloudFormation::Init:
        configSets:
          default:
            - 01_config-data-prepper
        01_config-data-prepper:
          files:
            # Server-level settings; both files are readable by root only.
            "/etc/data-prepper/data-prepper-config.yaml":
              content: !Sub |
                ssl: false
              mode: "000400"
              owner: root
              group: root
            # Pipeline definition: entry-pipeline receives OTel traces and fans
            # out to raw-pipeline and service-map-pipeline, each ending in an
            # Elasticsearch sink.
            #
            # The two-argument form of !Sub is used: the first item is the file
            # body, the second a map of local variables. rawSpanConfig and
            # serviceMapConfig each expand to a multi-line sink configuration
            # chosen by NoMasterUser (SigV4 vs. username/password). Each of
            # those strings starts with "\n" and every continuation line begins
            # with an escaped space so that the leading spaces survive YAML line
            # folding and the keys end up nested beneath `elasticsearch:` in
            # the rendered file. Do not re-indent those lines.
            "/etc/data-prepper/pipelines.yaml":
              content: !Sub
                - |
                  entry-pipeline:
                    delay: "100"
                    source:
                      otel_trace_source:
                        ssl: false
                        health_check_service: true
                    sink:
                      - pipeline:
                          name: "raw-pipeline"
                      - pipeline:
                          name: "service-map-pipeline"
                  raw-pipeline:
                    source:
                      pipeline:
                        name: "entry-pipeline"
                    prepper:
                      - otel_trace_raw_prepper:
                    sink:
                      - elasticsearch: ${rawSpanConfig}
                  service-map-pipeline:
                    delay: "100"
                    source:
                      pipeline:
                        name: "entry-pipeline"
                    prepper:
                      - service_map_stateful:
                    sink:
                      - elasticsearch: ${serviceMapConfig}
                - rawSpanConfig: !If
                    - NoMasterUser
                    - !Sub "\n
                    \        hosts: [ \"${AmazonEsEndpoint}\" ]\n
                    \        aws_sigv4: true\n
                    \        aws_region: \"${AmazonEsRegion}\"\n
                    \        trace_analytics_raw: true"
                    - !Sub "\n
                    \        hosts: [ \"${AmazonEsEndpoint}\" ]\n
                    \        aws_sigv4: false\n
                    \        username: \"${Username}\"\n
                    \        password: \"${Password}\"\n
                    \        trace_analytics_raw: true"
                  serviceMapConfig: !If
                    - NoMasterUser
                    - !Sub "\n
                    \        hosts: [ \"${AmazonEsEndpoint}\" ]\n
                    \        aws_sigv4: true\n
                    \        aws_region: \"${AmazonEsRegion}\"\n
                    \        trace_analytics_service_map: true"
                    - !Sub "\n
                    \        hosts: [ \"${AmazonEsEndpoint}\" ]\n
                    \        aws_sigv4: false\n
                    \        username: \"${Username}\"\n
                    \        password: \"${Password}\"\n
                    \        trace_analytics_service_map: true"
              mode: "000400"
              owner: root
              group: root
    Properties:
      # Only set when a subnet ID was supplied (domain inside a VPC).
      SubnetId:
        !If [DomainIsInVPC, !Ref AmazonEsSubnetId, !Ref "AWS::NoValue"]
      InstanceType:
        Ref: InstanceType
      # Omit the property entirely when IAMRole is blank instead of passing an
      # empty instance profile name to EC2.
      IamInstanceProfile:
        !If [HasIAMRole, !Ref IAMRole, !Ref "AWS::NoValue"]
      KeyName:
        Ref: KeyName
      ImageId:
        Ref: LatestAmi
      # The SSH security group exists only under DomainIsPublic, so it is only
      # referenced under that condition.
      SecurityGroups:
        !If [DomainIsPublic, [!Ref InstanceSecurityGroup], !Ref "AWS::NoValue"]
      UserData:
        # Script to download and run Data Prepper. Inside !Sub only the
        # ${...} form is substituted by CloudFormation; plain $NAME references
        # are left for bash.
        Fn::Base64: !Sub |
          #!/bin/bash
          export RELEASE=opendistroforelasticsearch-data-prepper-${DataPrepperVersion}-linux-x64
          yum install java-11-amazon-corretto-headless -y
          wget https://github.com/opendistro-for-elasticsearch/data-prepper/releases/download/v${DataPrepperVersion}/$RELEASE.tar.gz -O /tmp/$RELEASE.tar.gz
          tar -xzf /tmp/$RELEASE.tar.gz --directory /usr/local/bin
          /opt/aws/bin/cfn-init -v --stack ${AWS::StackId} --resource EC2Instance --region ${AWS::Region} --configsets default
          # Remember whether the configuration files were written successfully.
          CFN_INIT_STATUS=$?
          nohup /usr/local/bin/$RELEASE/data-prepper-tar-install.sh /etc/data-prepper/pipelines.yaml /etc/data-prepper/data-prepper-config.yaml > /var/log/data-prepper.out &
          # Report the cfn-init result so a failed configuration fails the stack.
          /opt/aws/bin/cfn-signal --exit-code $CFN_INIT_STATUS --stack ${AWS::StackId} --resource EC2Instance --region ${AWS::Region}
    CreationPolicy:
      # Creation completes only after one signal from the UserData script is
      # received within 15 minutes.
      ResourceSignal:
        Count: 1
        Timeout: "PT15M"
  # Security group that admits SSH from the SSHLocation CIDR. It is created
  # only when the DomainIsPublic condition holds.
  InstanceSecurityGroup:
    Condition: DomainIsPublic
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Enable SSH access via port 22
      SecurityGroupIngress:
        # Single rule: TCP port 22 only.
        - CidrIp: !Ref SSHLocation
          IpProtocol: tcp
          FromPort: '22'
          ToPort: '22'