# CloudFormation template header: format version and human-readable summary.
AWSTemplateFormatVersion: "2010-09-09"
Description: >
  Recipe to configure an HPC cluster
  with cross-stack imports.

# Stack metadata: controls how the parameters are grouped and labelled when
# the template is presented as an input form.
Metadata:
  AWS::CloudFormation::Interface:
    ParameterGroups:
      # Cluster sizing and platform choices.
      - Label: {default: Cluster Design}
        Parameters: [OS, Architecture, ComputeInstanceMax]
      # Name of the stack whose exported subnets are imported.
      - Label: {default: Networking and Access}
        Parameters: [NetworkStackName]
      # Name of the stack whose exported FSx for Lustre values are imported.
      - Label: {default: Storage Configuration}
        Parameters: [StorageStackName]

# Partition checks, used to pick the right console domain for output URLs.
Conditions:
  # True when the stack is deployed in the aws-us-gov partition.
  GovCloud:
    Fn::Equals:
      - !Ref AWS::Partition
      - aws-us-gov
  # True when the stack is deployed in the aws-cn partition.
  China:
    Fn::Equals:
      - !Ref AWS::Partition
      - aws-cn

# Template inputs. Names, types, defaults and allowed values are part of the
# template's public interface and are unchanged.
Parameters:

  # Upper bound on the number of instances in the single compute resource
  # (passed through as MaxCount).
  ComputeInstanceMax:
    Description: Maximum number of compute instances in the queue
    Type: Number
    MinValue: 1
    Default: 4

  # Operating system identifier handed to the cluster configuration (Image.Os).
  OS:
    Type: String
    Default: alinux2
    AllowedValues:
      - alinux2
      - centos7
      - ubuntu2004
      - ubuntu2204
      - rhel8
    Description: Cluster operating system

  # Selects a column of the InstanceTypeForArchitecture mapping.
  Architecture:
    Type: String
    Default: Graviton
    AllowedValues:
      - Graviton
      - GPU
      - x86
    Description: Choose a node architecture.

  # Prefix of the exports "<name>-DefaultPublicSubnet" and
  # "<name>-DefaultPrivateSubnet". Validated against CloudFormation's stack
  # naming rules so that an empty or malformed value is rejected up front
  # instead of surfacing later as an unresolved Fn::ImportValue.
  NetworkStackName:
    Description: Name of CloudFormation stack providing VPC and subnets
    Type: String
    MinLength: 1
    MaxLength: 128
    AllowedPattern: '^[a-zA-Z][a-zA-Z0-9-]*$'
    ConstraintDescription: >-
      must be a valid CloudFormation stack name: 1-128 characters, starting
      with a letter and containing only letters, digits and hyphens.

  # Prefix of the exports "<name>-FSxLustreSecurityGroupId" and
  # "<name>-FSxLustreFilesystemId". Same validation rationale as above.
  StorageStackName:
    Description: Name of CloudFormation stack providing an FSx for Lustre filesystem
    Type: String
    MinLength: 1
    MaxLength: 128
    AllowedPattern: '^[a-zA-Z][a-zA-Z0-9-]*$'
    ConstraintDescription: >-
      must be a valid CloudFormation stack name: 1-128 characters, starting
      with a letter and containing only letters, digits and hyphens.

# Static lookup tables.
Mappings:
  # ParallelCluster release; interpolated into the provider TemplateURL.
  ParallelCluster:
    Constants:
      Version: '3.7.2'
  # NOTE(review): not referenced by any resource visible in this template.
  Recipes:
    Constants:
      Version: 'main'
  # Instance type per node role (top-level key) and Architecture parameter
  # value (second-level key). For "GPU", only ComputeNodes maps to a g5 type;
  # HeadNode and LoginNodes reuse the c6a types of the x86 column.
  InstanceTypeForArchitecture:
    HeadNode:
      x86: c6a.large
      GPU: c6a.large
      Graviton: c7g.medium
    # NOTE(review): no LoginNodes lookup appears in the resources visible here.
    LoginNodes:
      x86: c6a.xlarge
      GPU: c6a.xlarge
      Graviton: c7g.large
    ComputeNodes:
      x86: c6a.xlarge
      GPU: g5.xlarge
      Graviton: c7g.xlarge

Resources:

  # Nested stack built from the ParallelCluster custom-resource template for
  # the version pinned in the ParallelCluster mapping. Its ServiceToken output
  # is consumed by the PclusterCluster custom resource.
  PclusterClusterProvider:
    Type: AWS::CloudFormation::Stack
    UpdateReplacePolicy: Delete
    DeletionPolicy: Delete
    Properties:
      TimeoutInMinutes: 10
      # Region and URL suffix are pseudo parameters; only Version is supplied
      # through the substitution map.
      TemplateURL: !Sub
        - https://${AWS::Region}-aws-parallelcluster.s3.${AWS::Region}.${AWS::URLSuffix}/parallelcluster/${Version}/templates/custom_resource/cluster.yaml
        - Version: !FindInMap [ParallelCluster, Constants, Version]

  # The cluster itself, created through the custom resource whose service
  # token is exported by the PclusterClusterProvider nested stack.
  # ClusterConfiguration carries the cluster definition; network and storage
  # identifiers are imported from the stacks named by NetworkStackName and
  # StorageStackName.
  PclusterCluster:
    Type: Custom::PclusterCluster
    DeletionPolicy: Delete
    UpdateReplacePolicy: Delete
    Properties:
      ServiceToken: !GetAtt [PclusterClusterProvider, Outputs.ServiceToken]
      ClusterName: !Sub 'c-${AWS::StackName}'
      ClusterConfiguration:
        Image:
          Os: !Ref OS
        HeadNode:
          InstanceType: !FindInMap [InstanceTypeForArchitecture, HeadNode, !Ref Architecture]
          Networking:
            # Head node uses the public subnet exported by the network stack.
            SubnetId:
              Fn::ImportValue: !Sub '${NetworkStackName}-DefaultPublicSubnet'
            # Security group exported by the storage stack for the FSx for
            # Lustre filesystem.
            AdditionalSecurityGroups:
              - Fn::ImportValue: !Sub '${StorageStackName}-FSxLustreSecurityGroupId'
          Dcv:
            Enabled: false
          LocalStorage:
            RootVolume:
              Size: 40
          Iam:
            AdditionalIamPolicies:
              # Partition-aware ARN: this template also targets aws-us-gov and
              # aws-cn (see Conditions), where a hard-coded "arn:aws:" prefix
              # does not resolve.
              - Policy: !Sub 'arn:${AWS::Partition}:iam::aws:policy/AmazonSSMManagedInstanceCore'
        Scheduling:
          Scheduler: slurm
          SlurmSettings:
            QueueUpdateStrategy: TERMINATE
          SlurmQueues:
            - Name: compute
              ComputeResources:
                - Name: nodes
                  InstanceType: !FindInMap [InstanceTypeForArchitecture, ComputeNodes, !Ref Architecture]
                  # Starts with zero instances; upper bound comes from the
                  # ComputeInstanceMax parameter.
                  MinCount: 0
                  MaxCount: !Ref ComputeInstanceMax
                  DisableSimultaneousMultithreading: true
              Networking:
                # Compute nodes use the private subnet exported by the
                # network stack.
                SubnetIds:
                  - Fn::ImportValue: !Sub '${NetworkStackName}-DefaultPrivateSubnet'
                PlacementGroup:
                  Enabled: true
                AdditionalSecurityGroups:
                  - Fn::ImportValue: !Sub '${StorageStackName}-FSxLustreSecurityGroupId'
              ComputeSettings:
                LocalStorage:
                  RootVolume:
                    Size: 40
        SharedStorage:
          # EFS entry with no filesystem ID supplied, mounted at /shared.
          - StorageType: Efs
            Name: shared
            MountDir: /shared
            EfsSettings:
              DeletionPolicy: Delete
          # Existing FSx for Lustre filesystem, referenced by the ID exported
          # from the storage stack and mounted at /fsx.
          - StorageType: FsxLustre
            Name: fsxlustre
            MountDir: /fsx
            FsxLustreSettings:
              FileSystemId:
                Fn::ImportValue: !Sub '${StorageStackName}-FSxLustreFilesystemId'

# Values published by the stack. Output keys and export names are part of the
# template's interface and are kept as-is.
Outputs:
  # NOTE: despite the "Ip" suffix in the key, the value is the head node's
  # EC2 instance ID (matching the export name).
  HeadNodeIp:
    Description: EC2 instance ID for the HeadNode
    Value: !GetAtt [PclusterCluster, headNode.instanceId]
    Export:
      Name: !Sub '${AWS::StackName}-HeadNodeInstanceId'
  # Session Manager console link for the head node. The console domain is
  # chosen per partition: GovCloud, then China, otherwise the regional
  # commercial console.
  SystemManagerUrl:
    Description: URL to access the HeadNode via SystemManager
    Value: !Sub
      - https://${ConsoleDomain}/systems-manager/session-manager/${InstanceId}?region=${AWS::Region}
      - InstanceId: !GetAtt [PclusterCluster, headNode.instanceId]
        ConsoleDomain: !If
          - GovCloud
          - console.amazonaws-us-gov.com
          - !If
            - China
            - console.amazonaws.cn
            - !Sub '${AWS::Region}.console.aws.amazon.com'
    Export:
      Name: !Sub '${AWS::StackName}-SystemManagerUrl'
  # Not exported; only visible on this stack.
  ValidationMessages:
    Description: Warnings from cluster create or update operations.
    Value: !GetAtt [PclusterCluster, validationMessages]