Skip to content

Instantly share code, notes, and snippets.

@t0ny-peng
Created October 15, 2019 01:13
Show Gist options
  • Save t0ny-peng/e21eab5ceeefcb09849be7e8be2b3ec3 to your computer and use it in GitHub Desktop.
Save t0ny-peng/e21eab5ceeefcb09849be7e8be2b3ec3 to your computer and use it in GitHub Desktop.
Kubernetes CloudFormation template
---
AWSTemplateFormatVersion: '2010-09-09'
Description: 'smart.AI - CI configuration'
# Free-form, human-readable information about the template (author and a
# summary of the node groups it creates).
# NOTE(review): a second top-level `Metadata` key appears after `Parameters`
# (the AWS::CloudFormation::Interface section). Duplicate mapping keys are
# invalid YAML and most parsers silently keep only the last one, so the
# entries below may be dropped - merge the two `Metadata` mappings.
Metadata:
  Author:
    Description: John Doe <[email protected]>
  License:
    # Literal block so each node group stays on its own line. The instance
    # types and sizes quoted here are the parameter defaults of this template.
    Description: |-
      This configuration creates three node groups:
      - gitlab-runner: On-demand instance. t3a.medium (always running)
      - x86_64: Spot instance. c5.18xlarge by default. 0-16 machines
      - x86_64-isolated: Spot instance. c5.4xlarge by default. 0-16 machines
# Stack inputs. Parameters without a Default (KeyName,
# ClusterControlPlaneSecurityGroup, VpcId, Subnets) must be supplied at
# stack creation time.
Parameters:
  # Cluster Information
  ClusterName:
    Description: >-
      The cluster name provided when the cluster was created.
      If it is incorrect, nodes will not be able to join the cluster.
    Type: String
    Default: gitlab-ci
  KeyName:
    Description: The EC2 Key Pair to allow SSH access to the instances
    Type: AWS::EC2::KeyPair::KeyName
  SpotPrice:
    Description: >-
      Spot Price (Only Spot market price is charged, up to max of On-Demand price.
      Value here is only used to prevent EC2 instance launch if Spot market price
      exceeds this value.)
    Type: String
    # Quoted so YAML does not parse it as a float; the parameter is a String.
    Default: '1.5'
  ClusterControlPlaneSecurityGroup:
    Description: The security group of the cluster control plane.
    Type: AWS::EC2::SecurityGroup::Id
  VpcId:
    Description: The VPC of the worker instances
    Type: AWS::EC2::VPC::Id
  Subnets:
    Description: The subnets where workers can be created.
    Type: List<AWS::EC2::Subnet::Id>

  # Instance Image ID (used by both x86_64 spot launch configurations)
  X8664NodeImageId:
    Type: AWS::EC2::Image::Id
    Description: Node base image of x86_64 machine. Use the one with ade prebaked!
    Default: ami-05eae5f67e9d222d0
  # Gitlab Runner Node Image ID
  GitlabRunnerNodeImageId:
    Type: AWS::EC2::Image::Id
    Description: >-
      Node base image of Gitlab runner node. Find the latest AMI id here -
      https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html
    Default: ami-05d586e6f773f6abf

  # Machine Type
  SpotX8664NodeInstanceType:
    Description: EC2 instance type for the x86_64 spot instances.
    Type: String
    Default: c5.18xlarge
    AllowedValues:
      - c5.4xlarge
      - c5.9xlarge
      - c5.18xlarge
    ConstraintDescription: Must be a valid EC2 instance type
  SpotX8664IsolatedNodeInstanceType:
    Description: EC2 instance type for the x86_64 Isolated spot instances.
    Type: String
    Default: c5.4xlarge
    AllowedValues:
      - c5.2xlarge
      - c5.4xlarge
      - c5.9xlarge
    ConstraintDescription: Must be a valid EC2 instance type
  GitlabRunnerNodeInstanceType:
    Description: EC2 instance type for the Gitlab Runner node instances.
    Type: String
    Default: t3a.medium
    AllowedValues:
      - t3a.medium
    ConstraintDescription: Must be a valid EC2 instance type

  # Auto Scaler Min/Max Size
  GitlabRunnerNodeAutoScalingGroupMinSize:
    Type: Number
    Description: Minimum size of Node Group ASG.
    Default: 1
  GitlabRunnerNodeAutoScalingGroupMaxSize:
    Type: Number
    Description: Maximum size of Node Group ASG.
    Default: 1
  # The two spot sizes below are referenced by both spot Auto Scaling groups.
  SpotNodeAutoScalingGroupMinSize:
    Type: Number
    Description: Minimum size of Node Group ASG. [Shared by x86_64 and x86_64 isolated]
    Default: 0
  SpotNodeAutoScalingGroupMaxSize:
    Type: Number
    Description: Maximum size of Node Group ASG. [Shared by x86_64 and x86_64 isolated]
    Default: 16

  # Node Volume Size
  SpotNodeVolumeSize:
    Type: Number
    Description: Node volume size (GB) of Spot Instance. [Shared by x86_64 and x86_64 isolated]
    Default: 256
  GitlabRunnerNodeVolumeSize:
    Type: Number
    Description: Node volume size (GB)
    Default: 32

  # Bootstrap Args
  # Each value is appended verbatim to the /etc/eks/bootstrap.sh command line
  # in the matching launch configuration's user data. The `type` label of the
  # spot groups matches the node-template tag set on their Auto Scaling groups.
  GitlabRunnerNodeBootstrapArguments:
    Description: DO NOT CHANGE - Sets Node Label to identify OnDemand Instances
    Default: "--kubelet-extra-args --node-labels=lifecycle=OnDemand,type=gitlab-runner"
    Type: String
  SpotX8664NodeBootstrapArguments:
    Description: DO NOT CHANGE - Sets Node Label to identify Spot instances.
    Default: "--kubelet-extra-args --node-labels=lifecycle=Ec2Spot,type=x86_64-test"
    Type: String
  SpotX8664IsolatedNodeBootstrapArguments:
    Description: DO NOT CHANGE - Sets Node Label to identify Spot Isolated instances.
    Default: "--kubelet-extra-args --node-labels=lifecycle=Ec2Spot,type=x86_64-isolated-test"
    Type: String

  # Group Names (only used to build the `Name` tag of each group's instances)
  SpotX8664NodeGroupName:
    Description: Unique identifier for the x86_64 Spot Node Group.
    Type: String
    Default: 'SpotX8664NodeGroup'
  SpotX8664IsolatedNodeGroupName:
    Description: Unique identifier for the x86_64 Spot Isolated Node Group.
    Type: String
    Default: 'SpotX8664IsolatedNodeGroup'
  GitlabRunnerNodeGroupName:
    Description: Unique identifier for the OnDemand Node Group.
    Type: String
    Default: 'GitlabRunnerNodeGroup'
# Groups and orders the stack parameters in the CloudFormation console.
# NOTE(review): this is the second top-level `Metadata` key in the template
# (the first one, near the top, holds Author/License). Duplicate mapping keys
# are invalid YAML and most parsers keep only the last one - merge the two.
Metadata:
  AWS::CloudFormation::Interface:
    ParameterGroups:
      - Label:
          default: "EKS Cluster Information"
        Parameters:
          - ClusterName
          - ClusterControlPlaneSecurityGroup
          - VpcId
          - Subnets
          - KeyName
      - Label:
          default: "Gitlab Runner Worker Node Configuration"
        Parameters:
          - GitlabRunnerNodeGroupName
          - GitlabRunnerNodeImageId
          - GitlabRunnerNodeInstanceType
          - GitlabRunnerNodeAutoScalingGroupMinSize
          - GitlabRunnerNodeAutoScalingGroupMaxSize
          - GitlabRunnerNodeVolumeSize
          - GitlabRunnerNodeBootstrapArguments
      - Label:
          default: "x86_64 Spot Node Configuration"
        Parameters:
          - SpotX8664NodeGroupName
          - SpotX8664NodeInstanceType
          - SpotX8664NodeBootstrapArguments
      - Label:
          default: "x86_64 Spot Isolated Node Configuration"
        Parameters:
          - SpotX8664IsolatedNodeGroupName
          - SpotX8664IsolatedNodeInstanceType
          - SpotX8664IsolatedNodeBootstrapArguments
      # Settings referenced by both spot node groups. The image ID and spot
      # price are listed here so they no longer fall into the console's
      # unlabeled parameter section.
      - Label:
          default: "Spot Instance Settings (both x86_64 and x86_64 isolated)"
        Parameters:
          - X8664NodeImageId
          - SpotPrice
          - SpotNodeAutoScalingGroupMinSize
          - SpotNodeAutoScalingGroupMaxSize
          - SpotNodeVolumeSize
# Resources created by this stack: one IAM role/profile shared by all nodes,
# the node security group plus its rules towards the EKS control plane, and
# three Auto Scaling groups (two spot, one on-demand) with their launch
# configurations.
Resources:
  # Instance profile wrapping NodeInstanceRole; attached to every launch
  # configuration below.
  NodeInstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      InstanceProfileName: !Sub "${AWS::StackName}-NodeInstanceProfile"
      Path: "/"
      Roles:
        - !Ref NodeInstanceRole

  # Role assumed by the EC2 worker instances.
  NodeInstanceRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub "${AWS::StackName}-GitlabCIRole"
      Path: "/"
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
            Action:
              - sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy
        - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
        - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
        # NOTE(review): customer-managed policy in a hard-coded account; the
        # stack only deploys where this policy exists - consider a parameter.
        - arn:aws:iam::122027239387:policy/allow-s3-access-to-public-and-rti-connext-micro
      Policies:
        # Inline policy allowing the nodes to inspect and resize Auto Scaling
        # groups (named for the Kubernetes cluster autoscaler).
        - PolicyName: !Sub "${AWS::StackName}-ClusterAutoscaler"
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Sid: K8NodeASGPerms
                Effect: Allow
                Action:
                  - autoscaling:DescribeAutoScalingGroups
                  - autoscaling:DescribeAutoScalingInstances
                  - autoscaling:DescribeLaunchConfigurations
                  - autoscaling:DescribeTags
                  - autoscaling:SetDesiredCapacity
                  - autoscaling:TerminateInstanceInAutoScalingGroup
                Resource: "*"

  # Security group shared by all worker nodes.
  NodeSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupName: !Sub "${AWS::StackName}-NodeSecurityGroup"
      GroupDescription: Security group for all nodes in the cluster
      VpcId: !Ref VpcId
      Tags:
        - Key: !Sub "kubernetes.io/cluster/${ClusterName}"
          Value: 'owned'

  # Node <-> node: all protocols and ports within the node security group.
  NodeSecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow node to communicate with each other
      GroupId: !Ref NodeSecurityGroup
      SourceSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: '-1'
      FromPort: 0
      ToPort: 65535

  # Control plane -> nodes (ingress side on the node group), TCP 1025-65535.
  NodeSecurityGroupFromControlPlaneIngress:
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow worker Kubelets and pods to receive communication from the cluster control plane
      GroupId: !Ref NodeSecurityGroup
      SourceSecurityGroupId: !Ref ClusterControlPlaneSecurityGroup
      IpProtocol: tcp
      FromPort: 1025
      ToPort: 65535

  # Control plane -> nodes (egress side on the control-plane group), TCP 1025-65535.
  ControlPlaneEgressToNodeSecurityGroup:
    Type: AWS::EC2::SecurityGroupEgress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow the cluster control plane to communicate with worker Kubelet and pods
      GroupId: !Ref ClusterControlPlaneSecurityGroup
      DestinationSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: tcp
      FromPort: 1025
      ToPort: 65535

  # Control plane -> nodes on TCP 443 (egress side on the control-plane group).
  ControlPlaneEgressToNodeSecurityGroupOn443:
    Type: AWS::EC2::SecurityGroupEgress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow the cluster control plane to communicate with pods running extension API servers on port 443
      GroupId: !Ref ClusterControlPlaneSecurityGroup
      DestinationSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443

  # Nodes -> control plane API server on TCP 443.
  ClusterControlPlaneSecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    DependsOn: NodeSecurityGroup
    Properties:
      Description: Allow pods to communicate with the cluster API Server
      GroupId: !Ref ClusterControlPlaneSecurityGroup
      SourceSecurityGroupId: !Ref NodeSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443

  # Spot Auto Scaling group for the x86_64 nodes.
  SpotX8664NodeGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      AutoScalingGroupName: !Sub "${AWS::StackName}-gitlab-x86-64-asg"
      # Start at the configured minimum (0 by default). A literal 0 would make
      # group creation fail as soon as the minimum size parameter is raised.
      DesiredCapacity: !Ref SpotNodeAutoScalingGroupMinSize
      LaunchConfigurationName: !Ref SpotX8664NodeLaunchConfig
      MinSize: !Ref SpotNodeAutoScalingGroupMinSize
      MaxSize: !Ref SpotNodeAutoScalingGroupMaxSize
      VPCZoneIdentifier: !Ref Subnets
      Tags:
        - Key: Name
          Value: !Sub "${AWS::StackName}-${SpotX8664NodeGroupName}-Node"
          PropagateAtLaunch: true
        - Key: StackName
          Value: !Ref AWS::StackName
          PropagateAtLaunch: true
        - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
          Value: 'owned'
          PropagateAtLaunch: true
        # Tag values are strings; keep 'true' quoted.
        - Key: Spot
          Value: 'true'
          PropagateAtLaunch: true
        # Same value as the `type` node label in SpotX8664NodeBootstrapArguments'
        # default; kept on the group only (not propagated to instances).
        - Key: 'k8s.io/cluster-autoscaler/node-template/label/type'
          Value: 'x86_64-test'
          PropagateAtLaunch: false
    # UpdatePolicy is a resource attribute (sibling of Properties).
    UpdatePolicy:
      AutoScalingRollingUpdate:
        MinInstancesInService: '0'
        MaxBatchSize: '1'

  # Spot Auto Scaling group for the isolated x86_64 nodes.
  SpotX8664IsolatedNodeGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      AutoScalingGroupName: !Sub "${AWS::StackName}-gitlab-x86-64-isolated-asg"
      # Start at the configured minimum (0 by default). A literal 0 would make
      # group creation fail as soon as the minimum size parameter is raised.
      DesiredCapacity: !Ref SpotNodeAutoScalingGroupMinSize
      LaunchConfigurationName: !Ref SpotX8664IsolatedNodeLaunchConfig
      MinSize: !Ref SpotNodeAutoScalingGroupMinSize
      MaxSize: !Ref SpotNodeAutoScalingGroupMaxSize
      VPCZoneIdentifier: !Ref Subnets
      Tags:
        - Key: Name
          Value: !Sub "${AWS::StackName}-${SpotX8664IsolatedNodeGroupName}-Node"
          PropagateAtLaunch: true
        - Key: StackName
          Value: !Ref AWS::StackName
          PropagateAtLaunch: true
        - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
          Value: 'owned'
          PropagateAtLaunch: true
        # Tag values are strings; keep 'true' quoted.
        - Key: Spot
          Value: 'true'
          PropagateAtLaunch: true
        # Same value as the `type` node label in
        # SpotX8664IsolatedNodeBootstrapArguments' default; group-only tag.
        - Key: 'k8s.io/cluster-autoscaler/node-template/label/type'
          Value: 'x86_64-isolated-test'
          PropagateAtLaunch: false
    # UpdatePolicy is a resource attribute (sibling of Properties).
    UpdatePolicy:
      AutoScalingRollingUpdate:
        MinInstancesInService: '0'
        MaxBatchSize: '1'

  # On-demand Auto Scaling group hosting the Gitlab runner node(s).
  GitlabRunnerNodeGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    Properties:
      AutoScalingGroupName: !Sub "${AWS::StackName}-gitlab-runner-asg"
      DesiredCapacity: !Ref GitlabRunnerNodeAutoScalingGroupMinSize
      LaunchConfigurationName: !Ref GitlabRunnerNodeLaunchConfig
      MinSize: !Ref GitlabRunnerNodeAutoScalingGroupMinSize
      MaxSize: !Ref GitlabRunnerNodeAutoScalingGroupMaxSize
      VPCZoneIdentifier: !Ref Subnets
      Tags:
        - Key: Name
          Value: !Sub "${AWS::StackName}-${GitlabRunnerNodeGroupName}-Node"
          PropagateAtLaunch: true
        - Key: StackName
          Value: !Ref AWS::StackName
          PropagateAtLaunch: true
        - Key: !Sub 'kubernetes.io/cluster/${ClusterName}'
          Value: 'owned'
          PropagateAtLaunch: true
        # Tag values are strings; keep 'false' quoted.
        - Key: Spot
          Value: 'false'
          PropagateAtLaunch: true
    # UpdatePolicy is a resource attribute (sibling of Properties).
    # NOTE(review): MinInstancesInService is 1 while the default MaxSize is
    # also 1; the UpdatePolicy documentation requires MinInstancesInService to
    # be less than MaxSize, so a rolling update with the defaults is expected
    # to fail - confirm and either raise MaxSize or lower this value.
    UpdatePolicy:
      AutoScalingRollingUpdate:
        MinInstancesInService: '1'
        MaxBatchSize: '1'

  # Launch configuration for the x86_64 spot nodes.
  SpotX8664NodeLaunchConfig:
    Type: AWS::AutoScaling::LaunchConfiguration
    Properties:
      LaunchConfigurationName: !Sub "${AWS::StackName}-SpotX8664NodeLaunchConfig"
      AssociatePublicIpAddress: true
      IamInstanceProfile: !Ref NodeInstanceProfile
      ImageId: !Ref X8664NodeImageId
      InstanceType: !Ref SpotX8664NodeInstanceType
      KeyName: !Ref KeyName
      SpotPrice: !Ref SpotPrice
      SecurityGroups:
        - !Ref NodeSecurityGroup
        # NOTE(review): hard-coded, environment-specific security group ID.
        - sg-069122e6c373f574e
      BlockDeviceMappings:
        - DeviceName: /dev/xvda
          Ebs:
            VolumeSize: !Ref SpotNodeVolumeSize
            VolumeType: gp2
            DeleteOnTermination: true
      # Boot script: adds a static /etc/hosts entry for the Gitlab/registry
      # hosts (NOTE(review): hard-coded private IP), writes a sysctl drop-in
      # for core dumps, joins the EKS cluster, then reports the bootstrap exit
      # code via cfn-signal. No CreationPolicy or WaitOnResourceSignals is
      # declared on the target group in this template, so nothing waits on
      # that signal.
      UserData:
        Fn::Base64: !Sub |
          #!/bin/bash
          set -o xtrace
          echo '172.31.37.143 gitlab.smart.ai registry.smart.ai' >> /etc/hosts
          cat > /etc/sysctl.d/11-smart-developer.conf <<EOF
          # Enable coredumps
          kernel.core_uses_pid = 0
          kernel.core_pattern = /tmp/core_smart.%e.%p.%h.%t
          EOF
          sysctl --system
          /etc/eks/bootstrap.sh ${ClusterName} ${SpotX8664NodeBootstrapArguments}
          /opt/aws/bin/cfn-signal --exit-code $? \
              --stack ${AWS::StackName} \
              --resource SpotX8664NodeGroup \
              --region ${AWS::Region}

  # Launch configuration for the isolated x86_64 spot nodes; differs from
  # SpotX8664NodeLaunchConfig only in instance type, bootstrap arguments and
  # the resource it signals.
  SpotX8664IsolatedNodeLaunchConfig:
    Type: AWS::AutoScaling::LaunchConfiguration
    Properties:
      LaunchConfigurationName: !Sub "${AWS::StackName}-SpotX8664IsolatedNodeLaunchConfig"
      AssociatePublicIpAddress: true
      IamInstanceProfile: !Ref NodeInstanceProfile
      ImageId: !Ref X8664NodeImageId
      InstanceType: !Ref SpotX8664IsolatedNodeInstanceType
      KeyName: !Ref KeyName
      SpotPrice: !Ref SpotPrice
      SecurityGroups:
        - !Ref NodeSecurityGroup
        # NOTE(review): hard-coded, environment-specific security group ID.
        - sg-069122e6c373f574e
      BlockDeviceMappings:
        - DeviceName: /dev/xvda
          Ebs:
            VolumeSize: !Ref SpotNodeVolumeSize
            VolumeType: gp2
            DeleteOnTermination: true
      # Same boot script as the x86_64 spot nodes, with the isolated group's
      # bootstrap arguments and signal target.
      UserData:
        Fn::Base64: !Sub |
          #!/bin/bash
          set -o xtrace
          echo '172.31.37.143 gitlab.smart.ai registry.smart.ai' >> /etc/hosts
          cat > /etc/sysctl.d/11-smart-developer.conf <<EOF
          # Enable coredumps
          kernel.core_uses_pid = 0
          kernel.core_pattern = /tmp/core_smart.%e.%p.%h.%t
          EOF
          sysctl --system
          /etc/eks/bootstrap.sh ${ClusterName} ${SpotX8664IsolatedNodeBootstrapArguments}
          /opt/aws/bin/cfn-signal --exit-code $? \
              --stack ${AWS::StackName} \
              --resource SpotX8664IsolatedNodeGroup \
              --region ${AWS::Region}

  # Launch configuration for the on-demand Gitlab runner node (no SpotPrice,
  # so instances are not requested from the spot market).
  GitlabRunnerNodeLaunchConfig:
    Type: AWS::AutoScaling::LaunchConfiguration
    Properties:
      LaunchConfigurationName: !Sub "${AWS::StackName}-GitlabRunnerNodeLaunchConfig"
      AssociatePublicIpAddress: true
      IamInstanceProfile: !Ref NodeInstanceProfile
      ImageId: !Ref GitlabRunnerNodeImageId
      InstanceType: !Ref GitlabRunnerNodeInstanceType
      KeyName: !Ref KeyName
      SecurityGroups:
        - !Ref NodeSecurityGroup
        # NOTE(review): hard-coded, environment-specific security group ID.
        - sg-069122e6c373f574e
      BlockDeviceMappings:
        - DeviceName: /dev/xvda
          Ebs:
            VolumeSize: !Ref GitlabRunnerNodeVolumeSize
            VolumeType: gp2
            DeleteOnTermination: true
      # Boot script: static /etc/hosts entry, EKS bootstrap, then cfn-signal
      # with the bootstrap exit code (no sysctl drop-in on this node type).
      UserData:
        Fn::Base64: !Sub |
          #!/bin/bash
          set -o xtrace
          echo '172.31.37.143 gitlab.smart.ai registry.smart.ai' >> /etc/hosts
          /etc/eks/bootstrap.sh ${ClusterName} ${GitlabRunnerNodeBootstrapArguments}
          /opt/aws/bin/cfn-signal --exit-code $? \
              --stack ${AWS::StackName} \
              --resource GitlabRunnerNodeGroup \
              --region ${AWS::Region}
# Values exported by the stack once it has been created.
Outputs:
  # ARN of the shared worker-node IAM role.
  NodeInstanceRole:
    Description: The node instance role. Needed for aws-auth-cm.yaml
    Value: !GetAtt NodeInstanceRole.Arn
  MyStacksRegion:
    Description: The region where the stack is created
    Value: !Ref "AWS::Region"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment