@@ -6,8 +6,8 @@ metadata:
66 namespace : scalability
77spec :
88 description : |
9- Create an EKS managed nodegroup for a given cluster.
10- This Task can be used to create an EKS managed nodegroup for a given VPC Subnets, security groups and service role in an AWS account.
9+ Create an EKS nodegroup, managed or unmanaged, for a given cluster.
10+ This Task can be used to create an EKS managed or unmanaged nodegroup for the given VPC subnets, security groups, and service role in an AWS account.
1111 params :
1212 - name : cluster-name
1313 description : The name of the EKS cluster you want to spin up managed nodegroups for.
3636 - name : nodegroup-prefix
3737 description : Prefix to be prepended to ASG names.
3838 default : " "
39+ - name : unmanaged-nodegroup-cfn-url
40+ default : " "
41+ description : URL of the CloudFormation template for an unmanaged nodegroup (Auto Scaling group). If not specified, a managed nodegroup will be created.
42+ - name : launch-template-name
43+ default : " "
44+ description : Name of the launch template to be used for the nodegroup.
3945 workspaces :
4046 - name : config
4147 mountPath : /config/
4753 - name : create-nodegroup
4854 image : alpine/k8s:1.23.7
4955 script : |
56+ set -o xtrace
57+ set -o errexit
58+ set -o pipefail
59+ set -o nounset
60+
5061 ENDPOINT_FLAG=""
5162
5263 NODE_ROLE_NAME=$(params.host-cluster-node-role-name)
6071 TAINTS_FLAG="--taints $(params.host-taints)"
6172 fi
6273
63- NG_SUBNETS=$(aws eks $ENDPOINT_FLAG --region $(params.region) describe-cluster --name $(params.cluster-name) \
64- --query cluster.resourcesVpcConfig.subnetIds --output text \
65- )
74+ aws eks $ENDPOINT_FLAG --region $(params.region) describe-cluster --name $(params.cluster-name) --output json > cluster.json
75+ NG_SUBNETS=$(jq -r '.cluster.resourcesVpcConfig.subnetIds | join(" ")' cluster.json)
6676
6777 max_nodes=$(params.max-nodes)
6878 nodes=$(params.desired-nodes)
@@ -72,33 +82,100 @@ spec:
7282 create_and_validate_dp_nodes()
7383 {
7484 node_group_name=$node_group-$1
75- launch_template_name=$(params.cluster-name)-launchTemplate
76- CREATED_NODEGROUP=$(aws eks $ENDPOINT_FLAG --region $(params.region) list-nodegroups --cluster-name $(params.cluster-name) --query 'nodegroups[?@==`'$node_group_name'`]' --output text)
7785 EC2_INSTANCES=$3
78- if [ "$CREATED_NODEGROUP" == "" ]; then
79- #create node group
80- aws eks $ENDPOINT_FLAG create-nodegroup \
81- --cluster-name $(params.cluster-name) \
82- --nodegroup-name $node_group_name \
83- --node-role $NODE_ROLE_ARN \
84- --launch-template name=$launch_template_name\
85- --region $(params.region) \
86- --instance-types $EC2_INSTANCES \
87- --scaling-config minSize=$(params.min-nodes),maxSize=$2,desiredSize=$2 \
88- --subnets $NG_SUBNETS $TAINTS_FLAG
86+ # if no unmanaged nodegroup cfn template is provided, assume we want managed nodegroups
87+ if [ "$(params.unmanaged-nodegroup-cfn-url)" = "" ]; then
88+ CREATED_NODEGROUP=$(aws eks $ENDPOINT_FLAG --region $(params.region) list-nodegroups --cluster-name $(params.cluster-name) --query 'nodegroups[?@==`'$node_group_name'`]' --output text)
89+ if [ "$CREATED_NODEGROUP" == "" ]; then
90+ aws eks $ENDPOINT_FLAG create-nodegroup \
91+ --cluster-name $(params.cluster-name) \
92+ --nodegroup-name $node_group_name \
93+ --node-role $NODE_ROLE_ARN \
94+ --launch-template name=$(params.launch-template-name) \
95+ --region $(params.region) \
96+ --instance-types $EC2_INSTANCES \
97+ --scaling-config minSize=$(params.min-nodes),maxSize=$2,desiredSize=$2 \
98+ --subnets $NG_SUBNETS $TAINTS_FLAG
99+ fi
100+ echo "CREATED_NODEGROUP=$node_group_name"
101+ while [[ "$(aws eks $ENDPOINT_FLAG --region $(params.region) describe-nodegroup --cluster-name $(params.cluster-name) --nodegroup-name $node_group_name --query nodegroup.status --output text)" == "CREATING" ]]
102+ do
103+ echo "$node_group_name is "CREATING" at $(date)"
104+ sleep 2
105+ done
106+ # TODO: do this for unmanaged nodes as well
107+ # right now we don't have an appropriate label to filter on for unmanaged nodes
108+ while true; do
109+ ready_node=$(kubectl get nodes -l eks.amazonaws.com/nodegroup=$node_group_name --no-headers 2>/dev/null | grep -w Ready | wc -l)
110+ echo "ready-nodes=$ready_node out of $2, for nodegroup: $node_group_name"
111+ if [[ "$ready_node" -eq $2 ]]; then break; fi
112+ sleep 5
113+ done
114+ else
115+ echo "Creating or updating aws-auth ConfigMap..."
116+ if ! kubectl get configmap aws-auth -n kube-system &> /dev/null; then
117+ # Download the official template as shown in the Amazon EKS User Guide:
118+ # https://docs.aws.amazon.com/eks/latest/userguide/auth-configmap.html#aws-auth-configmap
119+ curl -O https://s3.us-west-2.amazonaws.com/amazon-eks/cloudformation/2020-10-29/aws-auth-cm.yaml
120+
121+ # Replace the placeholder with our role ARN and apply the configmap
122+ sed -i.bak -e "s|<ARN of instance role (not instance profile)>|${NODE_ROLE_ARN}|" aws-auth-cm.yaml
123+
124+ kubectl apply -f aws-auth-cm.yaml
125+ echo "Created aws-auth ConfigMap"
126+ # Wait for the config map to be ready
127+ echo "Verifying aws-auth ConfigMap..."
128+ for i in {1..10}; do
129+ if kubectl get configmap aws-auth -n kube-system -o yaml | grep -q "${NODE_ROLE_ARN}"; then
130+ echo "aws-auth ConfigMap verified successfully"
131+ break
132+ fi
133+ if [ $i -eq 10 ]; then
134+ echo "Warning: Could not verify aws-auth ConfigMap after 10 attempts"
135+ else
136+ echo "Waiting for aws-auth ConfigMap to be ready... attempt $i"
137+ sleep 5
138+ fi
139+ done
140+ else
141+ echo "aws-auth ConfigMap already exists"
142+ fi
143+
144+ STACK_NAME=$node_group_name
145+ STACK_STATUS=$(aws cloudformation describe-stacks --query 'Stacks[?StackName==`'${STACK_NAME}'`].StackStatus' --output text --region $(params.region))
146+ if [[ "$STACK_STATUS" == "" ]]; then
147+ curl -s $(params.unmanaged-nodegroup-cfn-url) -o ./cfn-template
148+
149+ # assemble the stack parameters as a JSON file
150+ # the AWS CLI can't handle a JSON string as a ParameterValue in the flag representation
151+ # and we need that for kubelet-config
152+ jq --null-input \
153+ --arg LaunchTemplateName "$(params.launch-template-name)" \
154+ --arg ClusterName "$(params.cluster-name)" \
155+ --arg AutoScalingGroupName "${node_group_name}" \
156+ --arg NodeCount "$2" \
157+ --arg SubnetIds $(jq -r '.cluster.resourcesVpcConfig.subnetIds | join(",")' cluster.json) \
158+ --arg SecurityGroup "$(jq -r '.cluster.resourcesVpcConfig.clusterSecurityGroupId' cluster.json)" \
159+ --arg VpcId $(jq -r '.cluster.resourcesVpcConfig.vpcId' cluster.json) \
160+ '$ARGS.named | to_entries | map({"ParameterKey": .key, "ParameterValue": .value})' \
161+ > parameters.json
162+
163+ # cloudformation really fights you every step of the way to pass JSON in, so let's just hack it
164+ LAUNCH_TEMPLATE_OVERRIDES=$(echo "$EC2_INSTANCES" | jq -R -c 'split(" ") | map({"InstanceType": .})')
165+ sed -i "s/PLACEHOLDER_LAUNCH_TEMPLATE_OVERRIDES/$LAUNCH_TEMPLATE_OVERRIDES/g" cfn-template
166+
167+ aws cloudformation create-stack \
168+ --region $(params.region) \
169+ --stack-name $STACK_NAME \
170+ --template-body file://$(pwd)/cfn-template \
171+ --parameters file://$(pwd)/parameters.json
172+
173+ aws cloudformation wait stack-create-complete --stack-name $STACK_NAME --region $(params.region)
174+ echo "CREATED_CFN_STACK=$STACK_NAME"
175+ else
176+ echo "$STACK_NAME Already exists"
177+ fi
89178 fi
90- echo "CREATED_NODEGROUP=$node_group_name"
91- while [[ "$(aws eks $ENDPOINT_FLAG --region $(params.region) describe-nodegroup --cluster-name $(params.cluster-name) --nodegroup-name $node_group_name --query nodegroup.status --output text)" == "CREATING" ]]
92- do
93- echo "$node_group_name is "CREATING" at $(date)"
94- sleep 2
95- done
96- while true; do
97- ready_node=$(kubectl get nodes -l eks.amazonaws.com/nodegroup=$node_group_name --no-headers 2>/dev/null | grep -w Ready | wc -l)
98- echo "ready-nodes=$ready_node out of $2, for nodegroup: $node_group_name"
99- if [[ "$ready_node" -eq $2 ]]; then break; fi
100- sleep 5
101- done
102179 }
103180 for i in $(seq 1 $asgs)
104181 do
0 commit comments