Commit c0f9653

ci to create and delete k8s
1 parent 5222470 commit c0f9653

3 files changed: +1969 -0 lines changed

.github/workflows/cleanup-k8s.yml

Lines changed: 390 additions & 0 deletions
@@ -0,0 +1,390 @@
name: Cleanup K8s Clusters and EC2 Instances

on:
  schedule:
    # Run daily at 2 AM UTC
    - cron: '0 2 * * *'
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Dry run mode (show what would be deleted without actually deleting)'
        required: false
        default: false
        type: boolean
      age_threshold_days:
        description: 'Delete resources older than this many days'
        required: false
        default: '2'
        type: string
      force_delete:
        description: 'Force delete resources even if they have protection'
        required: false
        default: false
        type: boolean

env:
  AWS_DEFAULT_REGION: us-west-2

jobs:
  cleanup-aws-resources:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    continue-on-error: true  # Fail open - don't fail the workflow if cleanup fails

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-2

      - name: Install prerequisites
        run: |
          # Install required tools
          sudo apt-get update
          sudo apt-get install -y jq awscli

          # Install eksctl
          curl --silent --location "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
          sudo mv /tmp/eksctl /usr/local/bin
          eksctl version

          # Install kubectl
          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          chmod +x kubectl
          sudo mv kubectl /usr/local/bin/

      - name: Set cleanup parameters
        id: params
        run: |
          AGE_DAYS="${{ inputs.age_threshold_days || '2' }}"
          DRY_RUN="${{ inputs.dry_run || 'false' }}"
          FORCE_DELETE="${{ inputs.force_delete || 'false' }}"

          echo "age_days=$AGE_DAYS" >> $GITHUB_OUTPUT
          echo "dry_run=$DRY_RUN" >> $GITHUB_OUTPUT
          echo "force_delete=$FORCE_DELETE" >> $GITHUB_OUTPUT

          # Calculate cutoff date
          CUTOFF_DATE=$(date -d "$AGE_DAYS days ago" --iso-8601=seconds)
          echo "cutoff_date=$CUTOFF_DATE" >> $GITHUB_OUTPUT

          echo "Cleanup parameters:"
          echo "- Age threshold: $AGE_DAYS days"
          echo "- Cutoff date: $CUTOFF_DATE"
          echo "- Dry run: $DRY_RUN"
          echo "- Force delete: $FORCE_DELETE"

      - name: List all AWS regions
        id: regions
        run: |
          # Get all enabled regions
          REGIONS=$(aws ec2 describe-regions --query 'Regions[].RegionName' --output text)
          echo "regions=$REGIONS" >> $GITHUB_OUTPUT
          echo "Will check regions: $REGIONS"

      - name: Cleanup EKS clusters
        run: |
          echo "🔍 Scanning for old EKS clusters..."

          # Track what we find and delete
          CLUSTERS_FOUND=0
          CLUSTERS_DELETED=0
          CLUSTERS_FAILED=0

          for region in ${{ steps.regions.outputs.regions }}; do
            echo "Checking region: $region"

            # List EKS clusters in this region
            CLUSTERS=$(aws eks list-clusters --region "$region" --query 'clusters' --output text 2>/dev/null || echo "")

            if [[ -n "$CLUSTERS" && "$CLUSTERS" != "None" ]]; then
              for cluster in $CLUSTERS; do
                echo "Found EKS cluster: $cluster in $region"
                CLUSTERS_FOUND=$((CLUSTERS_FOUND + 1))

                # Get cluster creation date
                CREATION_DATE=$(aws eks describe-cluster --region "$region" --name "$cluster" --query 'cluster.createdAt' --output text 2>/dev/null || echo "")

                if [[ -n "$CREATION_DATE" ]]; then
                  # Convert to comparable format
                  CREATION_TIMESTAMP=$(date -d "$CREATION_DATE" +%s)
                  CUTOFF_TIMESTAMP=$(date -d "${{ steps.params.outputs.cutoff_date }}" +%s)

                  if [[ $CREATION_TIMESTAMP -lt $CUTOFF_TIMESTAMP ]]; then
                    echo "🗑️ Cluster $cluster is older than ${{ steps.params.outputs.age_days }} days (created: $CREATION_DATE)"

                    if [[ "${{ steps.params.outputs.dry_run }}" == "true" ]]; then
                      echo "DRY RUN: Would delete EKS cluster $cluster in $region"
                    else
                      echo "Deleting EKS cluster: $cluster in $region"

                      # Try to delete the cluster
                      if eksctl delete cluster --region "$region" --name "$cluster" --wait --timeout=20m; then
                        echo "✅ Successfully deleted cluster: $cluster"
                        CLUSTERS_DELETED=$((CLUSTERS_DELETED + 1))
                      else
                        echo "❌ Failed to delete cluster: $cluster"
                        CLUSTERS_FAILED=$((CLUSTERS_FAILED + 1))

                        # Try force delete if enabled
                        if [[ "${{ steps.params.outputs.force_delete }}" == "true" ]]; then
                          echo "Attempting force delete..."
                          aws eks delete-cluster --region "$region" --name "$cluster" || true
                        fi
                      fi
                    fi
                  else
                    echo "✅ Cluster $cluster is recent (created: $CREATION_DATE)"
                  fi
                else
                  echo "⚠️ Could not get creation date for cluster: $cluster"
                fi
              done
            fi
          done

          echo "EKS Cleanup Summary:"
          echo "- Clusters found: $CLUSTERS_FOUND"
          echo "- Clusters deleted: $CLUSTERS_DELETED"
          echo "- Clusters failed: $CLUSTERS_FAILED"

          # Export for summary
          echo "CLUSTERS_FOUND=$CLUSTERS_FOUND" >> $GITHUB_ENV
          echo "CLUSTERS_DELETED=$CLUSTERS_DELETED" >> $GITHUB_ENV
          echo "CLUSTERS_FAILED=$CLUSTERS_FAILED" >> $GITHUB_ENV

      - name: Cleanup EC2 instances
        run: |
          echo "🔍 Scanning for old EC2 instances..."

          # Track what we find and delete
          INSTANCES_FOUND=0
          INSTANCES_DELETED=0
          INSTANCES_FAILED=0

          for region in ${{ steps.regions.outputs.regions }}; do
            echo "Checking region: $region"

            # List running EC2 instances in this region
            INSTANCES=$(aws ec2 describe-instances --region "$region" \
              --filters "Name=instance-state-name,Values=running,stopped,stopping" \
              --query 'Reservations[].Instances[].[InstanceId,LaunchTime,Tags[?Key==`Name`].Value|[0]]' \
              --output text 2>/dev/null || echo "")

            if [[ -n "$INSTANCES" ]]; then
              while IFS=$'\t' read -r instance_id launch_time instance_name; do
                if [[ -n "$instance_id" && "$instance_id" != "None" ]]; then
                  echo "Found EC2 instance: $instance_id ($instance_name) in $region"
                  INSTANCES_FOUND=$((INSTANCES_FOUND + 1))

                  if [[ -n "$launch_time" ]]; then
                    # Convert to comparable format
                    LAUNCH_TIMESTAMP=$(date -d "$launch_time" +%s)
                    CUTOFF_TIMESTAMP=$(date -d "${{ steps.params.outputs.cutoff_date }}" +%s)

                    if [[ $LAUNCH_TIMESTAMP -lt $CUTOFF_TIMESTAMP ]]; then
                      echo "🗑️ Instance $instance_id is older than ${{ steps.params.outputs.age_days }} days (launched: $launch_time)"

                      # Check if instance has termination protection
                      PROTECTION=$(aws ec2 describe-instance-attribute --region "$region" --instance-id "$instance_id" --attribute disableApiTermination --query 'DisableApiTermination.Value' --output text 2>/dev/null || echo "false")

                      if [[ "$PROTECTION" == "true" && "${{ steps.params.outputs.force_delete }}" != "true" ]]; then
                        echo "⚠️ Instance $instance_id has termination protection enabled, skipping"
                        continue
                      fi

                      if [[ "${{ steps.params.outputs.dry_run }}" == "true" ]]; then
                        echo "DRY RUN: Would terminate EC2 instance $instance_id in $region"
                      else
                        echo "Terminating EC2 instance: $instance_id in $region"

                        # Disable termination protection if needed
                        if [[ "$PROTECTION" == "true" ]]; then
                          echo "Disabling termination protection..."
                          aws ec2 modify-instance-attribute --region "$region" --instance-id "$instance_id" --no-disable-api-termination || true
                        fi

                        # Terminate the instance
                        if aws ec2 terminate-instances --region "$region" --instance-ids "$instance_id"; then
                          echo "✅ Successfully terminated instance: $instance_id"
                          INSTANCES_DELETED=$((INSTANCES_DELETED + 1))
                        else
                          echo "❌ Failed to terminate instance: $instance_id"
                          INSTANCES_FAILED=$((INSTANCES_FAILED + 1))
                        fi
                      fi
                    else
                      echo "✅ Instance $instance_id is recent (launched: $launch_time)"
                    fi
                  else
                    echo "⚠️ Could not get launch time for instance: $instance_id"
                  fi
                fi
              done <<< "$INSTANCES"
            fi
          done

          echo "EC2 Cleanup Summary:"
          echo "- Instances found: $INSTANCES_FOUND"
          echo "- Instances deleted: $INSTANCES_DELETED"
          echo "- Instances failed: $INSTANCES_FAILED"

          # Export for summary
          echo "INSTANCES_FOUND=$INSTANCES_FOUND" >> $GITHUB_ENV
          echo "INSTANCES_DELETED=$INSTANCES_DELETED" >> $GITHUB_ENV
          echo "INSTANCES_FAILED=$INSTANCES_FAILED" >> $GITHUB_ENV

      - name: Cleanup orphaned EBS volumes
        run: |
          echo "🔍 Scanning for orphaned EBS volumes..."

          VOLUMES_FOUND=0
          VOLUMES_DELETED=0
          VOLUMES_FAILED=0

          for region in ${{ steps.regions.outputs.regions }}; do
            echo "Checking region: $region"

            # List available (unattached) EBS volumes
            VOLUMES=$(aws ec2 describe-volumes --region "$region" \
              --filters "Name=status,Values=available" \
              --query 'Volumes[].[VolumeId,CreateTime]' \
              --output text 2>/dev/null || echo "")

            if [[ -n "$VOLUMES" ]]; then
              while IFS=$'\t' read -r volume_id create_time; do
                if [[ -n "$volume_id" && "$volume_id" != "None" ]]; then
                  echo "Found orphaned EBS volume: $volume_id in $region"
                  VOLUMES_FOUND=$((VOLUMES_FOUND + 1))

                  if [[ -n "$create_time" ]]; then
                    CREATE_TIMESTAMP=$(date -d "$create_time" +%s)
                    CUTOFF_TIMESTAMP=$(date -d "${{ steps.params.outputs.cutoff_date }}" +%s)

                    if [[ $CREATE_TIMESTAMP -lt $CUTOFF_TIMESTAMP ]]; then
                      echo "🗑️ Volume $volume_id is older than ${{ steps.params.outputs.age_days }} days (created: $create_time)"

                      if [[ "${{ steps.params.outputs.dry_run }}" == "true" ]]; then
                        echo "DRY RUN: Would delete EBS volume $volume_id in $region"
                      else
                        echo "Deleting EBS volume: $volume_id in $region"

                        if aws ec2 delete-volume --region "$region" --volume-id "$volume_id"; then
                          echo "✅ Successfully deleted volume: $volume_id"
                          VOLUMES_DELETED=$((VOLUMES_DELETED + 1))
                        else
                          echo "❌ Failed to delete volume: $volume_id"
                          VOLUMES_FAILED=$((VOLUMES_FAILED + 1))
                        fi
                      fi
                    else
                      echo "✅ Volume $volume_id is recent (created: $create_time)"
                    fi
                  fi
                fi
              done <<< "$VOLUMES"
            fi
          done

          echo "EBS Cleanup Summary:"
          echo "- Volumes found: $VOLUMES_FOUND"
          echo "- Volumes deleted: $VOLUMES_DELETED"
          echo "- Volumes failed: $VOLUMES_FAILED"

          # Export for summary
          echo "VOLUMES_FOUND=$VOLUMES_FOUND" >> $GITHUB_ENV
          echo "VOLUMES_DELETED=$VOLUMES_DELETED" >> $GITHUB_ENV
          echo "VOLUMES_FAILED=$VOLUMES_FAILED" >> $GITHUB_ENV

      - name: Update cluster info file
        run: |
          # Make script executable and use it to clean up the clusters.info file
          chmod +x scripts/create-k8s.sh

          echo "🔍 Checking clusters.info file for deleted clusters..."

          if [[ -f scripts/clusters.info ]]; then
            # Create a backup
            cp scripts/clusters.info scripts/clusters.info.backup

            # Process each cluster entry and remove those that no longer exist
            while IFS= read -r line; do
              if [[ "$line" =~ ^CLOUD= ]]; then
                # Extract cluster info
                CLOUD=$(echo "$line" | grep -o 'CLOUD=[^,]*' | cut -d= -f2)
                CLUSTER_NAME=$(echo "$line" | grep -o 'CLUSTER_NAME=[^,]*' | cut -d= -f2)
                REGION=$(echo "$line" | grep -o 'REGION=[^,]*' | cut -d= -f2)

                if [[ "$CLOUD" == "aws" && -n "$CLUSTER_NAME" && -n "$REGION" ]]; then
                  # Check if EKS cluster still exists
                  if ! aws eks describe-cluster --region "$REGION" --name "$CLUSTER_NAME" >/dev/null 2>&1; then
                    echo "Removing deleted cluster from info: $CLUSTER_NAME"
                    # Use the script's deletion function
                    sed -i "/CLOUD=aws,CLUSTER_NAME=$CLUSTER_NAME,REGION=$REGION/d" scripts/clusters.info
                  fi
                fi
              fi
            done < scripts/clusters.info.backup

            echo "Updated clusters.info file"
          fi

      - name: Create cleanup summary
        if: always()
        run: |
          {
            echo "## AWS Resource Cleanup Summary"
            echo ""
            echo "**Cleanup Date:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
            echo "**Age Threshold:** ${{ steps.params.outputs.age_days }} days"
            echo "**Cutoff Date:** ${{ steps.params.outputs.cutoff_date }}"
            echo "**Mode:** ${{ steps.params.outputs.dry_run == 'true' && 'DRY RUN' || 'LIVE DELETION' }}"
            echo ""
            echo "### EKS Clusters"
            echo "- 🔍 Found: ${CLUSTERS_FOUND:-0}"
            echo "- 🗑️ Deleted: ${CLUSTERS_DELETED:-0}"
            echo "- ❌ Failed: ${CLUSTERS_FAILED:-0}"
            echo ""
            echo "### EC2 Instances"
            echo "- 🔍 Found: ${INSTANCES_FOUND:-0}"
            echo "- 🗑️ Deleted: ${INSTANCES_DELETED:-0}"
            echo "- ❌ Failed: ${INSTANCES_FAILED:-0}"
            echo ""
            echo "### EBS Volumes"
            echo "- 🔍 Found: ${VOLUMES_FOUND:-0}"
            echo "- 🗑️ Deleted: ${VOLUMES_DELETED:-0}"
            echo "- ❌ Failed: ${VOLUMES_FAILED:-0}"
            echo ""

            TOTAL_FOUND=$((${CLUSTERS_FOUND:-0} + ${INSTANCES_FOUND:-0} + ${VOLUMES_FOUND:-0}))
            TOTAL_DELETED=$((${CLUSTERS_DELETED:-0} + ${INSTANCES_DELETED:-0} + ${VOLUMES_DELETED:-0}))
            TOTAL_FAILED=$((${CLUSTERS_FAILED:-0} + ${INSTANCES_FAILED:-0} + ${VOLUMES_FAILED:-0}))

            echo "### Overall Summary"
            echo "- **Total Resources Found:** $TOTAL_FOUND"
            echo "- **Total Resources Deleted:** $TOTAL_DELETED"
            echo "- **Total Failures:** $TOTAL_FAILED"
            echo ""

            if [[ $TOTAL_FAILED -gt 0 ]]; then
              echo "⚠️ **Note:** Some resources failed to delete. The workflow continues with 'fail open' behavior."
            elif [[ $TOTAL_DELETED -gt 0 ]]; then
              echo "✅ **Status:** Cleanup completed successfully"
            else
              echo "ℹ️ **Status:** No resources needed cleanup"
            fi
          } >> $GITHUB_STEP_SUMMARY

      - name: Upload cleanup logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: cleanup-logs-${{ github.run_id }}
          path: |
            scripts/clusters.info*
          retention-days: 30
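
Usage note (not part of this commit): once the file is on the default branch, the workflow_dispatch inputs defined above could be exercised from an authenticated GitHub CLI; the commands below are a sketch under that assumption.

  # Dry run: report resources older than 3 days without deleting anything
  gh workflow run cleanup-k8s.yml -f dry_run=true -f age_threshold_days=3

  # Live cleanup, also removing resources with termination protection
  gh workflow run cleanup-k8s.yml -f force_delete=true

Scheduled runs take the defaults (2-day threshold, live deletion, no force delete), since no inputs are supplied on the cron trigger.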
