@@ -221,3 +221,378 @@ def format_cluster_list(clusters: list[Dict[str, Any]], region: str) -> str:
221221 )
222222
223223 return "\n " .join (lines )
224+
225+
226+ class ClusterNotFoundError (Exception ):
227+ """Raised when one or more clusters cannot be found."""
228+
229+ pass
230+
231+
232+ async def validate_clusters (cluster_names : list [str ], region : str ) -> list [str ]:
233+ """
234+ Validate that the specified clusters exist and return their ARNs.
235+
236+ Args:
237+ cluster_names: List of cluster names to validate
238+ region: AWS region to check clusters in
239+
240+ Returns:
241+ List of validated cluster ARNs
242+
243+ Raises:
244+ ClusterNotFoundError: If one or more clusters are not found
245+ """
246+ logger .info (f"Validating { len (cluster_names )} cluster(s) in region { region } " )
247+
248+ try :
249+ # Describe clusters to validate they exist
250+ describe_response = await ecs_api_operation (
251+ api_operation = "DescribeClusters" ,
252+ api_params = {"clusters" : cluster_names , "include" : ["TAGS" ]},
253+ )
254+
255+ found_clusters = describe_response .get ("clusters" , [])
256+ failures = describe_response .get ("failures" , [])
257+
258+ if failures :
259+ failed_names = [f ["arn" ] for f in failures ]
260+ raise ClusterNotFoundError (f"Clusters not found in region '{ region } ': { failed_names } " )
261+
262+ if len (found_clusters ) != len (cluster_names ):
263+ found_names = [c ["clusterName" ] for c in found_clusters ]
264+ missing = set (cluster_names ) - set (found_names )
265+ raise ClusterNotFoundError (f"Clusters not found in region '{ region } ': { list (missing )} " )
266+
267+ cluster_arns = [cluster ["clusterArn" ] for cluster in found_clusters ]
268+ logger .info (f"Successfully validated { len (cluster_arns )} cluster(s)" )
269+ return cluster_arns
270+
271+ except ClusterNotFoundError :
272+ raise
273+ except Exception as e :
274+ logger .error (f"Error validating clusters: { e } " )
275+ raise Exception (f"Failed to validate clusters: { str (e )} " ) from e
276+
277+
278+ async def collect_cluster_configuration (region : str , cluster_name : str ) -> Dict [str , Any ]:
279+ """
280+ Collect comprehensive configuration for an ECS cluster.
281+
282+ This function gathers all security-relevant configuration data for analysis:
283+ - Cluster metadata and settings
284+ - Service configurations
285+ - Task definition configurations
286+ - Security group configurations
287+ - IAM role references
288+
289+ Args:
290+ region: AWS region containing the cluster
291+ cluster_name: Name of the cluster to analyze
292+
293+ Returns:
294+ Dictionary containing complete cluster configuration
295+
296+ Note:
297+ This function collects data but does not perform security analysis.
298+ The analysis is performed by AI agents using the collected data.
299+ """
300+ logger .info (f"Collecting configuration for cluster '{ cluster_name } ' in region { region } " )
301+
302+ cluster_config = {
303+ "cluster_name" : cluster_name ,
304+ "region" : region ,
305+ "cluster_metadata" : {},
306+ "services" : [],
307+ "task_definitions" : [],
308+ "security_groups" : [],
309+ "collection_errors" : [],
310+ }
311+
312+ try :
313+ # Step 1: Collect cluster metadata
314+ logger .info (f"Step 1: Collecting cluster metadata for '{ cluster_name } '" )
315+ describe_response = await ecs_api_operation (
316+ api_operation = "DescribeClusters" ,
317+ api_params = {
318+ "clusters" : [cluster_name ],
319+ "include" : ["ATTACHMENTS" , "SETTINGS" , "STATISTICS" , "TAGS" ],
320+ },
321+ )
322+
323+ clusters = describe_response .get ("clusters" , [])
324+ if not clusters :
325+ raise Exception (f"Cluster '{ cluster_name } ' not found" )
326+
327+ cluster = clusters [0 ]
328+ cluster_config ["cluster_metadata" ] = {
329+ "cluster_arn" : cluster .get ("clusterArn" ),
330+ "cluster_name" : cluster .get ("clusterName" ),
331+ "status" : cluster .get ("status" ),
332+ "running_tasks_count" : cluster .get ("runningTasksCount" , 0 ),
333+ "pending_tasks_count" : cluster .get ("pendingTasksCount" , 0 ),
334+ "active_services_count" : cluster .get ("activeServicesCount" , 0 ),
335+ "registered_container_instances_count" : cluster .get (
336+ "registeredContainerInstancesCount" , 0
337+ ),
338+ "statistics" : cluster .get ("statistics" , []),
339+ "tags" : {tag ["key" ]: tag ["value" ] for tag in cluster .get ("tags" , [])},
340+ "settings" : cluster .get ("settings" , []),
341+ "configuration" : cluster .get ("configuration" , {}),
342+ "service_connect_defaults" : cluster .get ("serviceConnectDefaults" , {}),
343+ "attachments" : cluster .get ("attachments" , []),
344+ }
345+
346+ logger .info (f"Successfully collected cluster metadata for '{ cluster_name } '" )
347+
348+ except Exception as e :
349+ error_msg = f"Failed to collect cluster metadata: { str (e )} "
350+ logger .warning (error_msg )
351+ cluster_config ["collection_errors" ].append (error_msg )
352+
353+ # Step 2: Collect service configurations
354+ try :
355+ logger .info (f"Step 2: Collecting service configurations for cluster '{ cluster_name } '" )
356+ services_response = await ecs_api_operation (
357+ api_operation = "ListServices" , api_params = {"cluster" : cluster_name }
358+ )
359+
360+ service_arns = services_response .get ("serviceArns" , [])
361+ logger .info (f"Found { len (service_arns )} service(s) in cluster '{ cluster_name } '" )
362+
363+ if service_arns :
364+ # Process services in batches (DescribeServices has a limit)
365+ batch_size = 10
366+ services_list = []
367+ services_with_errors = []
368+
369+ for i in range (0 , len (service_arns ), batch_size ):
370+ batch_arns = service_arns [i : i + batch_size ]
371+ try :
372+ describe_services_response = await ecs_api_operation (
373+ api_operation = "DescribeServices" ,
374+ api_params = {"cluster" : cluster_name , "services" : batch_arns },
375+ )
376+
377+ services = describe_services_response .get ("services" , [])
378+
379+ for service in services :
380+ try :
381+ service_name = service .get ("serviceName" )
382+ service_status = service .get ("status" )
383+
384+ # Log warning for non-active services
385+ if service_status not in ["ACTIVE" , "DRAINING" ]:
386+ logger .warning (
387+ f"Service { service_name } is in { service_status } state"
388+ )
389+
390+ # Collect security group information
391+ security_group_details = []
392+ network_config = service .get ("networkConfiguration" , {})
393+ awsvpc_config = network_config .get ("awsvpcConfiguration" , {})
394+ security_group_ids = awsvpc_config .get ("securityGroups" , [])
395+
396+ if security_group_ids :
397+ try :
398+ # Describe security groups
399+ ec2_client = boto3 .client ("ec2" , region_name = region )
400+ sg_response = ec2_client .describe_security_groups (
401+ GroupIds = security_group_ids
402+ )
403+ security_group_details .extend (
404+ [
405+ {
406+ "group_id" : sg ["GroupId" ],
407+ "group_name" : sg .get ("GroupName" , "" ),
408+ "description" : sg .get ("Description" , "" ),
409+ "vpc_id" : sg .get ("VpcId" , "" ),
410+ "ingress_rules" : sg .get ("IpPermissions" , []),
411+ "egress_rules" : sg .get ("IpPermissionsEgress" , []),
412+ "tags" : {
413+ tag ["Key" ]: tag ["Value" ]
414+ for tag in sg .get ("Tags" , [])
415+ },
416+ }
417+ for sg in sg_response .get ("SecurityGroups" , [])
418+ ]
419+ )
420+ logger .info (
421+ f"Collected { len (security_group_details )} "
422+ f"security group(s) for service { service_name } "
423+ )
424+ except Exception as e :
425+ error_msg = (
426+ f"Failed to describe security groups "
427+ f"{ security_group_ids } : { str (e )} "
428+ )
429+ logger .error (f"Service { service_name } : { error_msg } " )
430+ services_with_errors .append (
431+ {"service_name" : service_name , "error" : error_msg }
432+ )
433+
434+ # Build service configuration
435+ service_config = {
436+ "service_name" : service_name ,
437+ "service_arn" : service .get ("serviceArn" ),
438+ "cluster_arn" : service .get ("clusterArn" ),
439+ "task_definition" : service .get ("taskDefinition" ),
440+ "desired_count" : service .get ("desiredCount" , 0 ),
441+ "running_count" : service .get ("runningCount" , 0 ),
442+ "pending_count" : service .get ("pendingCount" , 0 ),
443+ "status" : service_status ,
444+ "launch_type" : service .get ("launchType" ),
445+ "capacity_provider_strategy" : service .get (
446+ "capacityProviderStrategy" , []
447+ ),
448+ "platform_version" : service .get ("platformVersion" ),
449+ "platform_family" : service .get ("platformFamily" ),
450+ "network_configuration" : network_config ,
451+ "security_groups" : security_group_details ,
452+ "load_balancers" : service .get ("loadBalancers" , []),
453+ "service_registries" : service .get ("serviceRegistries" , []),
454+ "tags" : {
455+ tag ["key" ]: tag ["value" ] for tag in service .get ("tags" , [])
456+ },
457+ "enable_execute_command" : service .get (
458+ "enableExecuteCommand" , False
459+ ),
460+ "health_check_grace_period_seconds" : service .get (
461+ "healthCheckGracePeriodSeconds"
462+ ),
463+ "scheduling_strategy" : service .get ("schedulingStrategy" ),
464+ "deployment_controller" : service .get ("deploymentController" , {}),
465+ "service_connect_configuration" : service .get (
466+ "serviceConnectConfiguration" , {}
467+ ),
468+ }
469+ services_list .append (service_config )
470+
471+ except Exception as e :
472+ logger .error (f"Failed to process service: { e } " )
473+ services_with_errors .append (
474+ {
475+ "service_name" : service .get ("serviceName" , "Unknown" ),
476+ "error" : str (e ),
477+ }
478+ )
479+
480+ except Exception as e :
481+ logger .error (f"Failed to describe services batch: { e } " )
482+ for arn in batch_arns :
483+ service_name = arn .split ("/" )[- 1 ]
484+ services_with_errors .append ({"service_name" : service_name , "error" : str (e )})
485+
486+ cluster_config ["services" ] = services_list
487+ if services_with_errors :
488+ cluster_config ["collection_errors" ].extend (
489+ [f"Service { s ['service_name' ]} : { s ['error' ]} " for s in services_with_errors ]
490+ )
491+ logger .warning (
492+ f"{ len (services_with_errors )} service(s) had errors during collection"
493+ )
494+
495+ except Exception as e :
496+ error_msg = f"Failed to collect service configurations: { str (e )} "
497+ logger .warning (error_msg )
498+ cluster_config ["collection_errors" ].append (error_msg )
499+
500+ # Step 3: Collect task definition configurations
501+ try :
502+ logger .info (
503+ f"Step 3: Collecting task definition configurations for cluster '{ cluster_name } '"
504+ )
505+
506+ # Get unique task definition ARNs from services
507+ task_def_arns = set ()
508+ for service in cluster_config ["services" ]:
509+ task_def_arn = service .get ("task_definition" )
510+ if task_def_arn :
511+ task_def_arns .add (task_def_arn )
512+
513+ logger .info (f"Found { len (task_def_arns )} unique task definition(s) to describe" )
514+
515+ task_definitions_list = []
516+ for task_def_arn in task_def_arns :
517+ try :
518+ # Describe individual task definition
519+ task_def_response = await ecs_api_operation (
520+ api_operation = "DescribeTaskDefinition" ,
521+ api_params = {"taskDefinition" : task_def_arn , "include" : ["TAGS" ]},
522+ )
523+
524+ task_def = task_def_response .get ("taskDefinition" , {})
525+ task_def_config = {
526+ "family" : task_def .get ("family" ),
527+ "task_definition_arn" : task_def .get ("taskDefinitionArn" ),
528+ "revision" : task_def .get ("revision" ),
529+ "status" : task_def .get ("status" ),
530+ "requires_compatibilities" : task_def .get ("requiresCompatibilities" , []),
531+ "network_mode" : task_def .get ("networkMode" ),
532+ "cpu" : task_def .get ("cpu" ),
533+ "memory" : task_def .get ("memory" ),
534+ "task_role_arn" : task_def .get ("taskRoleArn" ),
535+ "execution_role_arn" : task_def .get ("executionRoleArn" ),
536+ "container_definitions" : task_def .get ("containerDefinitions" , []),
537+ "volumes" : task_def .get ("volumes" , []),
538+ "placement_constraints" : task_def .get ("placementConstraints" , []),
539+ "requires_attributes" : task_def .get ("requiresAttributes" , []),
540+ "pid_mode" : task_def .get ("pidMode" ),
541+ "ipc_mode" : task_def .get ("ipcMode" ),
542+ "proxy_configuration" : task_def .get ("proxyConfiguration" , {}),
543+ "inference_accelerators" : task_def .get ("inferenceAccelerators" , []),
544+ "ephemeral_storage" : task_def .get ("ephemeralStorage" , {}),
545+ "runtime_platform" : task_def .get ("runtimePlatform" , {}),
546+ "tags" : {tag ["key" ]: tag ["value" ] for tag in task_def_response .get ("tags" , [])},
547+ }
548+ task_definitions_list .append (task_def_config )
549+
550+ except Exception as e :
551+ logger .warning (f"Failed to describe task definition '{ task_def_arn } ': { e } " )
552+ cluster_config ["collection_errors" ].append (
553+ f"Task definition { task_def_arn } : { str (e )} "
554+ )
555+
556+ cluster_config ["task_definitions" ] = task_definitions_list
557+ logger .info (
558+ f"Successfully collected { len (task_definitions_list )} task definition configuration(s)"
559+ )
560+
561+ except Exception as e :
562+ error_msg = f"Failed to collect task definition configurations: { str (e )} "
563+ logger .warning (error_msg )
564+ cluster_config ["collection_errors" ].append (error_msg )
565+
566+ # Step 4: Collect unique security groups (deduplicated from services)
567+ try :
568+ logger .info ("Step 4: Deduplicating security group configurations" )
569+ unique_security_groups = {}
570+
571+ for service in cluster_config ["services" ]:
572+ for sg in service .get ("security_groups" , []):
573+ sg_id = sg .get ("group_id" )
574+ if sg_id and sg_id not in unique_security_groups :
575+ unique_security_groups [sg_id ] = sg
576+
577+ cluster_config ["security_groups" ] = list (unique_security_groups .values ())
578+ logger .info (f"Found { len (unique_security_groups )} unique security group(s)" )
579+
580+ except Exception as e :
581+ error_msg = f"Failed to process security groups: { str (e )} "
582+ logger .warning (error_msg )
583+ cluster_config ["collection_errors" ].append (error_msg )
584+
585+ # Log collection summary
586+ logger .info (
587+ f"Configuration collection complete for '{ cluster_name } ': "
588+ f"{ len (cluster_config ['services' ])} services, "
589+ f"{ len (cluster_config ['task_definitions' ])} task definitions, "
590+ f"{ len (cluster_config ['security_groups' ])} security groups"
591+ )
592+
593+ if cluster_config ["collection_errors" ]:
594+ logger .warning (
595+ f"Collection completed with { len (cluster_config ['collection_errors' ])} error(s)"
596+ )
597+
598+ return cluster_config
0 commit comments