Skip to content

Commit 14d794c

Browse files
author
Suresh Natarajan
committed
feat: Add cluster validation and configuration collection (PR #2)
1 parent 853f6aa commit 14d794c

File tree

4 files changed

+1314
-25
lines changed

4 files changed

+1314
-25
lines changed

src/ecs-mcp-server/awslabs/ecs_mcp_server/api/security_analysis.py

Lines changed: 375 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,378 @@ def format_cluster_list(clusters: list[Dict[str, Any]], region: str) -> str:
221221
)
222222

223223
return "\n".join(lines)
224+
225+
226+
class ClusterNotFoundError(Exception):
227+
"""Raised when one or more clusters cannot be found."""
228+
229+
pass
230+
231+
232+
async def validate_clusters(cluster_names: list[str], region: str) -> list[str]:
233+
"""
234+
Validate that the specified clusters exist and return their ARNs.
235+
236+
Args:
237+
cluster_names: List of cluster names to validate
238+
region: AWS region to check clusters in
239+
240+
Returns:
241+
List of validated cluster ARNs
242+
243+
Raises:
244+
ClusterNotFoundError: If one or more clusters are not found
245+
"""
246+
logger.info(f"Validating {len(cluster_names)} cluster(s) in region {region}")
247+
248+
try:
249+
# Describe clusters to validate they exist
250+
describe_response = await ecs_api_operation(
251+
api_operation="DescribeClusters",
252+
api_params={"clusters": cluster_names, "include": ["TAGS"]},
253+
)
254+
255+
found_clusters = describe_response.get("clusters", [])
256+
failures = describe_response.get("failures", [])
257+
258+
if failures:
259+
failed_names = [f["arn"] for f in failures]
260+
raise ClusterNotFoundError(f"Clusters not found in region '{region}': {failed_names}")
261+
262+
if len(found_clusters) != len(cluster_names):
263+
found_names = [c["clusterName"] for c in found_clusters]
264+
missing = set(cluster_names) - set(found_names)
265+
raise ClusterNotFoundError(f"Clusters not found in region '{region}': {list(missing)}")
266+
267+
cluster_arns = [cluster["clusterArn"] for cluster in found_clusters]
268+
logger.info(f"Successfully validated {len(cluster_arns)} cluster(s)")
269+
return cluster_arns
270+
271+
except ClusterNotFoundError:
272+
raise
273+
except Exception as e:
274+
logger.error(f"Error validating clusters: {e}")
275+
raise Exception(f"Failed to validate clusters: {str(e)}") from e
276+
277+
278+
async def collect_cluster_configuration(region: str, cluster_name: str) -> Dict[str, Any]:
279+
"""
280+
Collect comprehensive configuration for an ECS cluster.
281+
282+
This function gathers all security-relevant configuration data for analysis:
283+
- Cluster metadata and settings
284+
- Service configurations
285+
- Task definition configurations
286+
- Security group configurations
287+
- IAM role references
288+
289+
Args:
290+
region: AWS region containing the cluster
291+
cluster_name: Name of the cluster to analyze
292+
293+
Returns:
294+
Dictionary containing complete cluster configuration
295+
296+
Note:
297+
This function collects data but does not perform security analysis.
298+
The analysis is performed by AI agents using the collected data.
299+
"""
300+
logger.info(f"Collecting configuration for cluster '{cluster_name}' in region {region}")
301+
302+
cluster_config = {
303+
"cluster_name": cluster_name,
304+
"region": region,
305+
"cluster_metadata": {},
306+
"services": [],
307+
"task_definitions": [],
308+
"security_groups": [],
309+
"collection_errors": [],
310+
}
311+
312+
try:
313+
# Step 1: Collect cluster metadata
314+
logger.info(f"Step 1: Collecting cluster metadata for '{cluster_name}'")
315+
describe_response = await ecs_api_operation(
316+
api_operation="DescribeClusters",
317+
api_params={
318+
"clusters": [cluster_name],
319+
"include": ["ATTACHMENTS", "SETTINGS", "STATISTICS", "TAGS"],
320+
},
321+
)
322+
323+
clusters = describe_response.get("clusters", [])
324+
if not clusters:
325+
raise Exception(f"Cluster '{cluster_name}' not found")
326+
327+
cluster = clusters[0]
328+
cluster_config["cluster_metadata"] = {
329+
"cluster_arn": cluster.get("clusterArn"),
330+
"cluster_name": cluster.get("clusterName"),
331+
"status": cluster.get("status"),
332+
"running_tasks_count": cluster.get("runningTasksCount", 0),
333+
"pending_tasks_count": cluster.get("pendingTasksCount", 0),
334+
"active_services_count": cluster.get("activeServicesCount", 0),
335+
"registered_container_instances_count": cluster.get(
336+
"registeredContainerInstancesCount", 0
337+
),
338+
"statistics": cluster.get("statistics", []),
339+
"tags": {tag["key"]: tag["value"] for tag in cluster.get("tags", [])},
340+
"settings": cluster.get("settings", []),
341+
"configuration": cluster.get("configuration", {}),
342+
"service_connect_defaults": cluster.get("serviceConnectDefaults", {}),
343+
"attachments": cluster.get("attachments", []),
344+
}
345+
346+
logger.info(f"Successfully collected cluster metadata for '{cluster_name}'")
347+
348+
except Exception as e:
349+
error_msg = f"Failed to collect cluster metadata: {str(e)}"
350+
logger.warning(error_msg)
351+
cluster_config["collection_errors"].append(error_msg)
352+
353+
# Step 2: Collect service configurations
354+
try:
355+
logger.info(f"Step 2: Collecting service configurations for cluster '{cluster_name}'")
356+
services_response = await ecs_api_operation(
357+
api_operation="ListServices", api_params={"cluster": cluster_name}
358+
)
359+
360+
service_arns = services_response.get("serviceArns", [])
361+
logger.info(f"Found {len(service_arns)} service(s) in cluster '{cluster_name}'")
362+
363+
if service_arns:
364+
# Process services in batches (DescribeServices has a limit)
365+
batch_size = 10
366+
services_list = []
367+
services_with_errors = []
368+
369+
for i in range(0, len(service_arns), batch_size):
370+
batch_arns = service_arns[i : i + batch_size]
371+
try:
372+
describe_services_response = await ecs_api_operation(
373+
api_operation="DescribeServices",
374+
api_params={"cluster": cluster_name, "services": batch_arns},
375+
)
376+
377+
services = describe_services_response.get("services", [])
378+
379+
for service in services:
380+
try:
381+
service_name = service.get("serviceName")
382+
service_status = service.get("status")
383+
384+
# Log warning for non-active services
385+
if service_status not in ["ACTIVE", "DRAINING"]:
386+
logger.warning(
387+
f"Service {service_name} is in {service_status} state"
388+
)
389+
390+
# Collect security group information
391+
security_group_details = []
392+
network_config = service.get("networkConfiguration", {})
393+
awsvpc_config = network_config.get("awsvpcConfiguration", {})
394+
security_group_ids = awsvpc_config.get("securityGroups", [])
395+
396+
if security_group_ids:
397+
try:
398+
# Describe security groups
399+
ec2_client = boto3.client("ec2", region_name=region)
400+
sg_response = ec2_client.describe_security_groups(
401+
GroupIds=security_group_ids
402+
)
403+
security_group_details.extend(
404+
[
405+
{
406+
"group_id": sg["GroupId"],
407+
"group_name": sg.get("GroupName", ""),
408+
"description": sg.get("Description", ""),
409+
"vpc_id": sg.get("VpcId", ""),
410+
"ingress_rules": sg.get("IpPermissions", []),
411+
"egress_rules": sg.get("IpPermissionsEgress", []),
412+
"tags": {
413+
tag["Key"]: tag["Value"]
414+
for tag in sg.get("Tags", [])
415+
},
416+
}
417+
for sg in sg_response.get("SecurityGroups", [])
418+
]
419+
)
420+
logger.info(
421+
f"Collected {len(security_group_details)} "
422+
f"security group(s) for service {service_name}"
423+
)
424+
except Exception as e:
425+
error_msg = (
426+
f"Failed to describe security groups "
427+
f"{security_group_ids}: {str(e)}"
428+
)
429+
logger.error(f"Service {service_name}: {error_msg}")
430+
services_with_errors.append(
431+
{"service_name": service_name, "error": error_msg}
432+
)
433+
434+
# Build service configuration
435+
service_config = {
436+
"service_name": service_name,
437+
"service_arn": service.get("serviceArn"),
438+
"cluster_arn": service.get("clusterArn"),
439+
"task_definition": service.get("taskDefinition"),
440+
"desired_count": service.get("desiredCount", 0),
441+
"running_count": service.get("runningCount", 0),
442+
"pending_count": service.get("pendingCount", 0),
443+
"status": service_status,
444+
"launch_type": service.get("launchType"),
445+
"capacity_provider_strategy": service.get(
446+
"capacityProviderStrategy", []
447+
),
448+
"platform_version": service.get("platformVersion"),
449+
"platform_family": service.get("platformFamily"),
450+
"network_configuration": network_config,
451+
"security_groups": security_group_details,
452+
"load_balancers": service.get("loadBalancers", []),
453+
"service_registries": service.get("serviceRegistries", []),
454+
"tags": {
455+
tag["key"]: tag["value"] for tag in service.get("tags", [])
456+
},
457+
"enable_execute_command": service.get(
458+
"enableExecuteCommand", False
459+
),
460+
"health_check_grace_period_seconds": service.get(
461+
"healthCheckGracePeriodSeconds"
462+
),
463+
"scheduling_strategy": service.get("schedulingStrategy"),
464+
"deployment_controller": service.get("deploymentController", {}),
465+
"service_connect_configuration": service.get(
466+
"serviceConnectConfiguration", {}
467+
),
468+
}
469+
services_list.append(service_config)
470+
471+
except Exception as e:
472+
logger.error(f"Failed to process service: {e}")
473+
services_with_errors.append(
474+
{
475+
"service_name": service.get("serviceName", "Unknown"),
476+
"error": str(e),
477+
}
478+
)
479+
480+
except Exception as e:
481+
logger.error(f"Failed to describe services batch: {e}")
482+
for arn in batch_arns:
483+
service_name = arn.split("/")[-1]
484+
services_with_errors.append({"service_name": service_name, "error": str(e)})
485+
486+
cluster_config["services"] = services_list
487+
if services_with_errors:
488+
cluster_config["collection_errors"].extend(
489+
[f"Service {s['service_name']}: {s['error']}" for s in services_with_errors]
490+
)
491+
logger.warning(
492+
f"{len(services_with_errors)} service(s) had errors during collection"
493+
)
494+
495+
except Exception as e:
496+
error_msg = f"Failed to collect service configurations: {str(e)}"
497+
logger.warning(error_msg)
498+
cluster_config["collection_errors"].append(error_msg)
499+
500+
# Step 3: Collect task definition configurations
501+
try:
502+
logger.info(
503+
f"Step 3: Collecting task definition configurations for cluster '{cluster_name}'"
504+
)
505+
506+
# Get unique task definition ARNs from services
507+
task_def_arns = set()
508+
for service in cluster_config["services"]:
509+
task_def_arn = service.get("task_definition")
510+
if task_def_arn:
511+
task_def_arns.add(task_def_arn)
512+
513+
logger.info(f"Found {len(task_def_arns)} unique task definition(s) to describe")
514+
515+
task_definitions_list = []
516+
for task_def_arn in task_def_arns:
517+
try:
518+
# Describe individual task definition
519+
task_def_response = await ecs_api_operation(
520+
api_operation="DescribeTaskDefinition",
521+
api_params={"taskDefinition": task_def_arn, "include": ["TAGS"]},
522+
)
523+
524+
task_def = task_def_response.get("taskDefinition", {})
525+
task_def_config = {
526+
"family": task_def.get("family"),
527+
"task_definition_arn": task_def.get("taskDefinitionArn"),
528+
"revision": task_def.get("revision"),
529+
"status": task_def.get("status"),
530+
"requires_compatibilities": task_def.get("requiresCompatibilities", []),
531+
"network_mode": task_def.get("networkMode"),
532+
"cpu": task_def.get("cpu"),
533+
"memory": task_def.get("memory"),
534+
"task_role_arn": task_def.get("taskRoleArn"),
535+
"execution_role_arn": task_def.get("executionRoleArn"),
536+
"container_definitions": task_def.get("containerDefinitions", []),
537+
"volumes": task_def.get("volumes", []),
538+
"placement_constraints": task_def.get("placementConstraints", []),
539+
"requires_attributes": task_def.get("requiresAttributes", []),
540+
"pid_mode": task_def.get("pidMode"),
541+
"ipc_mode": task_def.get("ipcMode"),
542+
"proxy_configuration": task_def.get("proxyConfiguration", {}),
543+
"inference_accelerators": task_def.get("inferenceAccelerators", []),
544+
"ephemeral_storage": task_def.get("ephemeralStorage", {}),
545+
"runtime_platform": task_def.get("runtimePlatform", {}),
546+
"tags": {tag["key"]: tag["value"] for tag in task_def_response.get("tags", [])},
547+
}
548+
task_definitions_list.append(task_def_config)
549+
550+
except Exception as e:
551+
logger.warning(f"Failed to describe task definition '{task_def_arn}': {e}")
552+
cluster_config["collection_errors"].append(
553+
f"Task definition {task_def_arn}: {str(e)}"
554+
)
555+
556+
cluster_config["task_definitions"] = task_definitions_list
557+
logger.info(
558+
f"Successfully collected {len(task_definitions_list)} task definition configuration(s)"
559+
)
560+
561+
except Exception as e:
562+
error_msg = f"Failed to collect task definition configurations: {str(e)}"
563+
logger.warning(error_msg)
564+
cluster_config["collection_errors"].append(error_msg)
565+
566+
# Step 4: Collect unique security groups (deduplicated from services)
567+
try:
568+
logger.info("Step 4: Deduplicating security group configurations")
569+
unique_security_groups = {}
570+
571+
for service in cluster_config["services"]:
572+
for sg in service.get("security_groups", []):
573+
sg_id = sg.get("group_id")
574+
if sg_id and sg_id not in unique_security_groups:
575+
unique_security_groups[sg_id] = sg
576+
577+
cluster_config["security_groups"] = list(unique_security_groups.values())
578+
logger.info(f"Found {len(unique_security_groups)} unique security group(s)")
579+
580+
except Exception as e:
581+
error_msg = f"Failed to process security groups: {str(e)}"
582+
logger.warning(error_msg)
583+
cluster_config["collection_errors"].append(error_msg)
584+
585+
# Log collection summary
586+
logger.info(
587+
f"Configuration collection complete for '{cluster_name}': "
588+
f"{len(cluster_config['services'])} services, "
589+
f"{len(cluster_config['task_definitions'])} task definitions, "
590+
f"{len(cluster_config['security_groups'])} security groups"
591+
)
592+
593+
if cluster_config["collection_errors"]:
594+
logger.warning(
595+
f"Collection completed with {len(cluster_config['collection_errors'])} error(s)"
596+
)
597+
598+
return cluster_config

0 commit comments

Comments
 (0)