diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..c208433 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,132 @@ +name: lint + +on: + push: + branches: [main] + pull_request: + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.7.0" + # Disable the Node wrapper: it buffers stdout/stderr, which causes + # `terraform console` to hang when fed expressions via a pipe in CI. + terraform_wrapper: false + + - name: JSON parse + run: | + set -e + for f in policies/*.json; do + python3 -c "import json; json.load(open('$f'))" \ + || { echo "BROKEN: $f"; exit 1; } + echo "ok: $f" + done + + - name: Substitution + post-substitution lint (round-trip) + run: | + set -e + ACCOUNT_ID="123456789012" + AGENT_ROLE_NAME="loki-agent-role" + IAM_PATH="loki/" + BOUNDARY_POLICY_NAME="LokiPermissionsBoundary" + TRAIL_BUCKET_NAME="my-org-cloudtrail-logs" + KMS_REGION="us-east-1" + TRAIL_KMS_KEY_ID="abcd1234-abcd-1234-abcd-123456789012" + + mkdir -p out + for f in policies/*.json; do + # Longest tokens first to prevent IAM_PATH matching inside IAM_PATHAGENT_ROLE_NAME. + # MIRROR of README.md substitution helper — keep the two in sync. + sed \ + -e "s|IAM_PATHAGENT_ROLE_NAME|${IAM_PATH}${AGENT_ROLE_NAME}|g" \ + -e "s|IAM_PATHLokiPermissionsBoundary|${IAM_PATH}${BOUNDARY_POLICY_NAME}|g" \ + -e "s|IAM_PATH|${IAM_PATH}|g" \ + -e "s|ACCOUNT_ID|${ACCOUNT_ID}|g" \ + -e "s|TRAIL_BUCKET_NAME|${TRAIL_BUCKET_NAME}|g" \ + -e "s|KMS_REGION|${KMS_REGION}|g" \ + -e "s|TRAIL_KMS_KEY_ID|${TRAIL_KMS_KEY_ID}|g" \ + "$f" > "out/$(basename "$f")" + done + + # Lint 1: no placeholders should remain (only check tokens that JSON actually contains) + # Placeholders are uppercase tokens that should not survive substitution. 
+ # No word boundaries: substitution outputs are slash/quote-bounded in JSON, + # and \b doesn't fire between two word chars (e.g. \bIAM_PATH\b would NOT + # match IAM_PATHAGENT_ROLE_NAME because the trailing 'A' is a word char). + if grep -E '(ACCOUNT_ID|AGENT_ROLE_NAME|IAM_PATH|KMS_REGION|TRAIL_BUCKET_NAME|TRAIL_KMS_KEY_ID)' out/*.json; then + echo "Placeholders survived substitution above"; exit 1 + fi + # Lint 2: no double-slash IAM ARNs (catches IAM_PATH leading-slash bug) + if grep -E 'role//|policy//|instance-profile//' out/*.json; then + echo "Double-slash ARN detected above (IAM_PATH substitution bug)"; exit 1 + fi + # Lint 3: still parse as JSON + for f in out/*.json; do python3 -c "import json; json.load(open('$f'))"; done + echo "ok: substitution helper produces valid policies" + + - name: Terraform fmt + working-directory: terraform + run: terraform fmt -check -recursive + + - name: Terraform validate + working-directory: terraform + run: | + terraform init -backend=false -input=false + terraform validate + + - name: JSON ↔ Terraform per-Sid parity (all 3 policies) + # The Terraform module and policies/*.json must encode identical Sid + # → (Action|NotAction) sets. Drift would leave one deployment path + # under-protected (or over-permissive). We render each Terraform-emitted + # statement list via `terraform console` and diff per-Sid action sets. + # Per-Sid (vs union-flat) catches the case where an action moves + # between statements with different Resource scopes. + working-directory: terraform + run: | + set -e + + # Re-init in this step (each GHA step starts in a fresh shell; + # while files persist, the terraform console subcommand needs a + # populated .terraform/ that matches the current init flags). 
+ terraform init -backend=false -input=false + + cat > ci.auto.tfvars <<'TFVARS' + account_id = "123456789012" + trail_bucket_name = "my-org-cloudtrail-logs" + trail_kms_key_arn = "arn:aws:kms:us-east-1:123456789012:key/abcd1234-abcd-1234-abcd-123456789012" + TFVARS + + # (json_file, terraform_expression) tuples — one per policy. + # The expression must render the full Statement list, normalizing + # Action/NotAction lists vs strings. + # `terraform console` is wrapped in `timeout` because it can hang + # on CI runners (no TTY) if init state is incomplete — fail loud + # instead of silently consuming the job's wall-clock budget. + render_tf() { + local out="$1"; local expr="$2" + echo "$expr" | timeout 30 terraform console > "${out}.raw" + if [ ! -s "${out}.raw" ] || grep -qE '^(Error|Warning):' "${out}.raw"; then + echo "terraform console failed for ${out}:"; cat "${out}.raw"; exit 1 + fi + python3 -c 'import json; print(json.loads(open("'${out}'.raw").read().strip()))' > "$out" + rm "${out}.raw" + } + + render_tf tf-deny.json \ + 'jsonencode([for s in concat(local.deny_guardrails_base_statements, [local.deny_trail_storage_statement], [local.deny_trail_kms_statement]) : { Sid = s.Sid, Action = try(tolist(s.Action), can(s.Action) ? [s.Action] : []), NotAction = try(tolist(s.NotAction), can(s.NotAction) ? [s.NotAction] : []) }])' + + render_tf tf-iam-scoped.json \ + 'jsonencode([for s in local.iam_scoped_statements : { Sid = s.Sid, Action = try(tolist(s.Action), can(s.Action) ? [s.Action] : []), NotAction = try(tolist(s.NotAction), can(s.NotAction) ? [s.NotAction] : []) }])' + + render_tf tf-boundary.json \ + 'jsonencode([for s in local.permissions_boundary_statements : { Sid = s.Sid, Action = try(tolist(s.Action), can(s.Action) ? [s.Action] : []), NotAction = try(tolist(s.NotAction), can(s.NotAction) ? 
[s.NotAction] : []) }])' + + rm ci.auto.tfvars + + python3 ../scripts/check_parity.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60583cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# Terraform local state and provider cache +**/.terraform/ +**/.terraform.lock.hcl +*.tfstate +*.tfstate.* +*.tfplan +crash.log +crash.*.log + +# Tfvars often contain secrets +*.auto.tfvars +*.tfvars +!example.tfvars + +# OS / editor +.DS_Store +*.swp +*.swo diff --git a/README.md b/README.md index 89127cc..1175851 100644 --- a/README.md +++ b/README.md @@ -111,30 +111,44 @@ Combined with `PowerUserAccess` (AWS managed), this gives the agent full service ## Quick Start +> ⚠️ **Step 0 first.** The `policies/*.json` files contain literal placeholders +> (`ACCOUNT_ID`, `IAM_PATH`, `TRAIL_BUCKET_NAME`, etc.). Handing raw template files +> to `aws iam create-policy` fails with `MalformedPolicyDocument`. Run the +> substitution helper from the [Customization](#customization) section below +> first; it produces resolved `out/*.json` files. The commands below consume +> `out/*.json`, not `policies/*.json`. + ```bash +# 0. Resolve placeholders → out/*.json (see "Customization" section for the helper) +# After running it, you should have: out/permissions-boundary.json, out/iam-scoped.json, +# out/deny-guardrails.json, out/trust-policy.json + # 1. Create the permissions boundary (admin does this) aws iam create-policy \ --policy-name LokiPermissionsBoundary \ --path "/loki/" \ - --policy-document file://policies/permissions-boundary.json + --policy-document file://out/permissions-boundary.json # 2. Create the agent role aws iam create-role \ --role-name loki-agent-role \ - --assume-role-policy-document file://policies/trust-policy.json + --path "/loki/" \ + --assume-role-policy-document file://out/trust-policy.json # 3. 
Attach all policies aws iam attach-role-policy --role-name loki-agent-role \ --policy-arn arn:aws:iam::aws:policy/PowerUserAccess aws iam put-role-policy --role-name loki-agent-role \ --policy-name LokiIAMScoped \ - --policy-document file://policies/iam-scoped.json + --policy-document file://out/iam-scoped.json aws iam put-role-policy --role-name loki-agent-role \ --policy-name LokiDenyGuardrails \ - --policy-document file://policies/deny-guardrails.json + --policy-document file://out/deny-guardrails.json # 4. Create instance profile and attach to EC2 -aws iam create-instance-profile --instance-profile-name loki-agent-profile +aws iam create-instance-profile \ + --instance-profile-name loki-agent-profile \ + --path "/loki/" aws iam add-role-to-instance-profile \ --instance-profile-name loki-agent-profile \ --role-name loki-agent-role @@ -143,7 +157,26 @@ aws ec2 associate-iam-instance-profile \ --iam-instance-profile Name=loki-agent-profile ``` -See [docs/](docs/) for detailed setup, migration, and Terraform integration guides. +See [docs/](docs/) for detailed policy architecture and Terraform integration notes. + +### Terraform + +```hcl +module "loki_permissions" { + source = "github.com/inceptionstack/loki-permissions//terraform" + + account_id = "123456789012" + agent_role_name = "loki-agent-role" + + # Optional: scoped denies on the audit-trail S3 bucket and KMS key. + # Leave null if you have no CloudTrail or it's unencrypted. + # IMPORTANT: these resources must be managed outside this state. + trail_bucket_name = "my-org-cloudtrail-logs" + trail_kms_key_arn = "arn:aws:kms:us-east-1:123456789012:key/abcd1234-..." +} +``` + +The `trail_kms_key_arn` variable has plan-time validation — partial values (key UUIDs, alias ARNs) are rejected. If your trail is unencrypted, leave it `null` and the `DenyTrailKmsTampering` statement is omitted entirely (preferred over deploying a dead deny). 
## Repository Structure @@ -156,10 +189,14 @@ See [docs/](docs/) for detailed setup, migration, and Terraform integration guid ├── terraform/ # Terraform module │ ├── main.tf # Agent role + policies │ ├── variables.tf # Configurable inputs -│ └── outputs.tf # ARNs and names +│ ├── outputs.tf # ARNs and names +│ └── examples/ # Standalone consumer examples (NOT part of module) +│ ├── README.md +│ └── downstream-consumer.tf ├── docs/ -│ ├── policy-design.md # Full policy architecture docs -│ └── migration-guide.md # Step-by-step migration from admin +│ └── policy-design.md # Full policy architecture docs +├── .github/workflows/ +│ └── lint.yml # JSON parse, sub round-trip, TF validate, JSON↔TF parity └── README.md ``` @@ -170,10 +207,74 @@ Before deploying, update these values in the policy files: | Placeholder | Description | Example | |------------|-------------|---------| | `ACCOUNT_ID` | Your AWS account ID | `123456789012` | -| `AGENT_ROLE_NAME` | Name of the agent's IAM role | `loki-agent-role` | -| `BOUNDARY_POLICY_NAME` | Name of the permissions boundary | `LokiPermissionsBoundary` | -| `IAM_PATH` | Path prefix for agent-created roles | `/loki/` | +| `AGENT_ROLE_NAME` | Bare name of the agent's IAM role (no path). The path is supplied separately via `IAM_PATH`. Used by `DenySelfEscalation` together with `IAM_PATH` to build the role ARN. | `loki-agent-role` | +| `IAM_PATH` | Path prefix for agent-created roles. **Substitute with NO leading slash** (e.g. `loki/`) so it composes correctly into ARNs as `role/loki/...`. The Terraform variable accepts the conventional leading-slash form (`/loki/`) and handles ARN composition itself. | `loki/` (in JSON)
`/loki/` (Terraform var) | +| `TRAIL_BUCKET_NAME` | S3 bucket holding CloudTrail logs (used by `DenyTrailStorageTampering`) | `my-org-cloudtrail-logs` | +| `KMS_REGION` | Region of the trail's KMS CMK (used by `DenyTrailKmsTampering`) | `us-east-1` | +| `TRAIL_KMS_KEY_ID` | UUID of the trail's KMS CMK (used by `DenyTrailKmsTampering`) | `abcd1234-...` | + +> ⚠️ **All three trail placeholders (`TRAIL_BUCKET_NAME`, `KMS_REGION`, `TRAIL_KMS_KEY_ID`) must be replaced with real values before deployment.** A leftover literal placeholder will deploy a syntactically valid statement that matches no resource — silent no-op. If your trail is **unencrypted**, delete the entire `DenyTrailKmsTampering` statement rather than supplying a fake KMS ARN. Likewise, if you have no CloudTrail at all, delete `DenyTrailStorageTampering` and `DenyTrailKmsTampering`. +> +> **Pre-deploy lint** (run after substitution, before `aws iam put-role-policy`): +> +> ```bash +> # 1. No literal placeholders should remain +> ! grep -E 'ACCOUNT_ID|AGENT_ROLE_NAME|IAM_PATH|KMS_REGION|TRAIL_(BUCKET_NAME|KMS_KEY_ID)' out/*.json +> +> # 2. No double-slash ARNs (catches IAM_PATH substituted with leading slash) +> ! grep -E 'role//|policy//|instance-profile//' out/*.json +> +> # 3. Strict JSON parse on the substituted output (templates are checked by CI) +> for f in out/*.json; do python3 -c "import json; json.load(open('$f'))" || echo "BROKEN: $f"; done +> ``` +> +> **Substitution helper** (avoids ordering footguns when tokens share substrings, e.g. `IAM_PATH` is a prefix of `IAM_PATHAGENT_ROLE_NAME`): +> +> ```bash +> # Edit these for your environment +> ACCOUNT_ID="123456789012" +> AGENT_ROLE_NAME="loki-agent-role" +> IAM_PATH="loki/" # NO leading slash for JSON substitution +> TRAIL_BUCKET_NAME="my-org-cloudtrail-logs" +> KMS_REGION="us-east-1" +> TRAIL_KMS_KEY_ID="abcd1234-abcd-1234-abcd-123456789012" +> +> # Substitute longest tokens first — prevents IAM_PATH matching inside IAM_PATHAGENT_ROLE_NAME. 
+> # PARALLEL to `.github/workflows/lint.yml` substitution step but NOT identical: +> # this README hardcodes "LokiPermissionsBoundary" while CI uses ${BOUNDARY_POLICY_NAME}. +> # The two paths are equivalent for the default boundary name; if the boundary is +> # renamed in Terraform, this CLI flow does not pick it up. (Extract to +> # scripts/substitute.sh if drift becomes a problem in practice.) +> # +> # NOTE: The JSON template (this CLI flow) hardcodes the boundary name +> # "LokiPermissionsBoundary". To use a different boundary name, either +> # (a) deploy via the Terraform module which parameterizes it as +> # var.boundary_policy_name, or (b) edit the literal in policies/*.json +> # before running this helper. +> mkdir -p out +> for f in policies/*.json; do +> sed \ +> -e "s|IAM_PATHAGENT_ROLE_NAME|${IAM_PATH}${AGENT_ROLE_NAME}|g" \ +> -e "s|IAM_PATHLokiPermissionsBoundary|${IAM_PATH}LokiPermissionsBoundary|g" \ +> -e "s|IAM_PATH|${IAM_PATH}|g" \ +> -e "s|ACCOUNT_ID|${ACCOUNT_ID}|g" \ +> -e "s|TRAIL_BUCKET_NAME|${TRAIL_BUCKET_NAME}|g" \ +> -e "s|KMS_REGION|${KMS_REGION}|g" \ +> -e "s|TRAIL_KMS_KEY_ID|${TRAIL_KMS_KEY_ID}|g" \ +> "$f" > "out/$(basename "$f")" +> done +> +> # Then run the lint above against out/*.json +> ``` +> +> The KMS resource is split into `KMS_REGION:ACCOUNT_ID:key/TRAIL_KMS_KEY_ID` rather than a single `TRAIL_KMS_KEY_ARN` placeholder so partial substitution still produces an ARN-shaped string — a common mistake (pasting only the key UUID) at least fails loudly instead of deploying a dead deny. +> +> **Day-2 ops warning:** `DenyTrailStorageTampering` blocks `s3:PutBucketPolicy`, `PutEncryptionConfiguration`, `PutBucketVersioning`, etc. on the trail bucket; `DenyTrailKmsTampering` blocks `kms:PutKeyPolicy`, `ScheduleKeyDeletion`, etc. on the trail's CMK. The trail bucket and KMS key **must be managed outside this agent's Terraform state** (separate state file, separate role, or admin-only). 
Otherwise day-2 maintenance — KMS key rotation, bucket policy update for new accounts, lifecycle-rule changes — will fail with access-denied errors, with no remediation path until the deny is lifted manually. Recommended layout: a dedicated `audit-trail/` Terraform module owned by the platform/security team, run with an admin role; this `loki-permissions` module references its outputs but never writes to the bucket/key. +> +> **Terraform users:** if you deploy via the `terraform/` module, set `trail_bucket_name` and `trail_kms_key_arn` (full ARN) variables — the module variable validation rejects partial ARNs at plan-time. Leave them `null` to skip the trail-storage and trail-KMS statements entirely. ## License -MIT +Apache License 2.0 — see [LICENSE](LICENSE). + +SPDX-License-Identifier: Apache-2.0 diff --git a/docs/migration-guide.md b/docs/migration-guide.md deleted file mode 100644 index d08dc0b..0000000 --- a/docs/migration-guide.md +++ /dev/null @@ -1,349 +0,0 @@ -# Loki Policy Migration Template — YourCurrentAdminRole → Scoped Permissions - -> Step-by-step migration guide for downgrading an AI agent from full admin to scoped permissions. -> Designed for zero-downtime migration with rollback capability. 
-> Last updated: 2026-03-16 - ---- - -## Prerequisites - -- [ ] Read `Loki-Policy-Template.md` — understand the target policy architecture -- [ ] Admin access to create the new role and policies (human does this, NOT the agent) -- [ ] List of all existing IAM roles created by Terraform (the agent can generate this) -- [ ] Terraform state access for all managed projects - ---- - -## Phase 1: Inventory (Agent does this) - -### 1.1 List all IAM roles created by the agent's Terraform projects - -```bash -# For each infra repo, find all IAM resources -for REPO in $(aws codecommit list-repositories --query 'repositories[*].repositoryName' --output text); do - echo "=== $REPO ===" - # Clone and scan for IAM resources - git clone /tmp/$REPO 2>/dev/null - grep -r 'aws_iam_role\|aws_iam_policy' /tmp/$REPO/*.tf 2>/dev/null | grep 'resource' -done -``` - -### 1.2 Generate migration manifest - -Create a JSON file listing every role that needs to move to `/loki/` path: - -```json -{ - "migration_date": "2026-03-16", - "account_id": "ACCOUNT_ID", - "roles_to_migrate": [ - { - "project": "myapp", - "current_name": "myapp-enqueue-role", - "current_arn": "arn:aws:iam::ACCOUNT_ID:role/myapp-enqueue-role", - "new_path": "/loki/", - "new_name": "myapp-enqueue-role", - "new_arn": "arn:aws:iam::ACCOUNT_ID:role/loki/myapp-enqueue-role", - "terraform_file": "iam.tf", - "terraform_resource": "aws_iam_role.enqueue", - "services_using_role": ["lambda:myapp-enqueue"] - } - ], - "policies_to_migrate": [], - "instance_profiles_to_migrate": [] -} -``` - -### 1.3 Check for cross-references - -Some roles are referenced by ARN in other services (Lambda function configs, ECS task definitions, etc.). 
These need to be updated too: - -```bash -# Find all places a role ARN is hardcoded -grep -r "arn:aws:iam.*role/" /tmp/*/ # In Terraform -aws lambda list-functions --query 'Functions[*].{fn: FunctionName, role: Role}' # In Lambda configs -aws ecs list-task-definitions # In ECS task defs -``` - ---- - -## Phase 2: Prepare (Human admin does steps 2.1-2.3, Agent does 2.4) - -### 2.1 Create the new agent role (Human admin) - -```bash -# Create the agent role that will replace YourCurrentAdminRole -aws iam create-role \ - --role-name loki-agent-role \ - --assume-role-policy-document '{ - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Principal": {"Service": "ec2.amazonaws.com"}, - "Action": "sts:AssumeRole" - }] - }' -``` - -### 2.2 Attach policies to new role (Human admin) - -```bash -# Base: PowerUserAccess -aws iam attach-role-policy --role-name loki-agent-role \ - --policy-arn arn:aws:iam::aws:policy/PowerUserAccess - -# Scoped IAM (from Loki-Policy-Template.md) -aws iam put-role-policy --role-name loki-agent-role \ - --policy-name LokiIAMScoped \ - --policy-document file://loki-iam-scoped.json - -# Deny guardrails (from Loki-Policy-Template.md) -aws iam put-role-policy --role-name loki-agent-role \ - --policy-name LokiDenyGuardrails \ - --policy-document file://loki-deny-guardrails.json -``` - -### 2.3 Create instance profile (Human admin) - -```bash -aws iam create-instance-profile --instance-profile-name your-agent-profile -aws iam add-role-to-instance-profile \ - --instance-profile-name your-agent-profile \ - --role-name loki-agent-role -``` - -### 2.4 Update all Terraform configs (Agent) - -For every Terraform project, update IAM resources to use `/loki/` path: - -```hcl -# Add path = "/loki/" to every aws_iam_role -resource "aws_iam_role" "example" { - name = "my-app-role" - path = "/loki/" # ← ADD THIS - # ... 
rest unchanged -} - -# Add path = "/loki/" to every aws_iam_policy -resource "aws_iam_policy" "example" { - name = "my-app-policy" - path = "/loki/" # ← ADD THIS - # ... rest unchanged -} - -# Add path = "/loki/" to every aws_iam_instance_profile -resource "aws_iam_instance_profile" "example" { - name = "my-app-profile" - path = "/loki/" # ← ADD THIS - # ... rest unchanged -} -``` - -**Important:** Adding `path` to an existing role is a **destructive change** — Terraform will destroy the old role and create a new one. This means: -- Lambda functions will briefly lose their execution role -- ECS services will need task def updates -- CodePipeline/CodeBuild roles will need re-attachment - ---- - -## Phase 3: Migrate Roles (Agent, one project at a time) - -### Migration Strategy: Parallel Create → Switch → Delete - -To avoid downtime, create new `/loki/` roles alongside old ones, switch services over, then delete old roles. - -### 3.1 Per-project migration steps - -```bash -# For each project (e.g., myapp): - -# Step 1: terraform plan — review what will change -cd /tmp/-infra -terraform plan - -# Step 2: If Terraform shows destroy+create for roles, proceed carefully -# The plan should show: -# - aws_iam_role.xxx will be destroyed (old path) -# - aws_iam_role.xxx will be created (new /loki/ path) - -# Step 3: Apply with -target for IAM resources first -terraform apply -target=aws_iam_role.enqueue -target=aws_iam_role.parser ... - -# Step 4: Apply the rest (Lambda configs will update to new role ARNs) -terraform apply - -# Step 5: Verify all services are working -aws lambda invoke --function-name /dev/null # Test each Lambda -aws codepipeline start-pipeline-execution --name # Test pipeline -``` - -### 3.2 Alternative: Terraform state manipulation (advanced, zero-downtime) - -For critical production services, use `terraform state rm` + `terraform import` to avoid destroy+create: - -```bash -# 1. 
Manually create new role with /loki/ path via CLI -aws iam create-role --role-name my-role --path /loki/ --assume-role-policy-document ... -aws iam put-role-policy --role-name my-role --policy-name ... --policy-document ... - -# 2. Update Lambda to use new role -aws lambda update-function-configuration --function-name my-fn --role arn:aws:iam::...:role/loki/my-role - -# 3. Remove old resource from Terraform state -terraform state rm aws_iam_role.my_role - -# 4. Import new role into Terraform state -terraform import aws_iam_role.my_role my-role - -# 5. Delete old role manually -aws iam delete-role-policy --role-name my-old-role --policy-name ... -aws iam delete-role --role-name my-old-role -``` - ---- - -## Phase 4: Switch Instance Profile (Human admin) - -**⚠️ This is the critical moment. Do this during a maintenance window.** - -```bash -# 1. Disassociate current instance profile -ASSOC_ID=$(aws ec2 describe-iam-instance-profile-associations \ - --filters "Name=instance-id,Values=i-XXXXXXXXX" \ - --query 'IamInstanceProfileAssociations[0].AssociationId' --output text) - -aws ec2 replace-iam-instance-profile-association \ - --association-id $ASSOC_ID \ - --iam-instance-profile Name=your-agent-profile - -# 2. 
Verify agent can still operate -# Agent should run verification checklist from Loki-Policy-Template.md -``` - -### Rollback plan - -If anything breaks: -```bash -# Immediately revert to YourCurrentAdminRole -aws ec2 replace-iam-instance-profile-association \ - --association-id $ASSOC_ID \ - --iam-instance-profile Name= -``` - ---- - -## Phase 5: Verify (Agent) - -Run the full verification checklist: - -```bash -echo "=== Positive tests (should succeed) ===" - -# Can create /loki/ roles -aws iam create-role --role-name migration-test --path /loki/ \ - --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' -echo "✅ Create /loki/ role" -aws iam delete-role --role-name migration-test -echo "✅ Delete /loki/ role" - -# Can use PowerUser services -aws s3 ls >/dev/null && echo "✅ S3 access" -aws lambda list-functions --max-items 1 >/dev/null && echo "✅ Lambda access" -aws dynamodb list-tables --max-items 1 >/dev/null && echo "✅ DynamoDB access" - -echo "" -echo "=== Negative tests (should fail with AccessDenied) ===" - -# Cannot create users -aws iam create-user --user-name test-should-fail 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: create user" - -# Cannot create roles outside /loki/ -aws iam create-role --role-name outside-path-test \ - --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: role outside /loki/" - -# Cannot modify own role -aws iam attach-role-policy --role-name loki-agent-role \ - --policy-arn arn:aws:iam::aws:policy/AdministratorAccess 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: self-escalation" - -# Cannot create access keys -aws iam create-access-key --user-name admin 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: create access key" -``` - ---- - -## Phase 6: Cleanup (Agent) 
- -```bash -# 1. Delete old IAM roles that are no longer in use -# (Only after verifying all services use /loki/ roles) -for OLD_ROLE in $(cat migration-manifest.json | jq -r '.roles_to_migrate[].current_name'); do - echo "Deleting old role: $OLD_ROLE" - # Remove inline policies first - for POLICY in $(aws iam list-role-policies --role-name $OLD_ROLE --query 'PolicyNames[*]' --output text); do - aws iam delete-role-policy --role-name $OLD_ROLE --policy-name $POLICY - done - # Detach managed policies - for POLICY_ARN in $(aws iam list-attached-role-policies --role-name $OLD_ROLE --query 'AttachedPolicies[*].PolicyArn' --output text); do - aws iam detach-role-policy --role-name $OLD_ROLE --policy-arn $POLICY_ARN - done - # Delete role - aws iam delete-role --role-name $OLD_ROLE -done - -# 2. Remove old instance profile (human admin) -# aws iam remove-role-from-instance-profile ... -# aws iam delete-instance-profile ... - -# 3. Update MEMORY.md and AGENTS.md with new role info -``` - ---- - -## Post-Migration Updates - -### AGENTS.md -Add to Safety section: -```markdown -- **IAM roles must use path `/loki/`** — Terraform `path = "/loki/"` on all `aws_iam_role`, `aws_iam_policy`, and `aws_iam_instance_profile` resources. Agent cannot create roles outside this path. -``` - -### new-project-template.md -Update IAM section to include `/loki/` path requirement. 
- -### MEMORY.md -Update IAM Role entry: -```markdown -- **IAM Role:** loki-agent-role (PowerUserAccess + LokiIAMScoped + LokiDenyGuardrails) -- **IAM Path:** /loki/ (all Terraform IAM resources must use this path) -``` - ---- - -## Troubleshooting - -| Symptom | Cause | Fix | -|---------|-------|-----| -| `terraform apply` fails with AccessDenied on IAM | Missing `path = "/loki/"` in Terraform | Add `path = "/loki/"` to the resource | -| Lambda fails with "role cannot be assumed" | New role ARN not propagated (IAM eventual consistency) | Wait 10-30 seconds and retry | -| CodePipeline fails | Pipeline role moved but stage configs reference old ARN | Update pipeline stage configs | -| Agent can't `PassRole` | Role is outside `/loki/` path | Move role to `/loki/` path first | -| `terraform plan` shows destroy+create for roles | Path change = new resource | Expected — use parallel create strategy or state manipulation | - ---- - -## Timeline Estimate - -| Phase | Duration | Who | -|-------|----------|-----| -| Phase 1: Inventory | 15 min | Agent | -| Phase 2: Prepare | 30 min | Human (role) + Agent (Terraform) | -| Phase 3: Migrate roles | 15-30 min per project | Agent | -| Phase 4: Switch profile | 5 min | Human | -| Phase 5: Verify | 10 min | Agent | -| Phase 6: Cleanup | 15 min | Agent | -| **Total** | **~2-3 hours** | Mixed | - ---- - -*This is a template. Adjust phases and steps for your specific environment.* diff --git a/docs/policy-design.md b/docs/policy-design.md index 7d16393..a1e5eec 100644 --- a/docs/policy-design.md +++ b/docs/policy-design.md @@ -40,45 +40,20 @@ EC2 Instance Profile The boundary allows all services EXCEPT `iam:*`, `organizations:*`, and `account:*`. This means even if a role has `AdministratorAccess` attached, the effective permissions are capped at PowerUser-level. 
-```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "AllowEverythingExceptDangerous", - "Effect": "Allow", - "NotAction": [ - "iam:*", - "organizations:*", - "account:*" - ], - "Resource": "*" - }, - { - "Sid": "AllowPassRoleOnlyLoki", - "Effect": "Allow", - "Action": "iam:PassRole", - "Resource": "arn:aws:iam::*:role/loki/*" - }, - { - "Sid": "AllowReadOnlyIAM", - "Effect": "Allow", - "Action": [ - "iam:GetRole", - "iam:GetRolePolicy", - "iam:ListRolePolicies", - "iam:ListAttachedRolePolicies" - ], - "Resource": "*" - } - ] -} -``` +> **Canonical source:** [`policies/permissions-boundary.json`](../policies/permissions-boundary.json) (placeholders form) and `terraform/main.tf` `aws_iam_policy.permissions_boundary` (Terraform form). + +| Sid | Effect | What it does | +|------|--------|--------------| +| AllowEverythingExceptDangerous | Allow | `NotAction: [iam:*, organizations:*, account:*]` on `Resource: *` — caps every role attached to the boundary at PowerUser-level | +| AllowPassRoleOnlyAgentRoles | Allow | `iam:PassRole` only to `role/IAM_PATH*` — boundary-attached roles can hand off only to agent-path roles | +| AllowReadOnlyIAM | Allow | Get/List role-policy basics for self-introspection | ### Guardrails That Enforce the Boundary These statements in `LokiDenyGuardrails` ensure the boundary can't be bypassed: +> **Illustrative — see [`policies/deny-guardrails.json`](../policies/deny-guardrails.json) for the canonical form.** The snippet below uses concrete `loki/` / `LokiPermissionsBoundary` literals for readability; the canonical file uses `IAM_PATH` / `IAM_PATHLokiPermissionsBoundary` placeholders. + ```json { "Sid": "DenyCreateRoleWithoutBoundary", @@ -120,102 +95,22 @@ These statements in `LokiDenyGuardrails` ensure the boundary can't be bypassed: Allows the agent to create/manage IAM roles and policies **only under the `/loki/` path**. This lets Terraform create execution roles for Lambda, ECS, CodeBuild, CodePipeline, etc. 
-```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "AllowRoleManagementUnderLokiPath", - "Effect": "Allow", - "Action": [ - "iam:CreateRole", - "iam:DeleteRole", - "iam:GetRole", - "iam:GetRolePolicy", - "iam:ListRolePolicies", - "iam:ListAttachedRolePolicies", - "iam:ListInstanceProfilesForRole", - "iam:TagRole", - "iam:UntagRole", - "iam:ListRoleTags", - "iam:UpdateRole", - "iam:UpdateRoleDescription", - "iam:PutRolePolicy", - "iam:DeleteRolePolicy", - "iam:AttachRolePolicy", - "iam:DetachRolePolicy" - ], - "Resource": "arn:aws:iam::*:role/loki/*" - }, - { - "Sid": "AllowPolicyManagementUnderLokiPath", - "Effect": "Allow", - "Action": [ - "iam:CreatePolicy", - "iam:DeletePolicy", - "iam:GetPolicy", - "iam:GetPolicyVersion", - "iam:ListPolicyVersions", - "iam:CreatePolicyVersion", - "iam:DeletePolicyVersion", - "iam:TagPolicy", - "iam:UntagPolicy" - ], - "Resource": "arn:aws:iam::*:policy/loki/*" - }, - { - "Sid": "AllowInstanceProfileManagementUnderLokiPath", - "Effect": "Allow", - "Action": [ - "iam:CreateInstanceProfile", - "iam:DeleteInstanceProfile", - "iam:GetInstanceProfile", - "iam:AddRoleToInstanceProfile", - "iam:RemoveRoleFromInstanceProfile", - "iam:TagInstanceProfile" - ], - "Resource": "arn:aws:iam::*:instance-profile/loki/*" - }, - { - "Sid": "AllowPassRoleOnlyLokiRoles", - "Effect": "Allow", - "Action": "iam:PassRole", - "Resource": "arn:aws:iam::*:role/loki/*" - }, - { - "Sid": "AllowServiceLinkedRoles", - "Effect": "Allow", - "Action": [ - "iam:CreateServiceLinkedRole", - "iam:DeleteServiceLinkedRole", - "iam:GetServiceLinkedRoleDeletionStatus" - ], - "Resource": "arn:aws:iam::*:role/aws-service-role/*" - }, - { - "Sid": "AllowIAMReadOnly", - "Effect": "Allow", - "Action": [ - "iam:ListRoles", - "iam:ListPolicies", - "iam:ListInstanceProfiles", - "iam:GetAccountSummary", - "iam:GetAccountAuthorizationDetails", - "iam:SimulatePrincipalPolicy", - "iam:ListOpenIDConnectProviders", - "iam:ListSAMLProviders" - ], - "Resource": "*" - 
} - ] -} -``` +> **Canonical source:** [`policies/iam-scoped.json`](../policies/iam-scoped.json) (placeholders form) and `terraform/main.tf` `aws_iam_role_policy.iam_scoped` (Terraform form). Both must stay in sync; the table below is a Sid-level summary, not a full reproduction. + +| Sid | Effect | Resource | Purpose | +|------|--------|----------|---------| +| AllowRoleManagementUnderAgentPath | Allow | `role/IAM_PATH*` | Create/manage roles only under the agent path | +| AllowPolicyManagementUnderAgentPath | Allow | `policy/IAM_PATH*` | Create/manage policies only under the agent path | +| AllowInstanceProfileManagementUnderAgentPath | Allow | `instance-profile/IAM_PATH*` | Same scope for instance profiles | +| AllowPassRoleOnlyAgentRoles | Allow | `role/IAM_PATH*` | `iam:PassRole` only to agent-created roles | +| AllowServiceLinkedRoles | Allow | `role/aws-service-role/*` | AWS services need to create their own SLRs | +| AllowIAMReadOnly | Allow | `*` | Read-only IAM (Get/List/Simulate) account-wide | ### Important Notes - Replace `*` in the account position of ARNs with your actual AWS account ID for tighter scoping -- The `/loki/` path means all Terraform-created roles must use `path = "/loki/"` in their config -- `PassRole` is restricted to `/loki/*` roles only — the agent can't assign roles it didn't create +- The `IAM_PATH` (e.g. `/loki/` for Terraform, `loki/` for JSON substitution — see README) means all Terraform-created roles must use `path = var.iam_path` in their config +- `PassRole` is restricted to agent-path roles only — the agent can't assign roles it didn't create - Service-linked roles are allowed because AWS services create these automatically --- @@ -225,94 +120,47 @@ This lets Terraform create execution roles for Lambda, ECS, CodeBuild, CodePipel Explicit denies that prevent privilege escalation and dangerous actions. **Deny always wins over Allow** — these can't be bypassed even with PowerUserAccess. 
-```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "DenyIdentityManagement", - "Effect": "Deny", - "Action": [ - "iam:CreateUser", - "iam:DeleteUser", - "iam:CreateGroup", - "iam:DeleteGroup", - "iam:CreateAccessKey", - "iam:DeleteAccessKey", - "iam:CreateLoginProfile", - "iam:DeleteLoginProfile", - "iam:UpdateLoginProfile", - "iam:AddUserToGroup", - "iam:RemoveUserFromGroup", - "iam:AttachUserPolicy", - "iam:DetachUserPolicy", - "iam:PutUserPolicy", - "iam:DeleteUserPolicy", - "iam:AttachGroupPolicy", - "iam:DetachGroupPolicy", - "iam:PutGroupPolicy", - "iam:DeleteGroupPolicy", - "iam:DeactivateMFADevice", - "iam:DeleteVirtualMFADevice" - ], - "Resource": "*" - }, - { - "Sid": "DenySelfEscalation", - "Effect": "Deny", - "Action": [ - "iam:AttachRolePolicy", - "iam:DetachRolePolicy", - "iam:PutRolePolicy", - "iam:DeleteRolePolicy", - "iam:UpdateAssumeRolePolicy", - "iam:DeleteRole" - ], - "Resource": [ - "arn:aws:iam::*:role/YourCurrentAdminRole", - "arn:aws:iam::*:role/loki-agent-role", - "arn:aws:iam::*:instance-profile/your-agent-profile" - ], - "Condition": {} - }, - { - "Sid": "DenyOrganizationsAndAccount", - "Effect": "Deny", - "Action": [ - "organizations:*", - "account:*" - ], - "Resource": "*" - }, - { - "Sid": "DenyRoleManagementOutsideLokiPath", - "Effect": "Deny", - "Action": [ - "iam:CreateRole", - "iam:DeleteRole", - "iam:PutRolePolicy", - "iam:DeleteRolePolicy", - "iam:AttachRolePolicy", - "iam:DetachRolePolicy", - "iam:UpdateAssumeRolePolicy" - ], - "NotResource": [ - "arn:aws:iam::*:role/loki/*", - "arn:aws:iam::*:role/aws-service-role/*" - ] - } - ] -} -``` +> **Two equivalent representations.** This policy is shipped in two forms: +> +> - [`policies/deny-guardrails.json`](../policies/deny-guardrails.json) — raw IAM policy document with literal +> placeholders (`ACCOUNT_ID`, `IAM_PATH`, `AGENT_ROLE_NAME`, etc.). +> Used by the AWS-CLI `Quick Start` flow in the README. 
+> - `terraform/main.tf` `aws_iam_role_policy.deny_guardrails` — same +> policy expressed via `jsonencode()` over a list of statement objects. +> Used by the `terraform/` module flow. +> +> The two **must stay in sync** — enforced by the per-Sid Action-set parity +> check in `.github/workflows/lint.yml`. The Terraform form is canonical for +> ARN composition (uses `aws_iam_role.agent.arn` directly, no path footgun) +> and gates the trail-storage / trail-KMS statements behind input variables +> with validation. The JSON form is canonical for documentation, review, +> and copy/paste auditing. When changing one, change both; CI fails the PR +> otherwise. + +The table below is a Sid-level summary; consult the canonical files for the full action lists. + +| Sid | Resource scope | Purpose | +|------|----------------|---------| +| DenyIdentityManagement | `*` | No new IAM users, access keys, login profiles, MFA devices | +| DenySelfEscalation | `role/IAM_PATHAGENT_ROLE_NAME` (JSON) / `aws_iam_role.agent.arn` (TF) | Agent cannot mutate its own role (policies, trust, tags, description, boundary) | +| DenyOrganizationsAndAccount | `*` | No `organizations:*` / `account:*` | +| DenyRoleManagementOutsideAgentPath | `NotResource: [role/IAM_PATH*, role/aws-service-role/*]` | Role mutation only inside agent path | +| DenyCreateRoleWithoutBoundary | `role/IAM_PATH*` | New roles must attach the permissions boundary | +| DenyRemovingBoundary | `role/IAM_PATH*` | Cannot remove boundary from agent-path roles | +| DenyBoundaryPolicyModification | `policy/IAM_PATHLokiPermissionsBoundary` | Cannot mutate the boundary policy itself | +| DenyCloudTrailTampering | `*` | Cannot stop/delete/update trails, event-data-stores, channels, selectors, resource policies | +| DenyAuditServiceTampering | `*` | Cannot disable Config/GuardDuty/SecurityHub recorders, members, filters, finding triage | +| DenyTrailStorageTampering | trail S3 bucket | Cannot 
delete/policy-modify/notify-redirect/object-overwrite the trail bucket | +| DenyTrailKmsTampering | trail KMS CMK | Cannot delete/disable/grant-modify/import-material the trail's CMK | ### Guardrail Explanations | Rule | Why | |------|-----| | DenyIdentityManagement | Agent can't create users, access keys, or login profiles — no new identities | -| DenySelfEscalation | Agent can't modify its own role or instance profile — no privilege escalation | +| DenySelfEscalation | Agent can't modify its own role or instance profile — no privilege escalation. The JSON template builds the role ARN as `role/IAM_PATHAGENT_ROLE_NAME` so the deny works whether the agent role lives at the root or under a path — substitute `IAM_PATH` (e.g. `loki/`) and `AGENT_ROLE_NAME` (e.g. `loki-agent-role`) independently. The Terraform module avoids the placeholder entirely by referencing `aws_iam_role.agent.arn`. | | DenyOrganizationsAndAccount | Agent can't manage the AWS Organization or account settings | -| DenyRoleManagementOutsideLokiPath | Agent can't touch ANY role outside `/loki/*` — protects admin roles, service roles, etc. | +| DenyRoleManagementOutsideAgentPath | Agent can't touch ANY role outside the agent path — protects admin roles, service roles, etc. | --- @@ -348,42 +196,7 @@ resource "aws_iam_policy" "custom" { ## Instance Profile Setup -```bash -# 1. Create the agent role (do this as the human admin, NOT the agent) -aws iam create-role \ - --role-name loki-agent-role \ - --assume-role-policy-document '{ - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Principal": {"Service": "ec2.amazonaws.com"}, - "Action": "sts:AssumeRole" - }] - }' - -# 2. 
Attach policies -aws iam attach-role-policy --role-name loki-agent-role \ - --policy-arn arn:aws:iam::aws:policy/PowerUserAccess - -aws iam put-role-policy --role-name loki-agent-role \ - --policy-name LokiIAMScoped \ - --policy-document file://loki-iam-scoped.json - -aws iam put-role-policy --role-name loki-agent-role \ - --policy-name LokiDenyGuardrails \ - --policy-document file://loki-deny-guardrails.json - -# 3. Create instance profile and attach -aws iam create-instance-profile --instance-profile-name your-agent-profile -aws iam add-role-to-instance-profile \ - --instance-profile-name your-agent-profile \ - --role-name loki-agent-role - -# 4. Associate with EC2 instance -aws ec2 associate-iam-instance-profile \ - --instance-id i-XXXXXXXXX \ - --iam-instance-profile Name=your-agent-profile -``` +The authoritative setup flow lives in the [main README's Quick Start](../README.md#quick-start). It includes the substitution helper that resolves placeholders in `policies/*.json` to runnable IAM policy documents under `out/*.json`, then runs `aws iam create-policy` / `create-role` / `put-role-policy` / `create-instance-profile` against the resolved files. Don't duplicate that flow here — single source of truth. --- @@ -418,7 +231,107 @@ aws iam create-role --role-name test-outside-path \ 1. **No privilege escalation** — agent can't modify its own permissions 2. **No lateral movement** — agent can't create users/keys to persist access 3. **Blast radius limited** — agent can only create/modify roles under `/loki/` -4. **Audit trail** — all IAM actions logged in CloudTrail +4. **Audit trail integrity** — all IAM actions logged in CloudTrail. 
+ The agent is explicitly denied: + - CloudTrail tampering: `StopLogging`, `DeleteTrail`, `UpdateTrail`, + `PutEventSelectors`, `PutInsightSelectors`, event-data-store mutation + - Config / GuardDuty / SecurityHub recorder tampering (delete, stop, + overwrite, or disable individual standards/controls) + - Trail S3 bucket tampering: delete, policy/ACL change, object + deletion, lifecycle/versioning rewrite, governance-retention bypass + - Trail KMS key tampering: schedule-deletion, disable, key-policy + rewrite, grant create/revoke + + The S3 bucket and KMS key denies are scoped to specific ARNs + (placeholders `TRAIL_BUCKET_NAME` and `KMS_REGION` / `ACCOUNT_ID` / `TRAIL_KMS_KEY_ID` in the + JSON template) so they do not block the agent from working with other + buckets/keys. + + **Trade-offs and caveats:** + + - *CloudTrail denies use `Resource: "*"`.* Most CloudTrail actions + support trail-ARN scoping, but we deny on `*` so the agent cannot + stand up a competing/parallel trail and then disable it. To make + that rationale airtight, the deny also covers `cloudtrail:CreateTrail`, + `CreateEventDataStore`, and `CreateChannel` — the agent cannot + create new audit infrastructure at all. The side effect is that + trail provisioning is an admin/Terraform task, not an agent task. + Adjust if your operating model differs. + + - *KMS grant denies (`CreateGrant` / `RevokeGrant`) are + unconditional.* They prevent the agent from issuing or + revoking grants on the trail's CMK, and they **also** block + grants that an integrated AWS service requests on the agent's + behalf (requests where `kms:GrantIsForAWSResource` is true). For the threat + model here (agent blinding its own audit) this is intended; + if you need to permit service-mediated grants, narrow the deny with + a condition `"Bool": {"kms:GrantIsForAWSResource": "false"}`.
+ + - *Placeholders must be substituted before deployment.* A + literal `TRAIL_BUCKET_NAME` or `TRAIL_KMS_KEY_ID` left in the + policy is syntactically valid but matches nothing — a silent + no-op. If the trail is unencrypted or absent entirely, delete + the corresponding statement rather than supplying a fake ARN. + A pre-deploy lint (`grep -E 'KMS_REGION|TRAIL_(BUCKET_NAME|KMS_KEY_ID)' + out/*.json`, run on the substituted output) should return nothing. The KMS resource is + split into `KMS_REGION:ACCOUNT_ID:key/TRAIL_KMS_KEY_ID` rather + than a single `TRAIL_KMS_KEY_ARN` placeholder so partial + substitution still produces an ARN-shaped string — partial + fills fail loudly instead of deploying a dead deny. + + - *Day-2 ops on the trail bucket and CMK are blocked for the agent.* + `s3:PutBucketPolicy`, `PutEncryptionConfiguration`, `PutBucketVersioning`, + `kms:PutKeyPolicy`, `ScheduleKeyDeletion`, `CreateGrant`, etc. + are all denied. The trail bucket and KMS key **must be managed + outside this agent's Terraform state** — use a separate state + file with a separate (admin) role, or treat the audit trail as + unmanaged infra. Otherwise routine maintenance (KMS key-policy update, + bucket policy update for a new principal, lifecycle-rule change) + will fail with `AccessDenied` and no remediation path until the deny is + lifted manually. Recommended layout: a dedicated `audit-trail/` + module owned by the platform/security team, run with an admin + role; this `loki-permissions` module references its outputs but + never writes to the bucket/key. + + - *Config / GuardDuty / SecurityHub initial setup is also blocked + for the agent.* `DenyAuditServiceTampering` covers + `config:PutConfigurationRecorder` and `config:PutDeliveryChannel` + (so the agent cannot overwrite an existing recorder to point at + a black-hole bucket). The side effect is that *first-time setup* + of these services must also be done outside the agent's Terraform + state — same separation-of-duties pattern as the trail bucket/CMK.
+ If the agent attempts to enable Config / GuardDuty / SecurityHub + for the first time, the apply fails on these actions; the fix is + to bootstrap them via an admin role and have the agent reference + the resulting infrastructure read-only. + + - *Cross-partition templates.* The JSON and Terraform templates hardcode + `aws` partition (commercial region ARNs). GovCloud (`aws-us-gov`) and + China (`aws-cn`) deployments would require manual partition substitution + throughout all 4 JSON files + Terraform module. **Partition support is + planned as a future enhancement** to thread a `var.aws_partition` parameter + and systematically replace all `arn:aws:` with `arn:${aws_partition}:`. + This is a separate scope from the current audit-trail deny set; users + deploying to non-commercial regions should use this template as a + reference and manually update partitions. + + - *Residual gaps (not currently denied, intentional):* the agent + can still call `cloudtrail:GetTrail` / `LookupEvents` / + `DescribeTrails` for legitimate debugging, and can still create + **new** S3 buckets / KMS keys unrelated to the audit trail. The + deny statements above are surgically targeted at the audit + infrastructure; they do not impose a blanket S3/KMS read-only + posture, which would break the agent's day job. + + - *Triage actions denied (intentional, broad).* `DenyAuditServiceTampering` + denies `securityhub:BatchUpdateFindings` and `guardduty:CreateFilter`/ + `UpdateFilter`/`DeleteFilter` with `Resource: "*"`. These actions can + legitimately be used for triage (mark findings RESOLVED, suppress noise + via filter), but the same actions can also be used to silence findings + about the agent's own activity. We deny broadly because triage is a + human/SOC task, not an agent task. If your operating model needs the + agent to do triage, scope these by `securityhub:ASFFSyntaxPath` / + `guardduty:DetectorId` conditions or move them out of the deny set. 5. 
**Reversible** — admin can delete `/loki/*` roles to revoke all agent-created permissions 6. **Human retains control** — admin role and instance profile are protected by explicit deny diff --git a/policies/deny-guardrails.json b/policies/deny-guardrails.json index 6bc841d..616249f 100644 --- a/policies/deny-guardrails.json +++ b/policies/deny-guardrails.json @@ -38,10 +38,16 @@ "iam:PutRolePolicy", "iam:DeleteRolePolicy", "iam:UpdateAssumeRolePolicy", - "iam:DeleteRole" + "iam:DeleteRole", + "iam:TagRole", + "iam:UntagRole", + "iam:UpdateRole", + "iam:UpdateRoleDescription", + "iam:PutRolePermissionsBoundary", + "iam:DeleteRolePermissionsBoundary" ], "Resource": [ - "arn:aws:iam::ACCOUNT_ID:role/AGENT_ROLE_NAME" + "arn:aws:iam::ACCOUNT_ID:role/IAM_PATHAGENT_ROLE_NAME" ] }, { @@ -65,7 +71,11 @@ "iam:DetachRolePolicy", "iam:UpdateAssumeRolePolicy", "iam:PutRolePermissionsBoundary", - "iam:DeleteRolePermissionsBoundary" + "iam:DeleteRolePermissionsBoundary", + "iam:TagRole", + "iam:UntagRole", + "iam:UpdateRole", + "iam:UpdateRoleDescription" ], "NotResource": [ "arn:aws:iam::ACCOUNT_ID:role/IAM_PATH*", @@ -102,6 +112,116 @@ "iam:SetDefaultPolicyVersion" ], "Resource": "arn:aws:iam::ACCOUNT_ID:policy/IAM_PATHLokiPermissionsBoundary" + }, + { + "Sid": "DenyCloudTrailTampering", + "Effect": "Deny", + "Action": [ + "cloudtrail:CreateTrail", + "cloudtrail:CreateEventDataStore", + "cloudtrail:CreateChannel", + "cloudtrail:StopLogging", + "cloudtrail:DeleteTrail", + "cloudtrail:UpdateTrail", + "cloudtrail:PutEventSelectors", + "cloudtrail:PutInsightSelectors", + "cloudtrail:PutResourcePolicy", + "cloudtrail:DeleteResourcePolicy", + "cloudtrail:DeleteEventDataStore", + "cloudtrail:UpdateEventDataStore", + "cloudtrail:DeleteChannel", + "cloudtrail:UpdateChannel" + ], + "Resource": "*" + }, + { + "Sid": "DenyAuditServiceTampering", + "Effect": "Deny", + "Action": [ + "config:DeleteConfigurationRecorder", + "config:StopConfigurationRecorder", + 
"config:PutConfigurationRecorder", + "config:DeleteDeliveryChannel", + "config:PutDeliveryChannel", + "config:DeleteConfigRule", + "config:DeleteConfigurationAggregator", + "config:DeleteOrganizationConfigRule", + "config:DeleteRetentionConfiguration", + "config:DeleteRemediationConfiguration", + "config:DeleteEvaluationResults", + "guardduty:DeleteDetector", + "guardduty:UpdateDetector", + "guardduty:DisassociateFromMasterAccount", + "guardduty:StopMonitoringMembers", + "guardduty:DeletePublishingDestination", + "guardduty:UpdatePublishingDestination", + "guardduty:DisassociateMembers", + "guardduty:DeleteMembers", + "guardduty:UpdateMemberDetectors", + "guardduty:CreateFilter", + "guardduty:UpdateFilter", + "guardduty:DeleteFilter", + "securityhub:DisableSecurityHub", + "securityhub:DisassociateFromMasterAccount", + "securityhub:BatchDisableStandards", + "securityhub:UpdateStandardsControl", + "securityhub:DeleteInsight", + "securityhub:UpdateInsight", + "securityhub:BatchUpdateFindings" + ], + "Resource": "*" + }, + { + "Sid": "DenyTrailStorageTampering", + "Effect": "Deny", + "Action": [ + "s3:DeleteBucket", + "s3:DeleteBucketPolicy", + "s3:PutBucketPolicy", + "s3:PutBucketAcl", + "s3:PutBucketPublicAccessBlock", + "s3:PutBucketOwnershipControls", + "s3:PutBucketNotification", + "s3:PutBucketWebsite", + "s3:PutBucketVersioning", + "s3:PutBucketLogging", + "s3:PutLifecycleConfiguration", + "s3:PutReplicationConfiguration", + "s3:PutEncryptionConfiguration", + "s3:PutBucketObjectLockConfiguration", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + "s3:PutObject", + "s3:PutObjectAcl", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:BypassGovernanceRetention" + ], + "Resource": [ + "arn:aws:s3:::TRAIL_BUCKET_NAME", + "arn:aws:s3:::TRAIL_BUCKET_NAME/*" + ] + }, + { + "Sid": "DenyTrailKmsTampering", + "Effect": "Deny", + "Action": [ + "kms:ScheduleKeyDeletion", + "kms:DisableKey", + "kms:PutKeyPolicy", + "kms:CreateGrant", + "kms:RevokeGrant", + 
"kms:CancelKeyDeletion", + "kms:UpdateAlias", + "kms:DeleteAlias", + "kms:PutResourcePolicy", + "kms:DeleteResourcePolicy", + "kms:ImportKeyMaterial", + "kms:DeleteImportedKeyMaterial" + ], + "Resource": [ + "arn:aws:kms:KMS_REGION:ACCOUNT_ID:key/TRAIL_KMS_KEY_ID" + ] } ] } diff --git a/scripts/check_parity.py b/scripts/check_parity.py new file mode 100755 index 0000000..0bc1980 --- /dev/null +++ b/scripts/check_parity.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +"""Parity check between policies/*.json and Terraform-rendered statement lists. + +The Terraform module's deny_guardrails / iam_scoped / permissions_boundary +policies and policies/*.json must encode identical Sid → (Action|NotAction) +sets. Drift would leave one deployment path under-protected. + +Usage: + + # Default: assumes you're in terraform/ with rendered tf-*.json there. + python3 ../scripts/check_parity.py + + # Or pass a render directory explicitly: + python3 scripts/check_parity.py --render-dir terraform + +Exits 0 on parity, 1 on drift. + +Note on scope: this verifies Action/NotAction parity per Sid. +Resource/NotResource/Condition are NOT checked because `terraform console` +can't resolve resource refs like `aws_iam_role.agent.arn` without an apply +(returns "known after apply"). Scope correctness is enforced by review + +the JSON template's literal placeholders being humanly auditable. +""" + +import argparse +import json +import sys +import pathlib + +# Repo-relative paths resolved against this script's location, not CWD. +REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent + +# (logical name, JSON source path relative to REPO_ROOT, TF render filename) +POLICIES = [ + ("deny-guardrails", "policies/deny-guardrails.json", "tf-deny.json"), + ("iam-scoped", "policies/iam-scoped.json", "tf-iam-scoped.json"), + ("permissions-boundary", "policies/permissions-boundary.json", "tf-boundary.json"), +] + + +def actions_of(stmt): + """Return (action_set, notaction_set) tuple. Either may be empty. 
+ Handles Action/NotAction being either a list or a string.""" + def to_set(key): + if key not in stmt or stmt[key] is None: + return frozenset() + v = stmt[key] + return frozenset(v) if isinstance(v, list) else frozenset([v]) + return to_set("Action"), to_set("NotAction") + + +def by_sid(stmts): + return {s["Sid"]: actions_of(s) for s in stmts} + + +def main(): + parser = argparse.ArgumentParser(description=__doc__.split("\n")[0]) + parser.add_argument( + "--render-dir", + default="terraform", + help="Directory containing the Terraform-rendered tf-*.json files " + "(default: terraform, resolved relative to repo root).", + ) + args = parser.parse_args() + + render_dir = (REPO_ROOT / args.render_dir).resolve() + + any_drift = False + for name, json_rel, tf_filename in POLICIES: + json_path = REPO_ROOT / json_rel + tf_path = render_dir / tf_filename + + if not tf_path.exists(): + print(f"[{name}] MISSING render file: {tf_path}", file=sys.stderr) + print(f" Run `terraform console` to produce it before this check.", file=sys.stderr) + sys.exit(2) + + j = json.loads(json_path.read_text()) + tf = json.loads(tf_path.read_text()) + js = by_sid(j["Statement"]) + ts = by_sid(tf) + + miss_tf = set(js) - set(ts) + miss_json = set(ts) - set(js) + if miss_tf or miss_json: + any_drift = True + print(f"[{name}] STATEMENT DRIFT:") + if miss_tf: print(f" Sids in JSON only: {sorted(miss_tf)}") + if miss_json: print(f" Sids in TF only: {sorted(miss_json)}") + + for sid in sorted(set(js) & set(ts)): + ja, jna = js[sid] + ta, tna = ts[sid] + if ja != ta: + any_drift = True + print(f"[{name}] ACTION DRIFT in Sid='{sid}':") + if ja - ta: print(f" JSON only: {sorted(ja - ta)}") + if ta - ja: print(f" TF only: {sorted(ta - ja)}") + if jna != tna: + any_drift = True + print(f"[{name}] NOTACTION DRIFT in Sid='{sid}':") + if jna - tna: print(f" JSON only: {sorted(jna - tna)}") + if tna - jna: print(f" TF only: {sorted(tna - jna)}") + + total_a = sum(len(a) for a, _ in js.values()) + total_na = 
sum(len(na) for _, na in js.values()) + extras = f", {total_na} NotActions" if total_na else "" + print(f"[{name}] {len(js)} statements, {total_a} actions{extras}") + + if any_drift: + sys.exit(1) + print("all policies parity OK") + + +if __name__ == "__main__": + main() diff --git a/terraform/examples/README.md b/terraform/examples/README.md new file mode 100644 index 0000000..30f2359 --- /dev/null +++ b/terraform/examples/README.md @@ -0,0 +1,27 @@ +# Terraform Examples + +This directory contains **standalone, non-module** Terraform snippets that +demonstrate how to author IAM resources from a *consuming* project (a +project that runs *under* the agent role and creates roles for its own +Lambda/CodeBuild/etc.). + +These files are **not** part of the `loki-permissions` module. They are +reference material only. The `terraform/` directory is the actual module; +this `examples/` subdirectory is kept in a subdirectory because Terraform +would treat sibling `.tf` files as part of the same module otherwise +(causing `Duplicate variable declaration` errors when both define +`variable "account_id"`). + +## Files + +- `downstream-consumer.tf` — shows what an agent-spawned Lambda / + CodeBuild / CodePipeline role looks like with the required `path` and + `permissions_boundary` attributes set. Copy/adapt into your project. + +## Usage + +```bash +# In your project, NOT in this repo: +cp terraform/examples/downstream-consumer.tf my-project/iam.tf +# Then edit variables and `terraform apply` from my-project/. +``` diff --git a/terraform/example.tf b/terraform/examples/downstream-consumer.tf similarity index 76% rename from terraform/example.tf rename to terraform/examples/downstream-consumer.tf index 5c7f806..25f2067 100644 --- a/terraform/example.tf +++ b/terraform/examples/downstream-consumer.tf @@ -2,6 +2,18 @@ # # Add this to your project's Terraform to create IAM roles # that comply with the agent's scoped permissions. 
+# +# ⚠️ IMPORTANT: This example HARDCODES the defaults from the loki-permissions +# module: +# - path = "/loki/" (matches module var.iam_path default) +# - boundary policy name "LokiPermissionsBoundary" (matches module var.boundary_policy_name default) +# +# If your loki-permissions module deployment customizes EITHER of these vars, +# you MUST update the literals below to match. Otherwise: +# - path mismatch → DenyRoleManagementOutsideAgentPath blocks role creation +# - boundary mismatch → DenyCreateRoleWithoutBoundary blocks role creation +# Both fail at apply time with cryptic IAM errors. Parameterize via +# variables if you expect to change them per-environment. variable "account_id" { type = string diff --git a/terraform/main.tf b/terraform/main.tf index 224c2e6..b24ba02 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -1,6 +1,24 @@ locals { - boundary_arn = "arn:aws:iam::${var.account_id}:policy${var.iam_path}${var.boundary_policy_name}" - role_path = var.iam_path + + # --------------------------------------------------------------------------- + # PARTITION LOCK-IN (deferred; revisit if GovCloud / China support is needed) + # --------------------------------------------------------------------------- + # The AWS partition `aws` is hardcoded in 3 places: + # 1. terraform/main.tf — ARN composition (`arn:aws:iam:...`, etc.) + # 2. policies/*.json — inline ARN literals + # 3. terraform/variables.tf — `trail_kms_key_arn` validation regex + # User decision (2026-05-13): no GovCloud/China support in this template. + # If that changes, introduce `var.aws_partition` (default "aws") and thread + # it through all 3 sites — don't fix one in isolation. + # --------------------------------------------------------------------------- + + # Fail-closed safety check: if neither trail var is set, the user + # must explicitly acknowledge they have no CloudTrail to protect. 
+ # Otherwise the deny statements silently disappear while the agent + # keeps PowerUser-level S3/KMS access to whatever audit trail does + # exist in the account. See trail_protection_acknowledged in + # variables.tf. + trail_protection_omitted = var.trail_bucket_name == null && var.trail_kms_key_arn == null } # --- Permissions Boundary --- @@ -13,32 +31,8 @@ resource "aws_iam_policy" "permissions_boundary" { description = "Permissions boundary for AI agent-created roles. Blocks IAM/Orgs/Account." policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Sid = "AllowEverythingExceptDangerous" - Effect = "Allow" - NotAction = ["iam:*", "organizations:*", "account:*"] - Resource = "*" - }, - { - Sid = "AllowPassRoleOnlyAgentRoles" - Effect = "Allow" - Action = "iam:PassRole" - Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" - }, - { - Sid = "AllowReadOnlyIAM" - Effect = "Allow" - Action = [ - "iam:GetRole", - "iam:GetRolePolicy", - "iam:ListRolePolicies", - "iam:ListAttachedRolePolicies" - ] - Resource = "*" - } - ] + Version = "2012-10-17" + Statement = local.permissions_boundary_statements }) tags = var.tags @@ -48,6 +42,7 @@ resource "aws_iam_policy" "permissions_boundary" { resource "aws_iam_role" "agent" { name = var.agent_role_name + path = var.iam_path assume_role_policy = jsonencode({ Version = "2012-10-17" @@ -73,72 +68,8 @@ resource "aws_iam_role_policy" "iam_scoped" { role = aws_iam_role.agent.name policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Sid = "AllowRoleManagementUnderAgentPath" - Effect = "Allow" - Action = [ - "iam:CreateRole", "iam:DeleteRole", "iam:GetRole", - "iam:GetRolePolicy", "iam:ListRolePolicies", - "iam:ListAttachedRolePolicies", "iam:ListInstanceProfilesForRole", - "iam:TagRole", "iam:UntagRole", "iam:ListRoleTags", - "iam:UpdateRole", "iam:UpdateRoleDescription", - "iam:UpdateAssumeRolePolicy", - "iam:PutRolePolicy", "iam:DeleteRolePolicy", - "iam:AttachRolePolicy", 
"iam:DetachRolePolicy", - "iam:PutRolePermissionsBoundary" - ] - Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" - }, - { - Sid = "AllowPolicyManagementUnderAgentPath" - Effect = "Allow" - Action = [ - "iam:CreatePolicy", "iam:DeletePolicy", - "iam:GetPolicy", "iam:GetPolicyVersion", - "iam:ListPolicyVersions", "iam:CreatePolicyVersion", - "iam:DeletePolicyVersion", "iam:TagPolicy", "iam:UntagPolicy" - ] - Resource = "arn:aws:iam::${var.account_id}:policy${var.iam_path}*" - }, - { - Sid = "AllowInstanceProfileManagement" - Effect = "Allow" - Action = [ - "iam:CreateInstanceProfile", "iam:DeleteInstanceProfile", - "iam:GetInstanceProfile", "iam:AddRoleToInstanceProfile", - "iam:RemoveRoleFromInstanceProfile", "iam:TagInstanceProfile" - ] - Resource = "arn:aws:iam::${var.account_id}:instance-profile${var.iam_path}*" - }, - { - Sid = "AllowPassRoleOnlyAgentRoles" - Effect = "Allow" - Action = "iam:PassRole" - Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" - }, - { - Sid = "AllowServiceLinkedRoles" - Effect = "Allow" - Action = [ - "iam:CreateServiceLinkedRole", - "iam:DeleteServiceLinkedRole", - "iam:GetServiceLinkedRoleDeletionStatus" - ] - Resource = "arn:aws:iam::${var.account_id}:role/aws-service-role/*" - }, - { - Sid = "AllowIAMReadOnly" - Effect = "Allow" - Action = [ - "iam:ListRoles", "iam:ListPolicies", "iam:ListInstanceProfiles", - "iam:GetAccountSummary", "iam:SimulatePrincipalPolicy", - "iam:ListOpenIDConnectProviders", "iam:ListSAMLProviders" - ] - Resource = "*" - } - ] + Version = "2012-10-17" + Statement = local.iam_scoped_statements }) } @@ -147,94 +78,296 @@ resource "aws_iam_role_policy" "deny_guardrails" { name = "LokiDenyGuardrails" role = aws_iam_role.agent.name + lifecycle { + precondition { + condition = !local.trail_protection_omitted || var.trail_protection_acknowledged + error_message = "BOTH trail_bucket_name AND trail_kms_key_arn are null — DenyTrailStorageTampering and DenyTrailKmsTampering will NOT be 
deployed. ** This disables an entire defense layer. ** If your account has a CloudTrail, the agent retains PowerUser-level S3/KMS access to its bucket and CMK; audit-trail tampering will not be blocked. To proceed anyway (e.g. no trail exists, or trail protection is enforced elsewhere), set trail_protection_acknowledged = true — you are responsible for the resulting risk." + } + } + policy = jsonencode({ Version = "2012-10-17" - Statement = [ - { - Sid = "DenyIdentityManagement" - Effect = "Deny" - Action = [ - "iam:CreateUser", "iam:DeleteUser", - "iam:CreateGroup", "iam:DeleteGroup", - "iam:CreateAccessKey", "iam:DeleteAccessKey", - "iam:CreateLoginProfile", "iam:DeleteLoginProfile", "iam:UpdateLoginProfile", - "iam:AddUserToGroup", "iam:RemoveUserFromGroup", - "iam:AttachUserPolicy", "iam:DetachUserPolicy", - "iam:PutUserPolicy", "iam:DeleteUserPolicy", - "iam:AttachGroupPolicy", "iam:DetachGroupPolicy", - "iam:PutGroupPolicy", "iam:DeleteGroupPolicy", - "iam:DeactivateMFADevice", "iam:DeleteVirtualMFADevice" - ] - Resource = "*" - }, - { - Sid = "DenySelfEscalation" - Effect = "Deny" - Action = [ - "iam:AttachRolePolicy", "iam:DetachRolePolicy", - "iam:PutRolePolicy", "iam:DeleteRolePolicy", - "iam:UpdateAssumeRolePolicy", "iam:DeleteRole" - ] - Resource = [aws_iam_role.agent.arn] - }, - { - Sid = "DenyOrganizationsAndAccount" - Effect = "Deny" - Action = ["organizations:*", "account:*"] - Resource = "*" - }, - { - Sid = "DenyRoleManagementOutsideAgentPath" - Effect = "Deny" - Action = [ - "iam:CreateRole", "iam:DeleteRole", - "iam:PutRolePolicy", "iam:DeleteRolePolicy", - "iam:AttachRolePolicy", "iam:DetachRolePolicy", - "iam:UpdateAssumeRolePolicy", - "iam:PutRolePermissionsBoundary", "iam:DeleteRolePermissionsBoundary" - ] - NotResource = [ - "arn:aws:iam::${var.account_id}:role${var.iam_path}*", - "arn:aws:iam::${var.account_id}:role/aws-service-role/*" - ] - }, - { - Sid = "DenyCreateRoleWithoutBoundary" - Effect = "Deny" - Action = "iam:CreateRole" - 
Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" - Condition = { - StringNotEquals = { - "iam:PermissionsBoundary" = aws_iam_policy.permissions_boundary.arn - } + Statement = concat( + local.deny_guardrails_base_statements, + var.trail_bucket_name != null ? [local.deny_trail_storage_statement] : [], + var.trail_kms_key_arn != null ? [local.deny_trail_kms_statement] : [] + ) + }) +} + +locals { + permissions_boundary_statements = [ + { + Sid = "AllowEverythingExceptDangerous" + Effect = "Allow" + NotAction = ["iam:*", "organizations:*", "account:*"] + Resource = "*" + }, + { + Sid = "AllowPassRoleOnlyAgentRoles" + Effect = "Allow" + Action = "iam:PassRole" + Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" + }, + { + Sid = "AllowReadOnlyIAM" + Effect = "Allow" + Action = [ + "iam:GetRole", + "iam:GetRolePolicy", + "iam:ListRolePolicies", + "iam:ListAttachedRolePolicies" + ] + Resource = "*" + } + ] + + iam_scoped_statements = [ + { + Sid = "AllowRoleManagementUnderAgentPath" + Effect = "Allow" + Action = [ + "iam:CreateRole", "iam:DeleteRole", "iam:GetRole", + "iam:GetRolePolicy", "iam:ListRolePolicies", + "iam:ListAttachedRolePolicies", "iam:ListInstanceProfilesForRole", + "iam:TagRole", "iam:UntagRole", "iam:ListRoleTags", + "iam:UpdateRole", "iam:UpdateRoleDescription", + "iam:UpdateAssumeRolePolicy", + "iam:PutRolePolicy", "iam:DeleteRolePolicy", + "iam:AttachRolePolicy", "iam:DetachRolePolicy", + "iam:PutRolePermissionsBoundary" + ] + Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" + }, + { + Sid = "AllowPolicyManagementUnderAgentPath" + Effect = "Allow" + Action = [ + "iam:CreatePolicy", "iam:DeletePolicy", + "iam:GetPolicy", "iam:GetPolicyVersion", + "iam:ListPolicyVersions", "iam:CreatePolicyVersion", + "iam:DeletePolicyVersion", "iam:TagPolicy", "iam:UntagPolicy" + ] + Resource = "arn:aws:iam::${var.account_id}:policy${var.iam_path}*" + }, + { + Sid = "AllowInstanceProfileManagementUnderAgentPath" + Effect = 
"Allow" + Action = [ + "iam:CreateInstanceProfile", "iam:DeleteInstanceProfile", + "iam:GetInstanceProfile", "iam:AddRoleToInstanceProfile", + "iam:RemoveRoleFromInstanceProfile", "iam:TagInstanceProfile" + ] + Resource = "arn:aws:iam::${var.account_id}:instance-profile${var.iam_path}*" + }, + { + Sid = "AllowPassRoleOnlyAgentRoles" + Effect = "Allow" + Action = "iam:PassRole" + Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" + }, + { + Sid = "AllowServiceLinkedRoles" + Effect = "Allow" + Action = [ + "iam:CreateServiceLinkedRole", + "iam:DeleteServiceLinkedRole", + "iam:GetServiceLinkedRoleDeletionStatus" + ] + Resource = "arn:aws:iam::${var.account_id}:role/aws-service-role/*" + }, + { + Sid = "AllowIAMReadOnly" + Effect = "Allow" + Action = [ + "iam:ListRoles", "iam:ListPolicies", "iam:ListInstanceProfiles", + "iam:GetAccountSummary", "iam:SimulatePrincipalPolicy", + "iam:ListOpenIDConnectProviders", "iam:ListSAMLProviders" + ] + Resource = "*" + } + ] + + deny_guardrails_base_statements = [ + { + Sid = "DenyIdentityManagement" + Effect = "Deny" + Action = [ + "iam:CreateUser", "iam:DeleteUser", + "iam:CreateGroup", "iam:DeleteGroup", + "iam:CreateAccessKey", "iam:DeleteAccessKey", + "iam:CreateLoginProfile", "iam:DeleteLoginProfile", "iam:UpdateLoginProfile", + "iam:AddUserToGroup", "iam:RemoveUserFromGroup", + "iam:AttachUserPolicy", "iam:DetachUserPolicy", + "iam:PutUserPolicy", "iam:DeleteUserPolicy", + "iam:AttachGroupPolicy", "iam:DetachGroupPolicy", + "iam:PutGroupPolicy", "iam:DeleteGroupPolicy", + "iam:DeactivateMFADevice", "iam:DeleteVirtualMFADevice" + ] + Resource = "*" + }, + { + Sid = "DenySelfEscalation" + Effect = "Deny" + Action = [ + "iam:AttachRolePolicy", "iam:DetachRolePolicy", + "iam:PutRolePolicy", "iam:DeleteRolePolicy", + "iam:UpdateAssumeRolePolicy", "iam:DeleteRole", + "iam:TagRole", "iam:UntagRole", + "iam:UpdateRole", "iam:UpdateRoleDescription", + "iam:PutRolePermissionsBoundary", "iam:DeleteRolePermissionsBoundary" 
+ ] + Resource = [aws_iam_role.agent.arn] + }, + { + Sid = "DenyOrganizationsAndAccount" + Effect = "Deny" + Action = ["organizations:*", "account:*"] + Resource = "*" + }, + { + Sid = "DenyRoleManagementOutsideAgentPath" + Effect = "Deny" + Action = [ + "iam:CreateRole", "iam:DeleteRole", + "iam:PutRolePolicy", "iam:DeleteRolePolicy", + "iam:AttachRolePolicy", "iam:DetachRolePolicy", + "iam:UpdateAssumeRolePolicy", + "iam:PutRolePermissionsBoundary", "iam:DeleteRolePermissionsBoundary", + "iam:TagRole", "iam:UntagRole", + "iam:UpdateRole", "iam:UpdateRoleDescription" + ] + NotResource = [ + "arn:aws:iam::${var.account_id}:role${var.iam_path}*", + "arn:aws:iam::${var.account_id}:role/aws-service-role/*" + ] + }, + { + Sid = "DenyCreateRoleWithoutBoundary" + Effect = "Deny" + Action = "iam:CreateRole" + Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" + Condition = { + StringNotEquals = { + "iam:PermissionsBoundary" = aws_iam_policy.permissions_boundary.arn } - }, - { - Sid = "DenyRemovingBoundary" - Effect = "Deny" - Action = [ - "iam:DeleteRolePermissionsBoundary", - "iam:PutRolePermissionsBoundary" - ] - Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" - }, - { - Sid = "DenyBoundaryPolicyModification" - Effect = "Deny" - Action = [ - "iam:DeletePolicy", "iam:CreatePolicyVersion", - "iam:DeletePolicyVersion", "iam:SetDefaultPolicyVersion" - ] - Resource = aws_iam_policy.permissions_boundary.arn } + }, + { + Sid = "DenyRemovingBoundary" + Effect = "Deny" + Action = [ + "iam:DeleteRolePermissionsBoundary", + "iam:PutRolePermissionsBoundary" + ] + Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*" + }, + { + Sid = "DenyBoundaryPolicyModification" + Effect = "Deny" + Action = [ + "iam:DeletePolicy", "iam:CreatePolicyVersion", + "iam:DeletePolicyVersion", "iam:SetDefaultPolicyVersion" + ] + Resource = aws_iam_policy.permissions_boundary.arn + }, + { + Sid = "DenyCloudTrailTampering" + Effect = "Deny" + Action = [ + 
"cloudtrail:CreateTrail", "cloudtrail:CreateEventDataStore", + "cloudtrail:CreateChannel", + "cloudtrail:StopLogging", "cloudtrail:DeleteTrail", "cloudtrail:UpdateTrail", + "cloudtrail:PutEventSelectors", "cloudtrail:PutInsightSelectors", + "cloudtrail:PutResourcePolicy", "cloudtrail:DeleteResourcePolicy", + "cloudtrail:DeleteEventDataStore", "cloudtrail:UpdateEventDataStore", + "cloudtrail:DeleteChannel", "cloudtrail:UpdateChannel" + ] + Resource = "*" + }, + { + Sid = "DenyAuditServiceTampering" + Effect = "Deny" + Action = [ + "config:DeleteConfigurationRecorder", "config:StopConfigurationRecorder", + "config:PutConfigurationRecorder", + "config:DeleteDeliveryChannel", "config:PutDeliveryChannel", + "config:DeleteConfigRule", + "config:DeleteConfigurationAggregator", "config:DeleteOrganizationConfigRule", + "config:DeleteRetentionConfiguration", "config:DeleteRemediationConfiguration", + "config:DeleteEvaluationResults", + "guardduty:DeleteDetector", "guardduty:UpdateDetector", + "guardduty:DisassociateFromMasterAccount", "guardduty:StopMonitoringMembers", + "guardduty:DeletePublishingDestination", "guardduty:UpdatePublishingDestination", + "guardduty:DisassociateMembers", "guardduty:DeleteMembers", + "guardduty:UpdateMemberDetectors", + "guardduty:CreateFilter", "guardduty:UpdateFilter", "guardduty:DeleteFilter", + "securityhub:DisableSecurityHub", "securityhub:DisassociateFromMasterAccount", + "securityhub:BatchDisableStandards", "securityhub:UpdateStandardsControl", + "securityhub:DeleteInsight", "securityhub:UpdateInsight", + "securityhub:BatchUpdateFindings" + ] + Resource = "*" + } + ] + + deny_trail_storage_statement = { + Sid = "DenyTrailStorageTampering" + Effect = "Deny" + Action = [ + "s3:DeleteBucket", "s3:DeleteBucketPolicy", "s3:PutBucketPolicy", + "s3:PutBucketAcl", "s3:PutBucketPublicAccessBlock", + "s3:PutBucketOwnershipControls", + "s3:PutBucketNotification", "s3:PutBucketWebsite", + "s3:PutBucketVersioning", "s3:PutBucketLogging", + 
"s3:PutLifecycleConfiguration", "s3:PutReplicationConfiguration", + "s3:PutEncryptionConfiguration", "s3:PutBucketObjectLockConfiguration", + "s3:DeleteObject", "s3:DeleteObjectVersion", + "s3:PutObject", + "s3:PutObjectAcl", "s3:PutObjectLegalHold", + "s3:PutObjectRetention", "s3:BypassGovernanceRetention" ] - }) + Resource = [ + # coalesce() shields against null when the deny statement is + # gated out by var.trail_bucket_name == null in concat() above. + # Terraform evaluates this local eagerly, so a null var would + # crash the whole plan even though the value is never used. + # The sentinel "INVALID_UNUSED" uses uppercase + underscore (both + # forbidden in real S3 bucket names) so the deploy would not + # match any actual bucket. NOTE: IAM policy *syntax* validation + # would still accept the resulting ARN — the safety here comes + # from the concat() gate, not from the sentinel itself. The + # sentinel is defense-in-depth: if the gate is ever dropped by + # mistake, the resulting deny is a no-op rather than a deny + # against an attacker-controlled bucket name. + "arn:aws:s3:::${coalesce(var.trail_bucket_name, "INVALID_UNUSED")}", + "arn:aws:s3:::${coalesce(var.trail_bucket_name, "INVALID_UNUSED")}/*" + ] + } + + deny_trail_kms_statement = { + Sid = "DenyTrailKmsTampering" + Effect = "Deny" + Action = [ + "kms:ScheduleKeyDeletion", "kms:DisableKey", "kms:PutKeyPolicy", + "kms:CreateGrant", "kms:RevokeGrant", "kms:CancelKeyDeletion", + "kms:UpdateAlias", "kms:DeleteAlias", + "kms:PutResourcePolicy", "kms:DeleteResourcePolicy", + "kms:ImportKeyMaterial", "kms:DeleteImportedKeyMaterial" + ] + # See coalesce() comment on deny_trail_storage_statement.Resource + # above for why the sentinel is needed. The sentinel is ARN-shaped + # but uses an invalid region ("invalid") and all-zero account/key, + # so it deploys cleanly if ever ungated but matches no real key. 
+ Resource = [coalesce(var.trail_kms_key_arn, "arn:aws:kms:invalid:000000000000:key/00000000-0000-0000-0000-000000000000")] + } } # --- Instance Profile --- resource "aws_iam_instance_profile" "agent" { name = "${var.agent_role_name}-profile" + path = var.iam_path role = aws_iam_role.agent.name tags = var.tags } diff --git a/terraform/variables.tf b/terraform/variables.tf index a437127..320861d 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -7,6 +7,11 @@ variable "agent_role_name" { description = "Name for the agent's IAM role" type = string default = "loki-agent-role" + + validation { + condition = can(regex("^[\\w+=,.@-]{1,64}$", var.agent_role_name)) + error_message = "agent_role_name must match IAM naming rules: letters/digits and any of +=,.@-_, 1–64 chars. No slashes, spaces, or colons (would corrupt ARN composition)." + } } variable "iam_path" { @@ -15,8 +20,8 @@ variable "iam_path" { default = "/loki/" validation { - condition = can(regex("^/.*/$", var.iam_path)) - error_message = "iam_path must start and end with /" + condition = can(regex("^/([\\w+=,.@-]+/)+$", var.iam_path)) + error_message = "iam_path must be a valid IAM path (e.g. /loki/ or /loki/sub/) starting and ending with /, with at least one path segment. Empty string and bare root '/' are both rejected because they would widen ARN-scoped allows like 'role$${var.iam_path}*' to 'role*' or 'role/*' (every role in the account)." } } @@ -24,6 +29,11 @@ variable "boundary_policy_name" { description = "Name of the permissions boundary policy" type = string default = "LokiPermissionsBoundary" + + validation { + condition = can(regex("^[\\w+=,.@-]{1,128}$", var.boundary_policy_name)) + error_message = "boundary_policy_name must match IAM policy naming rules: letters/digits and any of +=,.@-_, 1–128 chars. No slashes, spaces, or colons (would corrupt ARN composition)."
+ } } variable "tags" { @@ -31,3 +41,72 @@ variable "tags" { type = map(string) default = {} } + +variable "trail_bucket_name" { + description = <<-EOT + Name of the S3 bucket holding CloudTrail logs. When set, adds + DenyTrailStorageTampering scoped to this bucket. Leave null to skip + (e.g. if no CloudTrail exists, or the bucket is managed by a + separate Terraform state with its own protections). Must be + managed *outside* this agent's Terraform state — the agent role + will be denied PutBucketPolicy/PutEncryptionConfiguration on it. + + Pass the BARE bucket name (e.g. "my-org-cloudtrail-logs"), NOT a + full S3 ARN. Pasting an ARN produces a malformed deny resource + (arn:aws:s3:::arn:aws:s3:::foo) that silently matches nothing. + EOT + type = string + default = null + + validation { + # S3 bucket-name rules (subset that catches the common mistakes): + # - 3–63 chars + # - lowercase letters, digits, dot, hyphen only + # - must start and end with letter or digit + # - no consecutive dots (S3 rejects "a..b") + # Notably rejects: ARNs (contain colons), uppercase, underscores. + # Does NOT validate IP-format names (192.168.x.x) or xn-- prefix — + # AWS rejects those server-side at apply time. + condition = ( + var.trail_bucket_name == null || + ( + can(regex("^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$", var.trail_bucket_name)) && + !can(regex("\\.\\.", var.trail_bucket_name)) + ) + ) + error_message = "trail_bucket_name must be a bare S3 bucket name (3–63 chars, lowercase alphanumerics + dots/hyphens, no colons), not a full ARN. Got: ${var.trail_bucket_name == null ? "(null)" : (var.trail_bucket_name == "" ? "(empty string)" : var.trail_bucket_name)}" + } +} + +variable "trail_kms_key_arn" { + description = <<-EOT + Full ARN of the KMS CMK encrypting CloudTrail. When set, adds + DenyTrailKmsTampering scoped to this key. Leave null if the trail + is unencrypted or absent.
Must be a full key ARN + (arn:aws:kms:REGION:ACCOUNT_ID:key/KEY_ID), not a key UUID or alias + — a partial value yields a silent no-op deny. + EOT + type = string + default = null + + validation { + condition = var.trail_kms_key_arn == null || can(regex("^arn:aws:kms:[a-z0-9-]+:[0-9]{12}:key/(mrk-[a-f0-9]{32}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$", var.trail_kms_key_arn)) + error_message = "trail_kms_key_arn must be a full KMS key ARN for AWS commercial region (arn:aws:kms:REGION:ACCOUNT_ID:key/KEY_ID or .../key/mrk-... for multi-region keys), or null. Note: partition support (aws-us-gov, aws-cn) is planned for a future release; currently this policy only supports AWS commercial regions." + } +} + +variable "trail_protection_acknowledged" { + description = <<-EOT + Set to true when you have intentionally left trail_bucket_name AND + trail_kms_key_arn null because the account has no CloudTrail (or the + trail is managed in a way where these denies are inappropriate). + + This is a fail-closed safety check: if both vars are null and this + flag is false, plan/apply errors out. The intent is to prevent the + common case of "forgot to set the trail vars" silently deploying + without audit-trail tampering protection while the agent retains + PowerUser-level S3/KMS access to the (existing) trail bucket/CMK. + EOT + type = bool + default = false +}