diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..c208433
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,132 @@
+name: lint
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Terraform
+ uses: hashicorp/setup-terraform@v3
+ with:
+ terraform_version: "1.7.0"
+ # Disable the Node wrapper: it buffers stdout/stderr, which causes
+ # `terraform console` to hang when fed expressions via a pipe in CI.
+ terraform_wrapper: false
+
+ - name: JSON parse
+ # Strict-parse every policy template; the first unparseable file fails the job.
+ run: |
+ set -e
+ for f in policies/*.json; do
+ # Hand the path to Python as argv instead of splicing it into the
+ # program text, so a quote in a filename cannot break or alter the check.
+ python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$f" \
+ || { echo "BROKEN: $f"; exit 1; }
+ echo "ok: $f"
+ done
+
+ - name: Substitution + post-substitution lint (round-trip)
+ # Renders policies/*.json into out/*.json with sample values, then checks
+ # that no placeholder survived, no double-slash ARN was produced, and the
+ # output still parses as JSON.
+ run: |
+ set -e
+ ACCOUNT_ID="123456789012"
+ AGENT_ROLE_NAME="loki-agent-role"
+ IAM_PATH="loki/"
+ BOUNDARY_POLICY_NAME="LokiPermissionsBoundary"
+ TRAIL_BUCKET_NAME="my-org-cloudtrail-logs"
+ KMS_REGION="us-east-1"
+ TRAIL_KMS_KEY_ID="abcd1234-abcd-1234-abcd-123456789012"
+
+ mkdir -p out
+ for f in policies/*.json; do
+ # Longest tokens first to prevent IAM_PATH matching inside IAM_PATHAGENT_ROLE_NAME.
+ # Parallel to the README.md substitution helper — keep the two in sync.
+ # Only intended difference: the README hardcodes "LokiPermissionsBoundary"
+ # where this step uses ${BOUNDARY_POLICY_NAME}.
+ sed \
+ -e "s|IAM_PATHAGENT_ROLE_NAME|${IAM_PATH}${AGENT_ROLE_NAME}|g" \
+ -e "s|IAM_PATHLokiPermissionsBoundary|${IAM_PATH}${BOUNDARY_POLICY_NAME}|g" \
+ -e "s|IAM_PATH|${IAM_PATH}|g" \
+ -e "s|ACCOUNT_ID|${ACCOUNT_ID}|g" \
+ -e "s|TRAIL_BUCKET_NAME|${TRAIL_BUCKET_NAME}|g" \
+ -e "s|KMS_REGION|${KMS_REGION}|g" \
+ -e "s|TRAIL_KMS_KEY_ID|${TRAIL_KMS_KEY_ID}|g" \
+ "$f" > "out/$(basename "$f")"
+ done
+
+ # Lint 1: no placeholders should remain (only check tokens that JSON actually contains)
+ # Placeholders are uppercase tokens that should not survive substitution.
+ # No word boundaries: substitution outputs are slash/quote-bounded in JSON,
+ # and \b doesn't fire between two word chars (e.g. \bIAM_PATH\b would NOT
+ # match IAM_PATHAGENT_ROLE_NAME because the trailing 'A' is a word char).
+ if grep -E '(ACCOUNT_ID|AGENT_ROLE_NAME|IAM_PATH|KMS_REGION|TRAIL_BUCKET_NAME|TRAIL_KMS_KEY_ID)' out/*.json; then
+ echo "Placeholders survived substitution above"; exit 1
+ fi
+ # Lint 2: no double-slash IAM ARNs (catches IAM_PATH leading-slash bug)
+ if grep -E 'role//|policy//|instance-profile//' out/*.json; then
+ echo "Double-slash ARN detected above (IAM_PATH substitution bug)"; exit 1
+ fi
+ # Lint 3: still parse as JSON. The path is passed as argv (not spliced
+ # into the Python source), and a failure names the offending file the
+ # same way the "JSON parse" step does.
+ for f in out/*.json; do
+ python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$f" \
+ || { echo "BROKEN after substitution: $f"; exit 1; }
+ done
+ echo "ok: substitution helper produces valid policies"
+
+ - name: Terraform fmt
+ working-directory: terraform
+ run: terraform fmt -check -recursive
+
+ - name: Terraform validate
+ working-directory: terraform
+ run: |
+ terraform init -backend=false -input=false
+ terraform validate
+
+ - name: JSON ↔ Terraform per-Sid parity (all 3 policies)
+ # The Terraform module and policies/*.json must encode identical Sid
+ # → (Action|NotAction) sets. Drift would leave one deployment path
+ # under-protected (or over-permissive). We render each Terraform-emitted
+ # statement list via `terraform console` and diff per-Sid action sets.
+ # Per-Sid (vs union-flat) catches the case where an action moves
+ # between statements with different Resource scopes.
+ working-directory: terraform
+ run: |
+ set -e
+
+ # Re-init in this step (each GHA step starts in a fresh shell;
+ # while files persist, the terraform console subcommand needs a
+ # populated .terraform/ that matches the current init flags).
+ terraform init -backend=false -input=false
+
+ cat > ci.auto.tfvars <<'TFVARS'
+ account_id = "123456789012"
+ trail_bucket_name = "my-org-cloudtrail-logs"
+ trail_kms_key_arn = "arn:aws:kms:us-east-1:123456789012:key/abcd1234-abcd-1234-abcd-123456789012"
+ TFVARS
+
+ # (json_file, terraform_expression) tuples — one per policy.
+ # The expression must render the full Statement list, normalizing
+ # Action/NotAction lists vs strings.
+ # `terraform console` is wrapped in `timeout` because it can hang
+ # on CI runners (no TTY) if init state is incomplete — fail loud
+ # instead of silently consuming the job's wall-clock budget.
+ render_tf() {
+ # Usage: render_tf <out_file> <terraform_expression>
+ # Pipes the expression into `terraform console` and writes the decoded
+ # result to <out_file>. Exits the step with status 1 on any failure.
+ local out="$1"; local expr="$2"
+ # The console call lives inside the `if` condition so that a non-zero
+ # exit (including 124 from `timeout`) reaches the diagnostic below
+ # instead of tripping `set -e` before anything is reported.
+ if ! echo "$expr" | timeout 30 terraform console > "${out}.raw" \
+ || [ ! -s "${out}.raw" ] \
+ || grep -qE '^(Error|Warning):' "${out}.raw"; then
+ echo "terraform console failed for ${out}:"; cat "${out}.raw"; exit 1
+ fi
+ # The console output is expected to be the jsonencode() string printed as
+ # a quoted literal; json.loads unwraps it to the bare JSON text. The path
+ # is passed as argv rather than spliced into the Python source.
+ python3 -c 'import json, sys; print(json.loads(open(sys.argv[1]).read().strip()))' "${out}.raw" > "$out"
+ rm "${out}.raw"
+ }
+
+ render_tf tf-deny.json \
+ 'jsonencode([for s in concat(local.deny_guardrails_base_statements, [local.deny_trail_storage_statement], [local.deny_trail_kms_statement]) : { Sid = s.Sid, Action = try(tolist(s.Action), can(s.Action) ? [s.Action] : []), NotAction = try(tolist(s.NotAction), can(s.NotAction) ? [s.NotAction] : []) }])'
+
+ render_tf tf-iam-scoped.json \
+ 'jsonencode([for s in local.iam_scoped_statements : { Sid = s.Sid, Action = try(tolist(s.Action), can(s.Action) ? [s.Action] : []), NotAction = try(tolist(s.NotAction), can(s.NotAction) ? [s.NotAction] : []) }])'
+
+ render_tf tf-boundary.json \
+ 'jsonencode([for s in local.permissions_boundary_statements : { Sid = s.Sid, Action = try(tolist(s.Action), can(s.Action) ? [s.Action] : []), NotAction = try(tolist(s.NotAction), can(s.NotAction) ? [s.NotAction] : []) }])'
+
+ rm ci.auto.tfvars
+
+ python3 ../scripts/check_parity.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..60583cc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+# Terraform local state and provider cache
+**/.terraform/
+**/.terraform.lock.hcl
+*.tfstate
+*.tfstate.*
+*.tfplan
+crash.log
+crash.*.log
+
+# Tfvars often contain secrets
+*.auto.tfvars
+*.tfvars
+!example.tfvars
+
+# OS / editor
+.DS_Store
+*.swp
+*.swo
diff --git a/README.md b/README.md
index 89127cc..1175851 100644
--- a/README.md
+++ b/README.md
@@ -111,30 +111,44 @@ Combined with `PowerUserAccess` (AWS managed), this gives the agent full service
## Quick Start
+> ⚠️ **Step 0 first.** The `policies/*.json` files contain literal placeholders
+> (`ACCOUNT_ID`, `IAM_PATH`, `TRAIL_BUCKET_NAME`, etc.). Handing raw template files
+> to `aws iam create-policy` fails with `MalformedPolicyDocument`. Run the
+> substitution helper from the [Customization](#customization) section below
+> first; it produces resolved `out/*.json` files. The commands below consume
+> `out/*.json`, not `policies/*.json`.
+
```bash
+# 0. Resolve placeholders → out/*.json (see "Customization" section for the helper)
+# After running it, you should have: out/permissions-boundary.json, out/iam-scoped.json,
+# out/deny-guardrails.json, out/trust-policy.json
+
# 1. Create the permissions boundary (admin does this)
aws iam create-policy \
--policy-name LokiPermissionsBoundary \
--path "/loki/" \
- --policy-document file://policies/permissions-boundary.json
+ --policy-document file://out/permissions-boundary.json
# 2. Create the agent role
aws iam create-role \
--role-name loki-agent-role \
- --assume-role-policy-document file://policies/trust-policy.json
+ --path "/loki/" \
+ --assume-role-policy-document file://out/trust-policy.json
# 3. Attach all policies
aws iam attach-role-policy --role-name loki-agent-role \
--policy-arn arn:aws:iam::aws:policy/PowerUserAccess
aws iam put-role-policy --role-name loki-agent-role \
--policy-name LokiIAMScoped \
- --policy-document file://policies/iam-scoped.json
+ --policy-document file://out/iam-scoped.json
aws iam put-role-policy --role-name loki-agent-role \
--policy-name LokiDenyGuardrails \
- --policy-document file://policies/deny-guardrails.json
+ --policy-document file://out/deny-guardrails.json
# 4. Create instance profile and attach to EC2
-aws iam create-instance-profile --instance-profile-name loki-agent-profile
+aws iam create-instance-profile \
+ --instance-profile-name loki-agent-profile \
+ --path "/loki/"
aws iam add-role-to-instance-profile \
--instance-profile-name loki-agent-profile \
--role-name loki-agent-role
@@ -143,7 +157,26 @@ aws ec2 associate-iam-instance-profile \
--iam-instance-profile Name=loki-agent-profile
```
-See [docs/](docs/) for detailed setup, migration, and Terraform integration guides.
+See [docs/](docs/) for detailed policy architecture and Terraform integration notes.
+
+### Terraform
+
+```hcl
+module "loki_permissions" {
+ source = "github.com/inceptionstack/loki-permissions//terraform"
+
+ account_id = "123456789012"
+ agent_role_name = "loki-agent-role"
+
+ # Optional: scoped denies on the audit-trail S3 bucket and KMS key.
+ # Leave null if you have no CloudTrail or it's unencrypted.
+ # IMPORTANT: these resources must be managed outside this state.
+ trail_bucket_name = "my-org-cloudtrail-logs"
+ trail_kms_key_arn = "arn:aws:kms:us-east-1:123456789012:key/abcd1234-..."
+}
+```
+
+The `trail_kms_key_arn` variable has plan-time validation — partial values (key UUIDs, alias ARNs) are rejected. If your trail is unencrypted, leave it `null` and the `DenyTrailKmsTampering` statement is omitted entirely (preferred over deploying a dead deny).
## Repository Structure
@@ -156,10 +189,14 @@ See [docs/](docs/) for detailed setup, migration, and Terraform integration guid
├── terraform/ # Terraform module
│ ├── main.tf # Agent role + policies
│ ├── variables.tf # Configurable inputs
-│ └── outputs.tf # ARNs and names
+│ ├── outputs.tf # ARNs and names
+│ └── examples/ # Standalone consumer examples (NOT part of module)
+│ ├── README.md
+│ └── downstream-consumer.tf
├── docs/
-│ ├── policy-design.md # Full policy architecture docs
-│ └── migration-guide.md # Step-by-step migration from admin
+│ └── policy-design.md # Full policy architecture docs
+├── .github/workflows/
+│ └── lint.yml # JSON parse, sub round-trip, TF validate, JSON↔TF parity
└── README.md
```
@@ -170,10 +207,74 @@ Before deploying, update these values in the policy files:
| Placeholder | Description | Example |
|------------|-------------|---------|
| `ACCOUNT_ID` | Your AWS account ID | `123456789012` |
-| `AGENT_ROLE_NAME` | Name of the agent's IAM role | `loki-agent-role` |
-| `BOUNDARY_POLICY_NAME` | Name of the permissions boundary | `LokiPermissionsBoundary` |
-| `IAM_PATH` | Path prefix for agent-created roles | `/loki/` |
+| `AGENT_ROLE_NAME` | Bare name of the agent's IAM role (no path). The path is supplied separately via `IAM_PATH`. Used by `DenySelfEscalation` together with `IAM_PATH` to build the role ARN. | `loki-agent-role` |
+| `IAM_PATH` | Path prefix for agent-created roles. **Substitute with NO leading slash** (e.g. `loki/`) so it composes correctly into ARNs as `role/loki/...`. The Terraform variable accepts the conventional leading-slash form (`/loki/`) and handles ARN composition itself. | `loki/` (in JSON)<br>`/loki/` (Terraform var) |
+| `TRAIL_BUCKET_NAME` | S3 bucket holding CloudTrail logs (used by `DenyTrailStorageTampering`) | `my-org-cloudtrail-logs` |
+| `KMS_REGION` | Region of the trail's KMS CMK (used by `DenyTrailKmsTampering`) | `us-east-1` |
+| `TRAIL_KMS_KEY_ID` | UUID of the trail's KMS CMK (used by `DenyTrailKmsTampering`) | `abcd1234-...` |
+
+> ⚠️ **All three trail placeholders (`TRAIL_BUCKET_NAME`, `KMS_REGION`, `TRAIL_KMS_KEY_ID`) must be replaced with real values before deployment.** A leftover literal placeholder will deploy a syntactically valid statement that matches no resource — a silent no-op. If your trail is **unencrypted**, delete the entire `DenyTrailKmsTampering` statement rather than supplying a fake KMS ARN. Likewise, if you have no CloudTrail at all, delete both `DenyTrailStorageTampering` and `DenyTrailKmsTampering`.
+>
+> **Pre-deploy lint** (run after substitution, before `aws iam put-role-policy`):
+>
+> ```bash
+> # 1. No literal placeholders should remain
+> ! grep -E 'ACCOUNT_ID|AGENT_ROLE_NAME|IAM_PATH|KMS_REGION|TRAIL_(BUCKET_NAME|KMS_KEY_ID)' out/*.json
+>
+> # 2. No double-slash ARNs (catches IAM_PATH substituted with leading slash)
+> ! grep -E 'role//|policy//|instance-profile//' out/*.json
+>
+> # 3. Strict JSON parse on the substituted output (templates are checked by CI)
+> for f in out/*.json; do python3 -c "import json; json.load(open('$f'))" || echo "BROKEN: $f"; done
+> ```
+>
+> **Substitution helper** (avoids ordering footguns when tokens share substrings, e.g. `IAM_PATH` is a prefix of `IAM_PATHAGENT_ROLE_NAME`):
+>
+> ```bash
+> # Edit these for your environment
+> ACCOUNT_ID="123456789012"
+> AGENT_ROLE_NAME="loki-agent-role"
+> IAM_PATH="loki/" # NO leading slash for JSON substitution
+> TRAIL_BUCKET_NAME="my-org-cloudtrail-logs"
+> KMS_REGION="us-east-1"
+> TRAIL_KMS_KEY_ID="abcd1234-abcd-1234-abcd-123456789012"
+>
+> # Substitute longest tokens first — prevents IAM_PATH matching inside IAM_PATHAGENT_ROLE_NAME.
+> # PARALLEL to `.github/workflows/lint.yml` substitution step but NOT identical:
+> # this README hardcodes "LokiPermissionsBoundary" while CI uses ${BOUNDARY_POLICY_NAME}.
+> # The two paths are equivalent for the default boundary name; if the boundary is
+> # renamed in Terraform, this CLI flow does not pick it up. (Extract to
+> # scripts/substitute.sh if drift becomes a problem in practice.)
+> #
+> # NOTE: The JSON template (this CLI flow) hardcodes the boundary name
+> # "LokiPermissionsBoundary". To use a different boundary name, either
+> # (a) deploy via the Terraform module which parameterizes it as
+> # var.boundary_policy_name, or (b) edit the literal in policies/*.json
+> # before running this helper.
+> mkdir -p out
+> for f in policies/*.json; do
+> sed \
+> -e "s|IAM_PATHAGENT_ROLE_NAME|${IAM_PATH}${AGENT_ROLE_NAME}|g" \
+> -e "s|IAM_PATHLokiPermissionsBoundary|${IAM_PATH}LokiPermissionsBoundary|g" \
+> -e "s|IAM_PATH|${IAM_PATH}|g" \
+> -e "s|ACCOUNT_ID|${ACCOUNT_ID}|g" \
+> -e "s|TRAIL_BUCKET_NAME|${TRAIL_BUCKET_NAME}|g" \
+> -e "s|KMS_REGION|${KMS_REGION}|g" \
+> -e "s|TRAIL_KMS_KEY_ID|${TRAIL_KMS_KEY_ID}|g" \
+> "$f" > "out/$(basename "$f")"
+> done
+>
+> # Then run the lint above against out/*.json
+> ```
+>
+> The KMS resource is split into `KMS_REGION:ACCOUNT_ID:key/TRAIL_KMS_KEY_ID` rather than a single `TRAIL_KMS_KEY_ARN` placeholder so partial substitution still produces an ARN-shaped string — a common mistake (pasting only the key UUID) at least fails loudly instead of deploying a dead deny.
+>
+> **Day-2 ops warning:** `DenyTrailStorageTampering` blocks `s3:PutBucketPolicy`, `PutEncryptionConfiguration`, `PutBucketVersioning`, etc. on the trail bucket; `DenyTrailKmsTampering` blocks `kms:PutKeyPolicy`, `ScheduleKeyDeletion`, etc. on the trail's CMK. The trail bucket and KMS key **must be managed outside this agent's Terraform state** (separate state file, separate role, or admin-only). Otherwise day-2 maintenance — KMS key rotation, bucket policy update for new accounts, lifecycle-rule changes — will silently fail with no remediation path until the deny is lifted manually. Recommended layout: a dedicated `audit-trail/` Terraform module owned by the platform/security team, run with an admin role; this `loki-permissions` module references its outputs but never writes to the bucket/key.
+>
+> **Terraform users:** if you deploy via the `terraform/` module, set `trail_bucket_name` and `trail_kms_key_arn` (full ARN) variables — the module variable validation rejects partial ARNs at plan-time. Leave them `null` to skip the trail-storage and trail-KMS statements entirely.
## License
-MIT
+Apache License 2.0 — see [LICENSE](LICENSE).
+
+SPDX-License-Identifier: Apache-2.0
diff --git a/docs/migration-guide.md b/docs/migration-guide.md
deleted file mode 100644
index d08dc0b..0000000
--- a/docs/migration-guide.md
+++ /dev/null
@@ -1,349 +0,0 @@
-# Loki Policy Migration Template — YourCurrentAdminRole → Scoped Permissions
-
-> Step-by-step migration guide for downgrading an AI agent from full admin to scoped permissions.
-> Designed for zero-downtime migration with rollback capability.
-> Last updated: 2026-03-16
-
----
-
-## Prerequisites
-
-- [ ] Read `Loki-Policy-Template.md` — understand the target policy architecture
-- [ ] Admin access to create the new role and policies (human does this, NOT the agent)
-- [ ] List of all existing IAM roles created by Terraform (the agent can generate this)
-- [ ] Terraform state access for all managed projects
-
----
-
-## Phase 1: Inventory (Agent does this)
-
-### 1.1 List all IAM roles created by the agent's Terraform projects
-
-```bash
-# For each infra repo, find all IAM resources
-for REPO in $(aws codecommit list-repositories --query 'repositories[*].repositoryName' --output text); do
- echo "=== $REPO ==="
- # Clone and scan for IAM resources
- git clone /tmp/$REPO 2>/dev/null
- grep -r 'aws_iam_role\|aws_iam_policy' /tmp/$REPO/*.tf 2>/dev/null | grep 'resource'
-done
-```
-
-### 1.2 Generate migration manifest
-
-Create a JSON file listing every role that needs to move to `/loki/` path:
-
-```json
-{
- "migration_date": "2026-03-16",
- "account_id": "ACCOUNT_ID",
- "roles_to_migrate": [
- {
- "project": "myapp",
- "current_name": "myapp-enqueue-role",
- "current_arn": "arn:aws:iam::ACCOUNT_ID:role/myapp-enqueue-role",
- "new_path": "/loki/",
- "new_name": "myapp-enqueue-role",
- "new_arn": "arn:aws:iam::ACCOUNT_ID:role/loki/myapp-enqueue-role",
- "terraform_file": "iam.tf",
- "terraform_resource": "aws_iam_role.enqueue",
- "services_using_role": ["lambda:myapp-enqueue"]
- }
- ],
- "policies_to_migrate": [],
- "instance_profiles_to_migrate": []
-}
-```
-
-### 1.3 Check for cross-references
-
-Some roles are referenced by ARN in other services (Lambda function configs, ECS task definitions, etc.). These need to be updated too:
-
-```bash
-# Find all places a role ARN is hardcoded
-grep -r "arn:aws:iam.*role/" /tmp/*/ # In Terraform
-aws lambda list-functions --query 'Functions[*].{fn: FunctionName, role: Role}' # In Lambda configs
-aws ecs list-task-definitions # In ECS task defs
-```
-
----
-
-## Phase 2: Prepare (Human admin does steps 2.1-2.3, Agent does 2.4)
-
-### 2.1 Create the new agent role (Human admin)
-
-```bash
-# Create the agent role that will replace YourCurrentAdminRole
-aws iam create-role \
- --role-name loki-agent-role \
- --assume-role-policy-document '{
- "Version": "2012-10-17",
- "Statement": [{
- "Effect": "Allow",
- "Principal": {"Service": "ec2.amazonaws.com"},
- "Action": "sts:AssumeRole"
- }]
- }'
-```
-
-### 2.2 Attach policies to new role (Human admin)
-
-```bash
-# Base: PowerUserAccess
-aws iam attach-role-policy --role-name loki-agent-role \
- --policy-arn arn:aws:iam::aws:policy/PowerUserAccess
-
-# Scoped IAM (from Loki-Policy-Template.md)
-aws iam put-role-policy --role-name loki-agent-role \
- --policy-name LokiIAMScoped \
- --policy-document file://loki-iam-scoped.json
-
-# Deny guardrails (from Loki-Policy-Template.md)
-aws iam put-role-policy --role-name loki-agent-role \
- --policy-name LokiDenyGuardrails \
- --policy-document file://loki-deny-guardrails.json
-```
-
-### 2.3 Create instance profile (Human admin)
-
-```bash
-aws iam create-instance-profile --instance-profile-name your-agent-profile
-aws iam add-role-to-instance-profile \
- --instance-profile-name your-agent-profile \
- --role-name loki-agent-role
-```
-
-### 2.4 Update all Terraform configs (Agent)
-
-For every Terraform project, update IAM resources to use `/loki/` path:
-
-```hcl
-# Add path = "/loki/" to every aws_iam_role
-resource "aws_iam_role" "example" {
- name = "my-app-role"
- path = "/loki/" # ← ADD THIS
- # ... rest unchanged
-}
-
-# Add path = "/loki/" to every aws_iam_policy
-resource "aws_iam_policy" "example" {
- name = "my-app-policy"
- path = "/loki/" # ← ADD THIS
- # ... rest unchanged
-}
-
-# Add path = "/loki/" to every aws_iam_instance_profile
-resource "aws_iam_instance_profile" "example" {
- name = "my-app-profile"
- path = "/loki/" # ← ADD THIS
- # ... rest unchanged
-}
-```
-
-**Important:** Adding `path` to an existing role is a **destructive change** — Terraform will destroy the old role and create a new one. This means:
-- Lambda functions will briefly lose their execution role
-- ECS services will need task def updates
-- CodePipeline/CodeBuild roles will need re-attachment
-
----
-
-## Phase 3: Migrate Roles (Agent, one project at a time)
-
-### Migration Strategy: Parallel Create → Switch → Delete
-
-To avoid downtime, create new `/loki/` roles alongside old ones, switch services over, then delete old roles.
-
-### 3.1 Per-project migration steps
-
-```bash
-# For each project (e.g., myapp):
-
-# Step 1: terraform plan — review what will change
-cd /tmp/-infra
-terraform plan
-
-# Step 2: If Terraform shows destroy+create for roles, proceed carefully
-# The plan should show:
-# - aws_iam_role.xxx will be destroyed (old path)
-# - aws_iam_role.xxx will be created (new /loki/ path)
-
-# Step 3: Apply with -target for IAM resources first
-terraform apply -target=aws_iam_role.enqueue -target=aws_iam_role.parser ...
-
-# Step 4: Apply the rest (Lambda configs will update to new role ARNs)
-terraform apply
-
-# Step 5: Verify all services are working
-aws lambda invoke --function-name /dev/null # Test each Lambda
-aws codepipeline start-pipeline-execution --name # Test pipeline
-```
-
-### 3.2 Alternative: Terraform state manipulation (advanced, zero-downtime)
-
-For critical production services, use `terraform state rm` + `terraform import` to avoid destroy+create:
-
-```bash
-# 1. Manually create new role with /loki/ path via CLI
-aws iam create-role --role-name my-role --path /loki/ --assume-role-policy-document ...
-aws iam put-role-policy --role-name my-role --policy-name ... --policy-document ...
-
-# 2. Update Lambda to use new role
-aws lambda update-function-configuration --function-name my-fn --role arn:aws:iam::...:role/loki/my-role
-
-# 3. Remove old resource from Terraform state
-terraform state rm aws_iam_role.my_role
-
-# 4. Import new role into Terraform state
-terraform import aws_iam_role.my_role my-role
-
-# 5. Delete old role manually
-aws iam delete-role-policy --role-name my-old-role --policy-name ...
-aws iam delete-role --role-name my-old-role
-```
-
----
-
-## Phase 4: Switch Instance Profile (Human admin)
-
-**⚠️ This is the critical moment. Do this during a maintenance window.**
-
-```bash
-# 1. Disassociate current instance profile
-ASSOC_ID=$(aws ec2 describe-iam-instance-profile-associations \
- --filters "Name=instance-id,Values=i-XXXXXXXXX" \
- --query 'IamInstanceProfileAssociations[0].AssociationId' --output text)
-
-aws ec2 replace-iam-instance-profile-association \
- --association-id $ASSOC_ID \
- --iam-instance-profile Name=your-agent-profile
-
-# 2. Verify agent can still operate
-# Agent should run verification checklist from Loki-Policy-Template.md
-```
-
-### Rollback plan
-
-If anything breaks:
-```bash
-# Immediately revert to YourCurrentAdminRole
-aws ec2 replace-iam-instance-profile-association \
- --association-id $ASSOC_ID \
- --iam-instance-profile Name=
-```
-
----
-
-## Phase 5: Verify (Agent)
-
-Run the full verification checklist:
-
-```bash
-echo "=== Positive tests (should succeed) ==="
-
-# Can create /loki/ roles
-aws iam create-role --role-name migration-test --path /loki/ \
- --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}'
-echo "✅ Create /loki/ role"
-aws iam delete-role --role-name migration-test
-echo "✅ Delete /loki/ role"
-
-# Can use PowerUser services
-aws s3 ls >/dev/null && echo "✅ S3 access"
-aws lambda list-functions --max-items 1 >/dev/null && echo "✅ Lambda access"
-aws dynamodb list-tables --max-items 1 >/dev/null && echo "✅ DynamoDB access"
-
-echo ""
-echo "=== Negative tests (should fail with AccessDenied) ==="
-
-# Cannot create users
-aws iam create-user --user-name test-should-fail 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: create user"
-
-# Cannot create roles outside /loki/
-aws iam create-role --role-name outside-path-test \
- --assume-role-policy-document '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"lambda.amazonaws.com"},"Action":"sts:AssumeRole"}]}' 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: role outside /loki/"
-
-# Cannot modify own role
-aws iam attach-role-policy --role-name loki-agent-role \
- --policy-arn arn:aws:iam::aws:policy/AdministratorAccess 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: self-escalation"
-
-# Cannot create access keys
-aws iam create-access-key --user-name admin 2>&1 | grep -q "AccessDenied" && echo "✅ Blocked: create access key"
-```
-
----
-
-## Phase 6: Cleanup (Agent)
-
-```bash
-# 1. Delete old IAM roles that are no longer in use
-# (Only after verifying all services use /loki/ roles)
-for OLD_ROLE in $(cat migration-manifest.json | jq -r '.roles_to_migrate[].current_name'); do
- echo "Deleting old role: $OLD_ROLE"
- # Remove inline policies first
- for POLICY in $(aws iam list-role-policies --role-name $OLD_ROLE --query 'PolicyNames[*]' --output text); do
- aws iam delete-role-policy --role-name $OLD_ROLE --policy-name $POLICY
- done
- # Detach managed policies
- for POLICY_ARN in $(aws iam list-attached-role-policies --role-name $OLD_ROLE --query 'AttachedPolicies[*].PolicyArn' --output text); do
- aws iam detach-role-policy --role-name $OLD_ROLE --policy-arn $POLICY_ARN
- done
- # Delete role
- aws iam delete-role --role-name $OLD_ROLE
-done
-
-# 2. Remove old instance profile (human admin)
-# aws iam remove-role-from-instance-profile ...
-# aws iam delete-instance-profile ...
-
-# 3. Update MEMORY.md and AGENTS.md with new role info
-```
-
----
-
-## Post-Migration Updates
-
-### AGENTS.md
-Add to Safety section:
-```markdown
-- **IAM roles must use path `/loki/`** — Terraform `path = "/loki/"` on all `aws_iam_role`, `aws_iam_policy`, and `aws_iam_instance_profile` resources. Agent cannot create roles outside this path.
-```
-
-### new-project-template.md
-Update IAM section to include `/loki/` path requirement.
-
-### MEMORY.md
-Update IAM Role entry:
-```markdown
-- **IAM Role:** loki-agent-role (PowerUserAccess + LokiIAMScoped + LokiDenyGuardrails)
-- **IAM Path:** /loki/ (all Terraform IAM resources must use this path)
-```
-
----
-
-## Troubleshooting
-
-| Symptom | Cause | Fix |
-|---------|-------|-----|
-| `terraform apply` fails with AccessDenied on IAM | Missing `path = "/loki/"` in Terraform | Add `path = "/loki/"` to the resource |
-| Lambda fails with "role cannot be assumed" | New role ARN not propagated (IAM eventual consistency) | Wait 10-30 seconds and retry |
-| CodePipeline fails | Pipeline role moved but stage configs reference old ARN | Update pipeline stage configs |
-| Agent can't `PassRole` | Role is outside `/loki/` path | Move role to `/loki/` path first |
-| `terraform plan` shows destroy+create for roles | Path change = new resource | Expected — use parallel create strategy or state manipulation |
-
----
-
-## Timeline Estimate
-
-| Phase | Duration | Who |
-|-------|----------|-----|
-| Phase 1: Inventory | 15 min | Agent |
-| Phase 2: Prepare | 30 min | Human (role) + Agent (Terraform) |
-| Phase 3: Migrate roles | 15-30 min per project | Agent |
-| Phase 4: Switch profile | 5 min | Human |
-| Phase 5: Verify | 10 min | Agent |
-| Phase 6: Cleanup | 15 min | Agent |
-| **Total** | **~2-3 hours** | Mixed |
-
----
-
-*This is a template. Adjust phases and steps for your specific environment.*
diff --git a/docs/policy-design.md b/docs/policy-design.md
index 7d16393..a1e5eec 100644
--- a/docs/policy-design.md
+++ b/docs/policy-design.md
@@ -40,45 +40,20 @@ EC2 Instance Profile
The boundary allows all services EXCEPT `iam:*`, `organizations:*`, and `account:*`. This means even if a role has `AdministratorAccess` attached, the effective permissions are capped at PowerUser-level.
-```json
-{
- "Version": "2012-10-17",
- "Statement": [
- {
- "Sid": "AllowEverythingExceptDangerous",
- "Effect": "Allow",
- "NotAction": [
- "iam:*",
- "organizations:*",
- "account:*"
- ],
- "Resource": "*"
- },
- {
- "Sid": "AllowPassRoleOnlyLoki",
- "Effect": "Allow",
- "Action": "iam:PassRole",
- "Resource": "arn:aws:iam::*:role/loki/*"
- },
- {
- "Sid": "AllowReadOnlyIAM",
- "Effect": "Allow",
- "Action": [
- "iam:GetRole",
- "iam:GetRolePolicy",
- "iam:ListRolePolicies",
- "iam:ListAttachedRolePolicies"
- ],
- "Resource": "*"
- }
- ]
-}
-```
+> **Canonical source:** [`policies/permissions-boundary.json`](../policies/permissions-boundary.json) (placeholders form) and `terraform/main.tf` `aws_iam_policy.permissions_boundary` (Terraform form).
+
+| Sid | Effect | What it does |
+|------|--------|--------------|
+| AllowEverythingExceptDangerous | Allow | `NotAction: [iam:*, organizations:*, account:*]` on `Resource: *` — caps every role attached to the boundary at PowerUser-level |
+| AllowPassRoleOnlyAgentRoles | Allow | `iam:PassRole` only to `role/IAM_PATH*` — boundary-attached roles can hand off only to agent-path roles |
+| AllowReadOnlyIAM | Allow | Get/List role-policy basics for self-introspection |
### Guardrails That Enforce the Boundary
These statements in `LokiDenyGuardrails` ensure the boundary can't be bypassed:
+> **Illustrative — see [`policies/deny-guardrails.json`](../policies/deny-guardrails.json) for the canonical form.** The snippet below uses concrete `loki/` / `LokiPermissionsBoundary` literals for readability; the canonical file uses `IAM_PATH` / `IAM_PATHLokiPermissionsBoundary` placeholders.
+
```json
{
"Sid": "DenyCreateRoleWithoutBoundary",
@@ -120,102 +95,22 @@ These statements in `LokiDenyGuardrails` ensure the boundary can't be bypassed:
Allows the agent to create/manage IAM roles and policies **only under the `/loki/` path**.
This lets Terraform create execution roles for Lambda, ECS, CodeBuild, CodePipeline, etc.
-```json
-{
- "Version": "2012-10-17",
- "Statement": [
- {
- "Sid": "AllowRoleManagementUnderLokiPath",
- "Effect": "Allow",
- "Action": [
- "iam:CreateRole",
- "iam:DeleteRole",
- "iam:GetRole",
- "iam:GetRolePolicy",
- "iam:ListRolePolicies",
- "iam:ListAttachedRolePolicies",
- "iam:ListInstanceProfilesForRole",
- "iam:TagRole",
- "iam:UntagRole",
- "iam:ListRoleTags",
- "iam:UpdateRole",
- "iam:UpdateRoleDescription",
- "iam:PutRolePolicy",
- "iam:DeleteRolePolicy",
- "iam:AttachRolePolicy",
- "iam:DetachRolePolicy"
- ],
- "Resource": "arn:aws:iam::*:role/loki/*"
- },
- {
- "Sid": "AllowPolicyManagementUnderLokiPath",
- "Effect": "Allow",
- "Action": [
- "iam:CreatePolicy",
- "iam:DeletePolicy",
- "iam:GetPolicy",
- "iam:GetPolicyVersion",
- "iam:ListPolicyVersions",
- "iam:CreatePolicyVersion",
- "iam:DeletePolicyVersion",
- "iam:TagPolicy",
- "iam:UntagPolicy"
- ],
- "Resource": "arn:aws:iam::*:policy/loki/*"
- },
- {
- "Sid": "AllowInstanceProfileManagementUnderLokiPath",
- "Effect": "Allow",
- "Action": [
- "iam:CreateInstanceProfile",
- "iam:DeleteInstanceProfile",
- "iam:GetInstanceProfile",
- "iam:AddRoleToInstanceProfile",
- "iam:RemoveRoleFromInstanceProfile",
- "iam:TagInstanceProfile"
- ],
- "Resource": "arn:aws:iam::*:instance-profile/loki/*"
- },
- {
- "Sid": "AllowPassRoleOnlyLokiRoles",
- "Effect": "Allow",
- "Action": "iam:PassRole",
- "Resource": "arn:aws:iam::*:role/loki/*"
- },
- {
- "Sid": "AllowServiceLinkedRoles",
- "Effect": "Allow",
- "Action": [
- "iam:CreateServiceLinkedRole",
- "iam:DeleteServiceLinkedRole",
- "iam:GetServiceLinkedRoleDeletionStatus"
- ],
- "Resource": "arn:aws:iam::*:role/aws-service-role/*"
- },
- {
- "Sid": "AllowIAMReadOnly",
- "Effect": "Allow",
- "Action": [
- "iam:ListRoles",
- "iam:ListPolicies",
- "iam:ListInstanceProfiles",
- "iam:GetAccountSummary",
- "iam:GetAccountAuthorizationDetails",
- "iam:SimulatePrincipalPolicy",
- "iam:ListOpenIDConnectProviders",
- "iam:ListSAMLProviders"
- ],
- "Resource": "*"
- }
- ]
-}
-```
+> **Canonical source:** [`policies/iam-scoped.json`](../policies/iam-scoped.json) (placeholders form) and `terraform/main.tf` `aws_iam_role_policy.iam_scoped` (Terraform form). Both must stay in sync; the table below is a Sid-level summary, not a full reproduction.
+
+| Sid | Effect | Resource | Purpose |
+|------|--------|----------|---------|
+| AllowRoleManagementUnderAgentPath | Allow | `role/IAM_PATH*` | Create/manage roles only under the agent path |
+| AllowPolicyManagementUnderAgentPath | Allow | `policy/IAM_PATH*` | Create/manage policies only under the agent path |
+| AllowInstanceProfileManagementUnderAgentPath | Allow | `instance-profile/IAM_PATH*` | Same scope for instance profiles |
+| AllowPassRoleOnlyAgentRoles | Allow | `role/IAM_PATH*` | `iam:PassRole` only to agent-created roles |
+| AllowServiceLinkedRoles | Allow | `role/aws-service-role/*` | AWS services need to create their own SLRs |
+| AllowIAMReadOnly | Allow | `*` | Read-only IAM (Get/List/Simulate) account-wide |
### Important Notes
- Replace `*` in the account position of ARNs with your actual AWS account ID for tighter scoping
-- The `/loki/` path means all Terraform-created roles must use `path = "/loki/"` in their config
-- `PassRole` is restricted to `/loki/*` roles only — the agent can't assign roles it didn't create
+- All Terraform-created roles must set `path = var.iam_path` in their config so they land under `IAM_PATH` (e.g. `/loki/` for Terraform, `loki/` for JSON substitution — see README)
+- `PassRole` is restricted to agent-path roles only — the agent can't assign roles it didn't create
- Service-linked roles are allowed because AWS services create these automatically
---
@@ -225,94 +120,47 @@ This lets Terraform create execution roles for Lambda, ECS, CodeBuild, CodePipel
Explicit denies that prevent privilege escalation and dangerous actions.
**Deny always wins over Allow** — these can't be bypassed even with PowerUserAccess.
-```json
-{
- "Version": "2012-10-17",
- "Statement": [
- {
- "Sid": "DenyIdentityManagement",
- "Effect": "Deny",
- "Action": [
- "iam:CreateUser",
- "iam:DeleteUser",
- "iam:CreateGroup",
- "iam:DeleteGroup",
- "iam:CreateAccessKey",
- "iam:DeleteAccessKey",
- "iam:CreateLoginProfile",
- "iam:DeleteLoginProfile",
- "iam:UpdateLoginProfile",
- "iam:AddUserToGroup",
- "iam:RemoveUserFromGroup",
- "iam:AttachUserPolicy",
- "iam:DetachUserPolicy",
- "iam:PutUserPolicy",
- "iam:DeleteUserPolicy",
- "iam:AttachGroupPolicy",
- "iam:DetachGroupPolicy",
- "iam:PutGroupPolicy",
- "iam:DeleteGroupPolicy",
- "iam:DeactivateMFADevice",
- "iam:DeleteVirtualMFADevice"
- ],
- "Resource": "*"
- },
- {
- "Sid": "DenySelfEscalation",
- "Effect": "Deny",
- "Action": [
- "iam:AttachRolePolicy",
- "iam:DetachRolePolicy",
- "iam:PutRolePolicy",
- "iam:DeleteRolePolicy",
- "iam:UpdateAssumeRolePolicy",
- "iam:DeleteRole"
- ],
- "Resource": [
- "arn:aws:iam::*:role/YourCurrentAdminRole",
- "arn:aws:iam::*:role/loki-agent-role",
- "arn:aws:iam::*:instance-profile/your-agent-profile"
- ],
- "Condition": {}
- },
- {
- "Sid": "DenyOrganizationsAndAccount",
- "Effect": "Deny",
- "Action": [
- "organizations:*",
- "account:*"
- ],
- "Resource": "*"
- },
- {
- "Sid": "DenyRoleManagementOutsideLokiPath",
- "Effect": "Deny",
- "Action": [
- "iam:CreateRole",
- "iam:DeleteRole",
- "iam:PutRolePolicy",
- "iam:DeleteRolePolicy",
- "iam:AttachRolePolicy",
- "iam:DetachRolePolicy",
- "iam:UpdateAssumeRolePolicy"
- ],
- "NotResource": [
- "arn:aws:iam::*:role/loki/*",
- "arn:aws:iam::*:role/aws-service-role/*"
- ]
- }
- ]
-}
-```
+> **Two equivalent representations.** This policy is shipped in two forms:
+>
+> - [`policies/deny-guardrails.json`](../policies/deny-guardrails.json) — raw IAM policy document with literal
+> placeholders (`ACCOUNT_ID`, `IAM_PATH`, `AGENT_ROLE_NAME`, etc.).
+> Used by the AWS-CLI `Quick Start` flow in the README.
+> - `terraform/main.tf` `aws_iam_role_policy.deny_guardrails` — same
+> policy expressed via `jsonencode()` over a list of statement objects.
+> Used by the `terraform/` module flow.
+>
+> The two **must stay in sync** — enforced by the per-Sid Action-set parity
+> check in `.github/workflows/lint.yml`. The Terraform form is canonical for
+> ARN composition (uses `aws_iam_role.agent.arn` directly, no path footgun)
+> and gates the trail-storage / trail-KMS statements behind input variables
+> with validation. The JSON form is canonical for documentation, review,
+> and copy/paste auditing. When changing one, change both; CI fails the PR
+> otherwise.
+
+The table below is a Sid-level summary; consult the canonical files for the full action lists.
+
+| Sid | Resource scope | Purpose |
+|------|----------------|---------|
+| DenyIdentityManagement | `*` | No new IAM users, access keys, login profiles, MFA devices |
+| DenySelfEscalation | `role/IAM_PATHAGENT_ROLE_NAME` (JSON) / `aws_iam_role.agent.arn` (TF) | Agent cannot mutate its own role (policies, trust, tags, description, boundary) |
+| DenyOrganizationsAndAccount | `*` | No `organizations:*` / `account:*` |
+| DenyRoleManagementOutsideAgentPath | `NotResource: [role/IAM_PATH*, role/aws-service-role/*]` | Role mutation only inside agent path |
+| DenyCreateRoleWithoutBoundary | `role/IAM_PATH*` | New roles must attach the permissions boundary |
+| DenyRemovingBoundary | `role/IAM_PATH*` | Cannot remove boundary from agent-path roles |
+| DenyBoundaryPolicyModification | `policy/IAM_PATHLokiPermissionsBoundary` | Cannot mutate the boundary policy itself |
+| DenyCloudTrailTampering | `*` | Cannot create/stop/delete/update trails, event-data-stores, channels, selectors, resource policies |
+| DenyAuditServiceTampering | `*` | Cannot disable Config/GuardDuty/SecurityHub recorders, members, filters, finding triage |
+| DenyTrailStorageTampering | trail S3 bucket | Cannot delete/policy-modify/notify-redirect/object-overwrite the trail bucket |
+| DenyTrailKmsTampering | trail KMS CMK | Cannot delete/disable/grant-modify/import-material the trail's CMK |
### Guardrail Explanations
| Rule | Why |
|------|-----|
| DenyIdentityManagement | Agent can't create users, access keys, or login profiles — no new identities |
-| DenySelfEscalation | Agent can't modify its own role or instance profile — no privilege escalation |
+| DenySelfEscalation | Agent can't modify its own role (policies, trust, tags, description, boundary) — no privilege escalation. The JSON template builds the role ARN as `role/IAM_PATHAGENT_ROLE_NAME` so the deny works whether the agent role lives at the root or under a path — substitute `IAM_PATH` (e.g. `loki/`) and `AGENT_ROLE_NAME` (e.g. `loki-agent-role`) independently. The Terraform module avoids the placeholder entirely by referencing `aws_iam_role.agent.arn`. |
| DenyOrganizationsAndAccount | Agent can't manage the AWS Organization or account settings |
-| DenyRoleManagementOutsideLokiPath | Agent can't touch ANY role outside `/loki/*` — protects admin roles, service roles, etc. |
+| DenyRoleManagementOutsideAgentPath | Agent can't touch ANY role outside the agent path — protects admin roles, service roles, etc. |
---
@@ -348,42 +196,7 @@ resource "aws_iam_policy" "custom" {
## Instance Profile Setup
-```bash
-# 1. Create the agent role (do this as the human admin, NOT the agent)
-aws iam create-role \
- --role-name loki-agent-role \
- --assume-role-policy-document '{
- "Version": "2012-10-17",
- "Statement": [{
- "Effect": "Allow",
- "Principal": {"Service": "ec2.amazonaws.com"},
- "Action": "sts:AssumeRole"
- }]
- }'
-
-# 2. Attach policies
-aws iam attach-role-policy --role-name loki-agent-role \
- --policy-arn arn:aws:iam::aws:policy/PowerUserAccess
-
-aws iam put-role-policy --role-name loki-agent-role \
- --policy-name LokiIAMScoped \
- --policy-document file://loki-iam-scoped.json
-
-aws iam put-role-policy --role-name loki-agent-role \
- --policy-name LokiDenyGuardrails \
- --policy-document file://loki-deny-guardrails.json
-
-# 3. Create instance profile and attach
-aws iam create-instance-profile --instance-profile-name your-agent-profile
-aws iam add-role-to-instance-profile \
- --instance-profile-name your-agent-profile \
- --role-name loki-agent-role
-
-# 4. Associate with EC2 instance
-aws ec2 associate-iam-instance-profile \
- --instance-id i-XXXXXXXXX \
- --iam-instance-profile Name=your-agent-profile
-```
+The authoritative setup flow lives in the [main README's Quick Start](../README.md#quick-start). It includes the substitution helper that resolves placeholders in `policies/*.json` to runnable IAM policy documents under `out/*.json`, then runs `aws iam create-policy` / `create-role` / `put-role-policy` / `create-instance-profile` against the resolved files. Don't duplicate that flow here — single source of truth.
---
@@ -418,7 +231,107 @@ aws iam create-role --role-name test-outside-path \
1. **No privilege escalation** — agent can't modify its own permissions
2. **No lateral movement** — agent can't create users/keys to persist access
3. **Blast radius limited** — agent can only create/modify roles under `/loki/`
-4. **Audit trail** — all IAM actions logged in CloudTrail
+4. **Audit trail integrity** — all IAM actions logged in CloudTrail.
+ The agent is explicitly denied:
+ - CloudTrail tampering: `StopLogging`, `DeleteTrail`, `UpdateTrail`,
+ `PutEventSelectors`, `PutInsightSelectors`, event-data-store mutation
+ - Config / GuardDuty / SecurityHub recorder tampering (delete, stop,
+ overwrite, or disable individual standards/controls)
+ - Trail S3 bucket tampering: delete, policy/ACL change, object
+ deletion, lifecycle/versioning rewrite, governance-retention bypass
+ - Trail KMS key tampering: schedule-deletion, disable, key-policy
+ rewrite, grant create/revoke
+
+ The S3 bucket and KMS key denies are scoped to specific ARNs
+ (placeholders `TRAIL_BUCKET_NAME` and `KMS_REGION` / `TRAIL_KMS_KEY_ID` in the
+ template) so they do not block the agent from working with other
+ buckets/keys.
+
+ **Trade-offs and caveats:**
+
+ - *CloudTrail denies use `Resource: "*"`.* Most CloudTrail actions
+ support trail-ARN scoping, but we deny on `*` so the agent cannot
+ stand up a competing/parallel trail and then disable it. To make
+ that rationale airtight, the deny also covers `cloudtrail:CreateTrail`,
+ `CreateEventDataStore`, and `CreateChannel` — the agent cannot
+ create new audit infrastructure at all. The side effect is that
+ trail provisioning is an admin/Terraform task, not an agent task.
+ Adjust if your operating model differs.
+
+ - *KMS grant denies (`CreateGrant` / `RevokeGrant`) are
+ unconditional.* They prevent the agent from issuing or
+ revoking grants on the trail's CMK, and they also apply when an
+ AWS service requests the grant on the agent's behalf (requests
+ where `kms:GrantIsForAWSResource` is `true`). For the threat
+ model here (agent blinding its own audit) this is intended;
+ to permit service-mediated grants while still denying direct
+ ones, add a condition `"Bool": {"kms:GrantIsForAWSResource": "false"}`.
+
+ - *Placeholders must be substituted before deployment.* A
+ literal `TRAIL_BUCKET_NAME` or `TRAIL_KMS_KEY_ID` left in the
+ policy is syntactically valid but matches nothing — a silent
+ no-op. If the trail is unencrypted or absent entirely, delete
+ the corresponding statement rather than supplying a fake ARN.
+ A pre-deploy lint (`grep -E 'KMS_REGION|TRAIL_(BUCKET_NAME|KMS_KEY_ID)'
+ out/*.json`) over the substituted output should return nothing. The KMS resource is
+ split into `KMS_REGION:ACCOUNT_ID:key/TRAIL_KMS_KEY_ID` rather
+ than a single `TRAIL_KMS_KEY_ARN` placeholder so partial
+ substitution still produces an ARN-shaped string — partial
+ fills fail loudly instead of deploying a dead deny.
+
+ - *Day-2 ops on the trail bucket and CMK are blocked for the agent.*
+ `s3:PutBucketPolicy`, `PutEncryptionConfiguration`, `PutBucketVersioning`,
+ `kms:PutKeyPolicy`, `ScheduleKeyDeletion`, `CreateGrant`, etc.
+ are all denied. The trail bucket and KMS key **must be managed
+ outside this agent's Terraform state** — use a separate state
+ file with a separate (admin) role, or treat the audit trail as
+ unmanaged infra. Otherwise routine maintenance (KMS key rotation,
+ bucket policy update for a new principal, lifecycle-rule change)
+ will silently fail with no remediation path until the deny is
+ lifted manually. Recommended layout: a dedicated `audit-trail/`
+ module owned by the platform/security team, run with an admin
+ role; this `loki-permissions` module references its outputs but
+ never writes to the bucket/key.
+
+ - *Config / GuardDuty / SecurityHub initial setup is also blocked
+ for the agent.* `DenyAuditServiceTampering` covers
+ `config:PutConfigurationRecorder` and `config:PutDeliveryChannel`
+ (so the agent cannot overwrite an existing recorder to point at
+ a black-hole bucket). The side effect is that *first-time setup*
+ of these services must also be done outside the agent's Terraform
+ state — same separation-of-duties pattern as the trail bucket/CMK.
+ If the agent attempts to enable Config / GuardDuty / SecurityHub
+ for the first time, the apply fails on these actions; the fix is
+ to bootstrap them via an admin role and have the agent reference
+ the resulting infrastructure read-only.
+
+ - *Cross-partition templates.* The JSON and Terraform templates hardcode
+ `aws` partition (commercial region ARNs). GovCloud (`aws-us-gov`) and
+ China (`aws-cn`) deployments would require manual partition substitution
+ throughout all 4 JSON files + Terraform module. **Partition support is
+ planned as a future enhancement** to thread a `var.aws_partition` parameter
+ and systematically replace all `arn:aws:` with `arn:${var.aws_partition}:`.
+ This is a separate scope from the current audit-trail deny set; users
+ deploying to non-commercial regions should use this template as a
+ reference and manually update partitions.
+
+ - *Residual gaps (not currently denied, intentional):* the agent
+ can still call `cloudtrail:GetTrail` / `LookupEvents` /
+ `DescribeTrails` for legitimate debugging, and can still create
+ **new** S3 buckets / KMS keys unrelated to the audit trail. The
+ deny statements above are surgically targeted at the audit
+ infrastructure; they do not impose a blanket S3/KMS read-only
+ posture, which would break the agent's day job.
+
+ - *Triage actions denied (intentional, broad).* `DenyAuditServiceTampering`
+ denies `securityhub:BatchUpdateFindings` and `guardduty:CreateFilter`/
+ `UpdateFilter`/`DeleteFilter` with `Resource: "*"`. These actions can
+ legitimately be used for triage (mark findings RESOLVED, suppress noise
+ via filter), but the same actions can also be used to silence findings
+ about the agent's own activity. We deny broadly because triage is a
+ human/SOC task, not an agent task. If your operating model needs the
+ agent to do triage, scope these by `securityhub:ASFFSyntaxPath` /
+ `guardduty:DetectorId` conditions or move them out of the deny set.
5. **Reversible** — admin can delete `/loki/*` roles to revoke all agent-created permissions
6. **Human retains control** — admin role and instance profile are protected by explicit deny
diff --git a/policies/deny-guardrails.json b/policies/deny-guardrails.json
index 6bc841d..616249f 100644
--- a/policies/deny-guardrails.json
+++ b/policies/deny-guardrails.json
@@ -38,10 +38,16 @@
"iam:PutRolePolicy",
"iam:DeleteRolePolicy",
"iam:UpdateAssumeRolePolicy",
- "iam:DeleteRole"
+ "iam:DeleteRole",
+ "iam:TagRole",
+ "iam:UntagRole",
+ "iam:UpdateRole",
+ "iam:UpdateRoleDescription",
+ "iam:PutRolePermissionsBoundary",
+ "iam:DeleteRolePermissionsBoundary"
],
"Resource": [
- "arn:aws:iam::ACCOUNT_ID:role/AGENT_ROLE_NAME"
+ "arn:aws:iam::ACCOUNT_ID:role/IAM_PATHAGENT_ROLE_NAME"
]
},
{
@@ -65,7 +71,11 @@
"iam:DetachRolePolicy",
"iam:UpdateAssumeRolePolicy",
"iam:PutRolePermissionsBoundary",
- "iam:DeleteRolePermissionsBoundary"
+ "iam:DeleteRolePermissionsBoundary",
+ "iam:TagRole",
+ "iam:UntagRole",
+ "iam:UpdateRole",
+ "iam:UpdateRoleDescription"
],
"NotResource": [
"arn:aws:iam::ACCOUNT_ID:role/IAM_PATH*",
@@ -102,6 +112,116 @@
"iam:SetDefaultPolicyVersion"
],
"Resource": "arn:aws:iam::ACCOUNT_ID:policy/IAM_PATHLokiPermissionsBoundary"
+ },
+ {
+ "Sid": "DenyCloudTrailTampering",
+ "Effect": "Deny",
+ "Action": [
+ "cloudtrail:CreateTrail",
+ "cloudtrail:CreateEventDataStore",
+ "cloudtrail:CreateChannel",
+ "cloudtrail:StopLogging",
+ "cloudtrail:DeleteTrail",
+ "cloudtrail:UpdateTrail",
+ "cloudtrail:PutEventSelectors",
+ "cloudtrail:PutInsightSelectors",
+ "cloudtrail:PutResourcePolicy",
+ "cloudtrail:DeleteResourcePolicy",
+ "cloudtrail:DeleteEventDataStore",
+ "cloudtrail:UpdateEventDataStore",
+ "cloudtrail:DeleteChannel",
+ "cloudtrail:UpdateChannel"
+ ],
+ "Resource": "*"
+ },
+ {
+ "Sid": "DenyAuditServiceTampering",
+ "Effect": "Deny",
+ "Action": [
+ "config:DeleteConfigurationRecorder",
+ "config:StopConfigurationRecorder",
+ "config:PutConfigurationRecorder",
+ "config:DeleteDeliveryChannel",
+ "config:PutDeliveryChannel",
+ "config:DeleteConfigRule",
+ "config:DeleteConfigurationAggregator",
+ "config:DeleteOrganizationConfigRule",
+ "config:DeleteRetentionConfiguration",
+ "config:DeleteRemediationConfiguration",
+ "config:DeleteEvaluationResults",
+ "guardduty:DeleteDetector",
+ "guardduty:UpdateDetector",
+ "guardduty:DisassociateFromMasterAccount",
+ "guardduty:StopMonitoringMembers",
+ "guardduty:DeletePublishingDestination",
+ "guardduty:UpdatePublishingDestination",
+ "guardduty:DisassociateMembers",
+ "guardduty:DeleteMembers",
+ "guardduty:UpdateMemberDetectors",
+ "guardduty:CreateFilter",
+ "guardduty:UpdateFilter",
+ "guardduty:DeleteFilter",
+ "securityhub:DisableSecurityHub",
+ "securityhub:DisassociateFromMasterAccount",
+ "securityhub:BatchDisableStandards",
+ "securityhub:UpdateStandardsControl",
+ "securityhub:DeleteInsight",
+ "securityhub:UpdateInsight",
+ "securityhub:BatchUpdateFindings"
+ ],
+ "Resource": "*"
+ },
+ {
+ "Sid": "DenyTrailStorageTampering",
+ "Effect": "Deny",
+ "Action": [
+ "s3:DeleteBucket",
+ "s3:DeleteBucketPolicy",
+ "s3:PutBucketPolicy",
+ "s3:PutBucketAcl",
+ "s3:PutBucketPublicAccessBlock",
+ "s3:PutBucketOwnershipControls",
+ "s3:PutBucketNotification",
+ "s3:PutBucketWebsite",
+ "s3:PutBucketVersioning",
+ "s3:PutBucketLogging",
+ "s3:PutLifecycleConfiguration",
+ "s3:PutReplicationConfiguration",
+ "s3:PutEncryptionConfiguration",
+ "s3:PutBucketObjectLockConfiguration",
+ "s3:DeleteObject",
+ "s3:DeleteObjectVersion",
+ "s3:PutObject",
+ "s3:PutObjectAcl",
+ "s3:PutObjectLegalHold",
+ "s3:PutObjectRetention",
+ "s3:BypassGovernanceRetention"
+ ],
+ "Resource": [
+ "arn:aws:s3:::TRAIL_BUCKET_NAME",
+ "arn:aws:s3:::TRAIL_BUCKET_NAME/*"
+ ]
+ },
+ {
+ "Sid": "DenyTrailKmsTampering",
+ "Effect": "Deny",
+ "Action": [
+ "kms:ScheduleKeyDeletion",
+ "kms:DisableKey",
+ "kms:PutKeyPolicy",
+ "kms:CreateGrant",
+ "kms:RevokeGrant",
+ "kms:CancelKeyDeletion",
+ "kms:UpdateAlias",
+ "kms:DeleteAlias",
+ "kms:PutResourcePolicy",
+ "kms:DeleteResourcePolicy",
+ "kms:ImportKeyMaterial",
+ "kms:DeleteImportedKeyMaterial"
+ ],
+ "Resource": [
+ "arn:aws:kms:KMS_REGION:ACCOUNT_ID:key/TRAIL_KMS_KEY_ID"
+ ]
}
]
}
diff --git a/scripts/check_parity.py b/scripts/check_parity.py
new file mode 100755
index 0000000..0bc1980
--- /dev/null
+++ b/scripts/check_parity.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+"""Parity check between policies/*.json and Terraform-rendered statement lists.
+
+The Terraform module's deny_guardrails / iam_scoped / permissions_boundary
+policies and policies/*.json must encode identical Sid → (Action|NotAction)
+sets. Drift would leave one deployment path under-protected.
+
+Usage:
+
+ # Default: reads rendered tf-*.json from <repo>/terraform (resolved from the repo root, so any CWD works).
+ python3 ../scripts/check_parity.py
+
+ # Or pass a render directory explicitly:
+ python3 scripts/check_parity.py --render-dir terraform
+
+Exits 0 on parity, 1 on drift, 2 if a Terraform render file is missing.
+
+Note on scope: this verifies Action/NotAction parity per Sid.
+Resource/NotResource/Condition are NOT checked because `terraform console`
+can't resolve resource refs like `aws_iam_role.agent.arn` without an apply
+(returns "known after apply"). Scope correctness is enforced by review +
+the JSON template's literal placeholders being humanly auditable.
+"""
+
+import argparse
+import json
+import sys
+import pathlib
+
+# Repo-relative paths resolved against this script's location, not CWD.
+REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent  # two levels up from this file (scripts/check_parity.py)
+
+# (logical name, JSON source path relative to REPO_ROOT, TF render filename looked up inside --render-dir)
+POLICIES = [
+ ("deny-guardrails", "policies/deny-guardrails.json", "tf-deny.json"),
+ ("iam-scoped", "policies/iam-scoped.json", "tf-iam-scoped.json"),
+ ("permissions-boundary", "policies/permissions-boundary.json", "tf-boundary.json"),
+]
+
+
+def actions_of(stmt):
+ """Return an (action_set, notaction_set) pair of frozensets for one statement dict.
+ A missing or None key yields an empty set; a bare string becomes a one-element set."""
+ def to_set(key):  # normalize stmt[key] into a frozenset
+ if key not in stmt or stmt[key] is None:
+ return frozenset()
+ v = stmt[key]
+ return frozenset(v) if isinstance(v, list) else frozenset([v])  # wrap scalars so a string is not split into characters
+ return to_set("Action"), to_set("NotAction")
+
+
+def by_sid(stmts):  # Map each statement's "Sid" to its (Action, NotAction) frozenset pair.
+ return {s["Sid"]: actions_of(s) for s in stmts}  # NOTE(review): a statement without "Sid" raises KeyError; duplicate Sids keep only the last one
+
+
+def main():  # CLI entry point: exits 2 on a missing render file, 1 on drift, otherwise returns normally
+ parser = argparse.ArgumentParser(description=__doc__.split("\n")[0])  # first docstring line only
+ parser.add_argument(
+ "--render-dir",
+ default="terraform",
+ help="Directory containing the Terraform-rendered tf-*.json files "
+ "(default: terraform, resolved relative to repo root).",
+ )
+ args = parser.parse_args()
+
+ render_dir = (REPO_ROOT / args.render_dir).resolve()  # relative values are joined onto REPO_ROOT, not the CWD
+
+ any_drift = False  # set on any mismatch; checked once after all policies are compared
+ for name, json_rel, tf_filename in POLICIES:
+ json_path = REPO_ROOT / json_rel
+ tf_path = render_dir / tf_filename
+
+ if not tf_path.exists():
+ print(f"[{name}] MISSING render file: {tf_path}", file=sys.stderr)
+ print(f" Run `terraform console` to produce it before this check.", file=sys.stderr)
+ sys.exit(2)  # distinct from exit 1 (drift): the comparison could not run at all
+
+ j = json.loads(json_path.read_text())
+ tf = json.loads(tf_path.read_text())
+ js = by_sid(j["Statement"])  # JSON side: full policy document with a "Statement" list
+ ts = by_sid(tf)  # TF side: the render file is iterated directly as a list of statements
+
+ miss_tf = set(js) - set(ts)  # Sids present only in the JSON policy
+ miss_json = set(ts) - set(js)  # Sids present only in the Terraform render
+ if miss_tf or miss_json:
+ any_drift = True
+ print(f"[{name}] STATEMENT DRIFT:")
+ if miss_tf: print(f" Sids in JSON only: {sorted(miss_tf)}")
+ if miss_json: print(f" Sids in TF only: {sorted(miss_json)}")
+
+ for sid in sorted(set(js) & set(ts)):  # compare action sets only for Sids present on both sides
+ ja, jna = js[sid]
+ ta, tna = ts[sid]
+ if ja != ta:
+ any_drift = True
+ print(f"[{name}] ACTION DRIFT in Sid='{sid}':")
+ if ja - ta: print(f" JSON only: {sorted(ja - ta)}")
+ if ta - ja: print(f" TF only: {sorted(ta - ja)}")
+ if jna != tna:
+ any_drift = True
+ print(f"[{name}] NOTACTION DRIFT in Sid='{sid}':")
+ if jna - tna: print(f" JSON only: {sorted(jna - tna)}")
+ if tna - jna: print(f" TF only: {sorted(tna - jna)}")
+
+ total_a = sum(len(a) for a, _ in js.values())  # summary counts are taken from the JSON side only
+ total_na = sum(len(na) for _, na in js.values())
+ extras = f", {total_na} NotActions" if total_na else ""
+ print(f"[{name}] {len(js)} statements, {total_a} actions{extras}")
+
+ if any_drift:
+ sys.exit(1)
+ print("all policies parity OK")
+
+if __name__ == "__main__":
+ main()
diff --git a/terraform/examples/README.md b/terraform/examples/README.md
new file mode 100644
index 0000000..30f2359
--- /dev/null
+++ b/terraform/examples/README.md
@@ -0,0 +1,27 @@
+# Terraform Examples
+
+This directory contains **standalone, non-module** Terraform snippets that
+demonstrate how to author IAM resources from a *consuming* project (a
+project that runs *under* the agent role and creates roles for its own
+Lambda/CodeBuild/etc.).
+
+These files are **not** part of the `loki-permissions` module. They are
+reference material only. The `terraform/` directory is the actual module;
+the snippets are kept in the `examples/` subdirectory because Terraform
+would treat sibling `.tf` files as part of the same module otherwise
+(causing `Duplicate variable declaration` errors when both define
+`variable "account_id"`).
+
+## Files
+
+- `downstream-consumer.tf` — shows what an agent-spawned Lambda /
+ CodeBuild / CodePipeline role looks like with the required `path` and
+ `permissions_boundary` attributes set. Copy/adapt into your project.
+
+## Usage
+
+```bash
+# In your project, NOT in this repo:
+cp terraform/examples/downstream-consumer.tf my-project/iam.tf
+# Then edit variables and `terraform apply` from my-project/.
+```
diff --git a/terraform/example.tf b/terraform/examples/downstream-consumer.tf
similarity index 76%
rename from terraform/example.tf
rename to terraform/examples/downstream-consumer.tf
index 5c7f806..25f2067 100644
--- a/terraform/example.tf
+++ b/terraform/examples/downstream-consumer.tf
@@ -2,6 +2,18 @@
#
# Add this to your project's Terraform to create IAM roles
# that comply with the agent's scoped permissions.
+#
+# ⚠️ IMPORTANT: This example HARDCODES the defaults from the loki-permissions
+# module:
+# - path = "/loki/" (matches module var.iam_path default)
+# - boundary policy name "LokiPermissionsBoundary" (matches module var.boundary_policy_name default)
+#
+# If your loki-permissions module deployment customizes EITHER of these vars,
+# you MUST update the literals below to match. Otherwise:
+# - path mismatch → DenyRoleManagementOutsideAgentPath blocks role creation
+# - boundary mismatch → DenyCreateRoleWithoutBoundary blocks role creation
+# Both fail at apply time with cryptic IAM errors. Parameterize via
+# variables if you expect to change them per-environment.
variable "account_id" {
type = string
diff --git a/terraform/main.tf b/terraform/main.tf
index 224c2e6..b24ba02 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -1,6 +1,24 @@
locals {
- boundary_arn = "arn:aws:iam::${var.account_id}:policy${var.iam_path}${var.boundary_policy_name}"
- role_path = var.iam_path
+
+ # ---------------------------------------------------------------------------
+ # PARTITION LOCK-IN (deferred; revisit if GovCloud / China support is needed)
+ # ---------------------------------------------------------------------------
+ # The AWS partition `aws` is hardcoded in 3 places:
+ # 1. terraform/main.tf — ARN composition (`arn:aws:iam:...`, etc.)
+ # 2. policies/*.json — inline ARN literals
+ # 3. terraform/variables.tf — `trail_kms_key_arn` validation regex
+ # User decision (2026-05-13): no GovCloud/China support in this template.
+ # If that changes, introduce `var.aws_partition` (default "aws") and thread
+ # it through all 3 sites — don't fix one in isolation.
+ # ---------------------------------------------------------------------------
+
+ # Fail-closed safety check: if neither trail var is set, the user
+ # must explicitly acknowledge they have no CloudTrail to protect.
+ # Otherwise the deny statements silently disappear while the agent
+ # keeps PowerUser-level S3/KMS access to whatever audit trail does
+ # exist in the account. See trail_protection_acknowledged in
+ # variables.tf.
+ trail_protection_omitted = var.trail_bucket_name == null && var.trail_kms_key_arn == null
}
# --- Permissions Boundary ---
@@ -13,32 +31,8 @@ resource "aws_iam_policy" "permissions_boundary" {
description = "Permissions boundary for AI agent-created roles. Blocks IAM/Orgs/Account."
policy = jsonencode({
- Version = "2012-10-17"
- Statement = [
- {
- Sid = "AllowEverythingExceptDangerous"
- Effect = "Allow"
- NotAction = ["iam:*", "organizations:*", "account:*"]
- Resource = "*"
- },
- {
- Sid = "AllowPassRoleOnlyAgentRoles"
- Effect = "Allow"
- Action = "iam:PassRole"
- Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
- },
- {
- Sid = "AllowReadOnlyIAM"
- Effect = "Allow"
- Action = [
- "iam:GetRole",
- "iam:GetRolePolicy",
- "iam:ListRolePolicies",
- "iam:ListAttachedRolePolicies"
- ]
- Resource = "*"
- }
- ]
+ Version = "2012-10-17"
+ Statement = local.permissions_boundary_statements
})
tags = var.tags
@@ -48,6 +42,7 @@ resource "aws_iam_policy" "permissions_boundary" {
resource "aws_iam_role" "agent" {
name = var.agent_role_name
+ path = var.iam_path
assume_role_policy = jsonencode({
Version = "2012-10-17"
@@ -73,72 +68,8 @@ resource "aws_iam_role_policy" "iam_scoped" {
role = aws_iam_role.agent.name
policy = jsonencode({
- Version = "2012-10-17"
- Statement = [
- {
- Sid = "AllowRoleManagementUnderAgentPath"
- Effect = "Allow"
- Action = [
- "iam:CreateRole", "iam:DeleteRole", "iam:GetRole",
- "iam:GetRolePolicy", "iam:ListRolePolicies",
- "iam:ListAttachedRolePolicies", "iam:ListInstanceProfilesForRole",
- "iam:TagRole", "iam:UntagRole", "iam:ListRoleTags",
- "iam:UpdateRole", "iam:UpdateRoleDescription",
- "iam:UpdateAssumeRolePolicy",
- "iam:PutRolePolicy", "iam:DeleteRolePolicy",
- "iam:AttachRolePolicy", "iam:DetachRolePolicy",
- "iam:PutRolePermissionsBoundary"
- ]
- Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
- },
- {
- Sid = "AllowPolicyManagementUnderAgentPath"
- Effect = "Allow"
- Action = [
- "iam:CreatePolicy", "iam:DeletePolicy",
- "iam:GetPolicy", "iam:GetPolicyVersion",
- "iam:ListPolicyVersions", "iam:CreatePolicyVersion",
- "iam:DeletePolicyVersion", "iam:TagPolicy", "iam:UntagPolicy"
- ]
- Resource = "arn:aws:iam::${var.account_id}:policy${var.iam_path}*"
- },
- {
- Sid = "AllowInstanceProfileManagement"
- Effect = "Allow"
- Action = [
- "iam:CreateInstanceProfile", "iam:DeleteInstanceProfile",
- "iam:GetInstanceProfile", "iam:AddRoleToInstanceProfile",
- "iam:RemoveRoleFromInstanceProfile", "iam:TagInstanceProfile"
- ]
- Resource = "arn:aws:iam::${var.account_id}:instance-profile${var.iam_path}*"
- },
- {
- Sid = "AllowPassRoleOnlyAgentRoles"
- Effect = "Allow"
- Action = "iam:PassRole"
- Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
- },
- {
- Sid = "AllowServiceLinkedRoles"
- Effect = "Allow"
- Action = [
- "iam:CreateServiceLinkedRole",
- "iam:DeleteServiceLinkedRole",
- "iam:GetServiceLinkedRoleDeletionStatus"
- ]
- Resource = "arn:aws:iam::${var.account_id}:role/aws-service-role/*"
- },
- {
- Sid = "AllowIAMReadOnly"
- Effect = "Allow"
- Action = [
- "iam:ListRoles", "iam:ListPolicies", "iam:ListInstanceProfiles",
- "iam:GetAccountSummary", "iam:SimulatePrincipalPolicy",
- "iam:ListOpenIDConnectProviders", "iam:ListSAMLProviders"
- ]
- Resource = "*"
- }
- ]
+ Version = "2012-10-17"
+ Statement = local.iam_scoped_statements
})
}
@@ -147,94 +78,296 @@ resource "aws_iam_role_policy" "deny_guardrails" {
name = "LokiDenyGuardrails"
role = aws_iam_role.agent.name
+ lifecycle {
+ precondition { # fail-closed gate evaluated for this policy resource
+ condition = !local.trail_protection_omitted || var.trail_protection_acknowledged # passes unless protection is omitted AND the omission is unacknowledged; the local is defined outside this hunk (per the message: both trail vars null — confirm)
+ error_message = "BOTH trail_bucket_name AND trail_kms_key_arn are null — DenyTrailStorageTampering and DenyTrailKmsTampering will NOT be deployed. ** This disables an entire defense layer. ** If your account has a CloudTrail, the agent retains PowerUser-level S3/KMS access to its bucket and CMK; audit-trail tampering will not be blocked. To proceed anyway (e.g. no trail exists, or trail protection is enforced elsewhere), set trail_protection_acknowledged = true — you are responsible for the resulting risk."
+ }
+ }
+
policy = jsonencode({
Version = "2012-10-17"
- Statement = [
- {
- Sid = "DenyIdentityManagement"
- Effect = "Deny"
- Action = [
- "iam:CreateUser", "iam:DeleteUser",
- "iam:CreateGroup", "iam:DeleteGroup",
- "iam:CreateAccessKey", "iam:DeleteAccessKey",
- "iam:CreateLoginProfile", "iam:DeleteLoginProfile", "iam:UpdateLoginProfile",
- "iam:AddUserToGroup", "iam:RemoveUserFromGroup",
- "iam:AttachUserPolicy", "iam:DetachUserPolicy",
- "iam:PutUserPolicy", "iam:DeleteUserPolicy",
- "iam:AttachGroupPolicy", "iam:DetachGroupPolicy",
- "iam:PutGroupPolicy", "iam:DeleteGroupPolicy",
- "iam:DeactivateMFADevice", "iam:DeleteVirtualMFADevice"
- ]
- Resource = "*"
- },
- {
- Sid = "DenySelfEscalation"
- Effect = "Deny"
- Action = [
- "iam:AttachRolePolicy", "iam:DetachRolePolicy",
- "iam:PutRolePolicy", "iam:DeleteRolePolicy",
- "iam:UpdateAssumeRolePolicy", "iam:DeleteRole"
- ]
- Resource = [aws_iam_role.agent.arn]
- },
- {
- Sid = "DenyOrganizationsAndAccount"
- Effect = "Deny"
- Action = ["organizations:*", "account:*"]
- Resource = "*"
- },
- {
- Sid = "DenyRoleManagementOutsideAgentPath"
- Effect = "Deny"
- Action = [
- "iam:CreateRole", "iam:DeleteRole",
- "iam:PutRolePolicy", "iam:DeleteRolePolicy",
- "iam:AttachRolePolicy", "iam:DetachRolePolicy",
- "iam:UpdateAssumeRolePolicy",
- "iam:PutRolePermissionsBoundary", "iam:DeleteRolePermissionsBoundary"
- ]
- NotResource = [
- "arn:aws:iam::${var.account_id}:role${var.iam_path}*",
- "arn:aws:iam::${var.account_id}:role/aws-service-role/*"
- ]
- },
- {
- Sid = "DenyCreateRoleWithoutBoundary"
- Effect = "Deny"
- Action = "iam:CreateRole"
- Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
- Condition = {
- StringNotEquals = {
- "iam:PermissionsBoundary" = aws_iam_policy.permissions_boundary.arn
- }
+ Statement = concat(
+ local.deny_guardrails_base_statements,
+ var.trail_bucket_name != null ? [local.deny_trail_storage_statement] : [],
+ var.trail_kms_key_arn != null ? [local.deny_trail_kms_statement] : []
+ )
+ })
+}
+
+locals {
+ permissions_boundary_statements = [ # presumably the Statement list of aws_iam_policy.permissions_boundary — the consumer is outside this view, confirm
+ {
+ Sid = "AllowEverythingExceptDangerous" # NotAction allow: every action except the iam, organizations and account services
+ Effect = "Allow"
+ NotAction = ["iam:*", "organizations:*", "account:*"]
+ Resource = "*"
+ },
+ {
+ Sid = "AllowPassRoleOnlyAgentRoles" # iam:PassRole only for role ARNs under var.iam_path
+ Effect = "Allow"
+ Action = "iam:PassRole"
+ Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
+ },
+ {
+ Sid = "AllowReadOnlyIAM" # read-only role inspection, on any resource
+ Effect = "Allow"
+ Action = [
+ "iam:GetRole",
+ "iam:GetRolePolicy",
+ "iam:ListRolePolicies",
+ "iam:ListAttachedRolePolicies"
+ ]
+ Resource = "*"
+ }
+ ]
+
+ iam_scoped_statements = [ # Allow statements for IAM work under the agent path; consumed as `Statement = local.iam_scoped_statements` in a jsonencode()'d policy
+ {
+ Sid = "AllowRoleManagementUnderAgentPath" # role lifecycle, tagging and policy attachment, only for roles under var.iam_path
+ Effect = "Allow"
+ Action = [
+ "iam:CreateRole", "iam:DeleteRole", "iam:GetRole",
+ "iam:GetRolePolicy", "iam:ListRolePolicies",
+ "iam:ListAttachedRolePolicies", "iam:ListInstanceProfilesForRole",
+ "iam:TagRole", "iam:UntagRole", "iam:ListRoleTags",
+ "iam:UpdateRole", "iam:UpdateRoleDescription",
+ "iam:UpdateAssumeRolePolicy",
+ "iam:PutRolePolicy", "iam:DeleteRolePolicy",
+ "iam:AttachRolePolicy", "iam:DetachRolePolicy",
+ "iam:PutRolePermissionsBoundary"
+ ]
+ Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
+ },
+ {
+ Sid = "AllowPolicyManagementUnderAgentPath" # managed-policy lifecycle and versioning, only for policies under var.iam_path
+ Effect = "Allow"
+ Action = [
+ "iam:CreatePolicy", "iam:DeletePolicy",
+ "iam:GetPolicy", "iam:GetPolicyVersion",
+ "iam:ListPolicyVersions", "iam:CreatePolicyVersion",
+ "iam:DeletePolicyVersion", "iam:TagPolicy", "iam:UntagPolicy"
+ ]
+ Resource = "arn:aws:iam::${var.account_id}:policy${var.iam_path}*"
+ },
+ {
+ Sid = "AllowInstanceProfileManagementUnderAgentPath" # instance-profile lifecycle, only for profiles under var.iam_path
+ Effect = "Allow"
+ Action = [
+ "iam:CreateInstanceProfile", "iam:DeleteInstanceProfile",
+ "iam:GetInstanceProfile", "iam:AddRoleToInstanceProfile",
+ "iam:RemoveRoleFromInstanceProfile", "iam:TagInstanceProfile"
+ ]
+ Resource = "arn:aws:iam::${var.account_id}:instance-profile${var.iam_path}*"
+ },
+ {
+ Sid = "AllowPassRoleOnlyAgentRoles" # iam:PassRole only for role ARNs under var.iam_path
+ Effect = "Allow"
+ Action = "iam:PassRole"
+ Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
+ },
+ {
+ Sid = "AllowServiceLinkedRoles" # service-linked role create/delete, limited to the /aws-service-role/ path
+ Effect = "Allow"
+ Action = [
+ "iam:CreateServiceLinkedRole",
+ "iam:DeleteServiceLinkedRole",
+ "iam:GetServiceLinkedRoleDeletionStatus"
+ ]
+ Resource = "arn:aws:iam::${var.account_id}:role/aws-service-role/*"
+ },
+ {
+ Sid = "AllowIAMReadOnly" # list/summary/simulation calls, on any resource
+ Effect = "Allow"
+ Action = [
+ "iam:ListRoles", "iam:ListPolicies", "iam:ListInstanceProfiles",
+ "iam:GetAccountSummary", "iam:SimulatePrincipalPolicy",
+ "iam:ListOpenIDConnectProviders", "iam:ListSAMLProviders"
+ ]
+ Resource = "*"
+ }
+ ]
+
+ deny_guardrails_base_statements = [ # always-on Deny statements; first argument of the concat() that builds the LokiDenyGuardrails policy
+ {
+ Sid = "DenyIdentityManagement" # IAM user, group, access-key, login-profile and MFA changes, on every resource
+ Effect = "Deny"
+ Action = [
+ "iam:CreateUser", "iam:DeleteUser",
+ "iam:CreateGroup", "iam:DeleteGroup",
+ "iam:CreateAccessKey", "iam:DeleteAccessKey",
+ "iam:CreateLoginProfile", "iam:DeleteLoginProfile", "iam:UpdateLoginProfile",
+ "iam:AddUserToGroup", "iam:RemoveUserFromGroup",
+ "iam:AttachUserPolicy", "iam:DetachUserPolicy",
+ "iam:PutUserPolicy", "iam:DeleteUserPolicy",
+ "iam:AttachGroupPolicy", "iam:DetachGroupPolicy",
+ "iam:PutGroupPolicy", "iam:DeleteGroupPolicy",
+ "iam:DeactivateMFADevice", "iam:DeleteVirtualMFADevice"
+ ]
+ Resource = "*"
+ },
+ {
+ Sid = "DenySelfEscalation" # role-mutating actions denied on the agent role itself (Resource is aws_iam_role.agent.arn)
+ Effect = "Deny"
+ Action = [
+ "iam:AttachRolePolicy", "iam:DetachRolePolicy",
+ "iam:PutRolePolicy", "iam:DeleteRolePolicy",
+ "iam:UpdateAssumeRolePolicy", "iam:DeleteRole",
+ "iam:TagRole", "iam:UntagRole",
+ "iam:UpdateRole", "iam:UpdateRoleDescription",
+ "iam:PutRolePermissionsBoundary", "iam:DeleteRolePermissionsBoundary"
+ ]
+ Resource = [aws_iam_role.agent.arn]
+ },
+ {
+ Sid = "DenyOrganizationsAndAccount" # blanket deny of the organizations and account services
+ Effect = "Deny"
+ Action = ["organizations:*", "account:*"]
+ Resource = "*"
+ },
+ {
+ Sid = "DenyRoleManagementOutsideAgentPath" # NotResource: applies to every role except those under var.iam_path and /aws-service-role/
+ Effect = "Deny"
+ Action = [
+ "iam:CreateRole", "iam:DeleteRole",
+ "iam:PutRolePolicy", "iam:DeleteRolePolicy",
+ "iam:AttachRolePolicy", "iam:DetachRolePolicy",
+ "iam:UpdateAssumeRolePolicy",
+ "iam:PutRolePermissionsBoundary", "iam:DeleteRolePermissionsBoundary",
+ "iam:TagRole", "iam:UntagRole",
+ "iam:UpdateRole", "iam:UpdateRoleDescription"
+ ]
+ NotResource = [
+ "arn:aws:iam::${var.account_id}:role${var.iam_path}*",
+ "arn:aws:iam::${var.account_id}:role/aws-service-role/*"
+ ]
+ },
+ {
+ Sid = "DenyCreateRoleWithoutBoundary" # CreateRole under var.iam_path is denied unless the request carries the boundary policy ARN
+ Effect = "Deny"
+ Action = "iam:CreateRole"
+ Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
+ Condition = {
+ StringNotEquals = {
+ "iam:PermissionsBoundary" = aws_iam_policy.permissions_boundary.arn
}
- },
- {
- Sid = "DenyRemovingBoundary"
- Effect = "Deny"
- Action = [
- "iam:DeleteRolePermissionsBoundary",
- "iam:PutRolePermissionsBoundary"
- ]
- Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
- },
- {
- Sid = "DenyBoundaryPolicyModification"
- Effect = "Deny"
- Action = [
- "iam:DeletePolicy", "iam:CreatePolicyVersion",
- "iam:DeletePolicyVersion", "iam:SetDefaultPolicyVersion"
- ]
- Resource = aws_iam_policy.permissions_boundary.arn
}
+ },
+ {
+ Sid = "DenyRemovingBoundary" # unconditional deny of boundary put/delete on roles under var.iam_path. NOTE(review): this also covers the iam:PutRolePermissionsBoundary listed in local.iam_scoped_statements — confirm that is intended
+ Effect = "Deny"
+ Action = [
+ "iam:DeleteRolePermissionsBoundary",
+ "iam:PutRolePermissionsBoundary"
+ ]
+ Resource = "arn:aws:iam::${var.account_id}:role${var.iam_path}*"
+ },
+ {
+ Sid = "DenyBoundaryPolicyModification" # protects the boundary policy itself from deletion and version changes
+ Effect = "Deny"
+ Action = [
+ "iam:DeletePolicy", "iam:CreatePolicyVersion",
+ "iam:DeletePolicyVersion", "iam:SetDefaultPolicyVersion"
+ ]
+ Resource = aws_iam_policy.permissions_boundary.arn
+ },
+ {
+ Sid = "DenyCloudTrailTampering" # create/stop/delete/update actions on CloudTrail trails, event data stores, channels and resource policies
+ Effect = "Deny"
+ Action = [
+ "cloudtrail:CreateTrail", "cloudtrail:CreateEventDataStore",
+ "cloudtrail:CreateChannel",
+ "cloudtrail:StopLogging", "cloudtrail:DeleteTrail", "cloudtrail:UpdateTrail",
+ "cloudtrail:PutEventSelectors", "cloudtrail:PutInsightSelectors",
+ "cloudtrail:PutResourcePolicy", "cloudtrail:DeleteResourcePolicy",
+ "cloudtrail:DeleteEventDataStore", "cloudtrail:UpdateEventDataStore",
+ "cloudtrail:DeleteChannel", "cloudtrail:UpdateChannel"
+ ]
+ Resource = "*"
+ },
+ {
+ Sid = "DenyAuditServiceTampering" # disable/delete/update actions on AWS Config, GuardDuty and Security Hub
+ Effect = "Deny"
+ Action = [
+ "config:DeleteConfigurationRecorder", "config:StopConfigurationRecorder",
+ "config:PutConfigurationRecorder",
+ "config:DeleteDeliveryChannel", "config:PutDeliveryChannel",
+ "config:DeleteConfigRule",
+ "config:DeleteConfigurationAggregator", "config:DeleteOrganizationConfigRule",
+ "config:DeleteRetentionConfiguration", "config:DeleteRemediationConfiguration",
+ "config:DeleteEvaluationResults",
+ "guardduty:DeleteDetector", "guardduty:UpdateDetector",
+ "guardduty:DisassociateFromMasterAccount", "guardduty:StopMonitoringMembers",
+ "guardduty:DeletePublishingDestination", "guardduty:UpdatePublishingDestination",
+ "guardduty:DisassociateMembers", "guardduty:DeleteMembers",
+ "guardduty:UpdateMemberDetectors",
+ "guardduty:CreateFilter", "guardduty:UpdateFilter", "guardduty:DeleteFilter",
+ "securityhub:DisableSecurityHub", "securityhub:DisassociateFromMasterAccount",
+ "securityhub:BatchDisableStandards", "securityhub:UpdateStandardsControl",
+ "securityhub:DeleteInsight", "securityhub:UpdateInsight",
+ "securityhub:BatchUpdateFindings"
+ ]
+ Resource = "*"
+ }
+ ]
+
+ deny_trail_storage_statement = { # optional Deny; only added to LokiDenyGuardrails when var.trail_bucket_name is non-null
+ Sid = "DenyTrailStorageTampering" # bucket-configuration and object write/delete actions on the trail bucket and its objects
+ Effect = "Deny"
+ Action = [
+ "s3:DeleteBucket", "s3:DeleteBucketPolicy", "s3:PutBucketPolicy",
+ "s3:PutBucketAcl", "s3:PutBucketPublicAccessBlock",
+ "s3:PutBucketOwnershipControls",
+ "s3:PutBucketNotification", "s3:PutBucketWebsite",
+ "s3:PutBucketVersioning", "s3:PutBucketLogging",
+ "s3:PutLifecycleConfiguration", "s3:PutReplicationConfiguration",
+ "s3:PutEncryptionConfiguration", "s3:PutBucketObjectLockConfiguration",
+ "s3:DeleteObject", "s3:DeleteObjectVersion",
+ "s3:PutObject",
+ "s3:PutObjectAcl", "s3:PutObjectLegalHold",
+ "s3:PutObjectRetention", "s3:BypassGovernanceRetention"
]
- })
+ Resource = [
+ # coalesce() shields against null when this deny statement is
+ # gated out (var.trail_bucket_name == null) by the concat() in the
+ # LokiDenyGuardrails policy. Terraform evaluates this local eagerly,
+ # so a null var would crash the whole plan even though the value is
+ # never used. The sentinel "INVALID_UNUSED" uses uppercase and an
+ # underscore (both forbidden in real S3 bucket names), so the
+ # resulting ARN cannot match any actual bucket. NOTE: IAM policy
+ # *syntax* validation would still accept that ARN — the safety here
+ # comes from the concat() gate, not from the sentinel itself. The
+ # sentinel is defense-in-depth: if the gate is ever dropped by
+ # mistake, the resulting deny is a no-op rather than a deny
+ # against an attacker-controlled bucket name.
+ "arn:aws:s3:::${coalesce(var.trail_bucket_name, "INVALID_UNUSED")}",
+ "arn:aws:s3:::${coalesce(var.trail_bucket_name, "INVALID_UNUSED")}/*"
+ ]
+ }
+
+ deny_trail_kms_statement = { # optional Deny; only added to LokiDenyGuardrails when var.trail_kms_key_arn is non-null
+ Sid = "DenyTrailKmsTampering" # key deletion/disable, key-policy, grant, alias and key-material actions on the trail CMK
+ Effect = "Deny"
+ Action = [
+ "kms:ScheduleKeyDeletion", "kms:DisableKey", "kms:PutKeyPolicy",
+ "kms:CreateGrant", "kms:RevokeGrant", "kms:CancelKeyDeletion",
+ "kms:UpdateAlias", "kms:DeleteAlias",
+ "kms:PutResourcePolicy", "kms:DeleteResourcePolicy",
+ "kms:ImportKeyMaterial", "kms:DeleteImportedKeyMaterial"
+ ]
+ # coalesce() substitutes a sentinel when var.trail_kms_key_arn is null; see
+ # the comment on deny_trail_storage_statement.Resource for why one is
+ # needed. The sentinel is ARN-shaped but uses an invalid region ("invalid")
+ # and all-zero account/key, so it deploys cleanly if ever ungated but matches no real key.
+ Resource = [coalesce(var.trail_kms_key_arn, "arn:aws:kms:invalid:000000000000:key/00000000-0000-0000-0000-000000000000")]
+ }
}
# --- Instance Profile ---
resource "aws_iam_instance_profile" "agent" {
name = "${var.agent_role_name}-profile"
+ path = var.iam_path # puts the profile ARN under instance-profile${var.iam_path}*, the prefix the instance-profile Allow statement is scoped to
role = aws_iam_role.agent.name
tags = var.tags
}
diff --git a/terraform/variables.tf b/terraform/variables.tf
index a437127..320861d 100644
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -7,6 +7,11 @@ variable "agent_role_name" {
description = "Name for the agent's IAM role"
type = string
default = "loki-agent-role"
+
+ validation { # rejects names that could not be embedded safely in a composed ARN
+ condition = can(regex("^[\\w+=,.@-]{1,64}$", var.agent_role_name)) # \w covers letters, digits and underscore; can() turns a regex miss into false
+ error_message = "agent_role_name must match IAM naming rules: letters/digits and any of +=,.@-_, 1–64 chars. No slashes, spaces, or colons (would corrupt ARN composition)."
+ }
}
variable "iam_path" {
@@ -15,8 +20,8 @@ variable "iam_path" {
default = "/loki/"
validation {
- condition = can(regex("^/.*/$", var.iam_path))
- error_message = "iam_path must start and end with /"
+ condition = can(regex("^/([\\w+=,.@-]+/)+$", var.iam_path)) # leading slash, then one or more non-empty segments each closed by a slash; "" and "/" do not match
+ error_message = "iam_path must be a valid IAM path (e.g. /loki/ or /loki/sub/) starting and ending with /, with at least one path segment. Empty string and bare root '/' are both rejected because they would widen ARN-scoped allows like 'role$${var.iam_path}*' to 'role*' or 'role/*' (every role in the account)." # $${...} is the template escape: the pattern is printed literally instead of being interpolated with the rejected value
}
}
@@ -24,6 +29,11 @@ variable "boundary_policy_name" {
description = "Name of the permissions boundary policy"
type = string
default = "LokiPermissionsBoundary"
+
+ validation { # rejects names that could not be embedded safely in a composed ARN
+ condition = can(regex("^[\\w+=,.@-]{1,128}$", var.boundary_policy_name)) # \w covers letters, digits and underscore; can() turns a regex miss into false
+ error_message = "boundary_policy_name must match IAM policy naming rules: letters/digits and any of +=,.@-_, 1–128 chars. No slashes, spaces, or colons (would corrupt ARN composition)."
+ }
}
variable "tags" {
@@ -31,3 +41,72 @@ variable "tags" {
type = map(string)
default = {}
}
+
+variable "trail_bucket_name" {
+ description = <<-EOT
+ Name of the S3 bucket holding CloudTrail logs. When set, adds
+ DenyTrailStorageTampering scoped to this bucket. Leave null to skip
+ (e.g. if no CloudTrail exists, or the bucket is managed by a
+ separate Terraform state with its own protections). Must be
+ managed *outside* this agent's Terraform state — the agent role
+ will be denied PutBucketPolicy/PutEncryptionConfiguration on it.
+
+ Pass the BARE bucket name (e.g. "my-org-cloudtrail-logs"), NOT a
+ full S3 ARN. Validation rejects ARNs, because one would yield a
+ malformed deny resource (arn:aws:s3:::arn:aws:s3:::foo) that matches nothing.
+ EOT
+ type = string
+ default = null
+
+ validation {
+ # S3 bucket-name rules (subset that catches the common mistakes):
+ # - 3–63 chars
+ # - lowercase letters, digits, dot, hyphen only
+ # - must start and end with letter or digit
+ # - no consecutive dots (S3 rejects "a..b")
+ # Notably rejects: ARNs (contain colons), uppercase, underscores.
+ # Does NOT validate IP-format names (192.168.x.x) or xn-- prefix —
+ # no such bucket can exist, so the resulting deny would match nothing.
+ condition = (
+ var.trail_bucket_name == null ||
+ (
+ can(regex("^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$", var.trail_bucket_name)) &&
+ !can(regex("\\.\\.", var.trail_bucket_name))
+ )
+ )
+ error_message = "trail_bucket_name must be a bare S3 bucket name (3–63 chars, lowercase alphanumerics + dots/hyphens, no consecutive dots, no colons), not a full ARN. Got: ${jsonencode(var.trail_bucket_name)}" # jsonencode() quotes the value, so an empty string is shown as "" instead of nothing
+ }
+}
+
+variable "trail_kms_key_arn" {
+ type = string
+ default = null
+ description = <<-EOT
+ Full ARN of the KMS CMK encrypting CloudTrail. When set, adds
+ DenyTrailKmsTampering scoped to this key. Leave null if the trail
+ is unencrypted or absent. Must be a full key ARN
+ (arn:aws:kms:REGION:ACCOUNT_ID:key/KEY_ID), not a key UUID or alias
+ — a partial value yields a silent no-op deny.
+ EOT
+
+ validation { # null is accepted as "no KMS protection"; anything else must be a commercial-partition key ARN (UUID or mrk- key id)
+ condition = var.trail_kms_key_arn == null ? true : can(regex("^arn:aws:kms:[a-z0-9-]+:[0-9]{12}:key/(mrk-[a-f0-9]{32}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$", var.trail_kms_key_arn))
+ error_message = "trail_kms_key_arn must be a full KMS key ARN for AWS commercial region (arn:aws:kms:REGION:ACCOUNT_ID:key/KEY_ID or .../key/mrk-... for multi-region keys), or null. Note: partition support (aws-us-gov, aws-cn) is planned for a future release; currently this policy only supports AWS commercial regions."
+ }
+}
+
+variable "trail_protection_acknowledged" {
+ type = bool
+ default = false
+ description = <<-EOT
+ Set to true when you have intentionally left trail_bucket_name AND
+ trail_kms_key_arn null because the account has no CloudTrail (or the
+ trail is managed in a way where these denies are inappropriate).
+
+ This is a fail-closed safety check: if both vars are null and this
+ flag is false, plan/apply errors out. The intent is to prevent the
+ common case of "forgot to set the trail vars" silently deploying
+ without audit-trail tampering protection while the agent retains
+ PowerUser-level S3/KMS access to the (existing) trail bucket/CMK.
+ EOT
+}