Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 44 additions & 44 deletions .kiro/specs/phase-8-deployment/tasks.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions apps/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@
"@stackfast/rules-engine": "workspace:*",
"@stackfast/schemas": "workspace:*",
"@stackfast/shared": "workspace:*",
"@upstash/ratelimit": "^2.0.8",
"@upstash/redis": "^1.38.0",
"better-auth": "^1.1.13",
"drizzle-orm": "^0.45.2",
"hono": "^4.6.1",
"zod": "^3.24.2"
},
"devDependencies": {
"@types/node": "^22.10.2",
"fast-check": "^4.8.0",
"tsx": "^4.19.2",
"typescript": "^5.6.3"
}
Expand Down
210 changes: 209 additions & 1 deletion apps/api/src/app.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
import { describe, expect, it } from "vitest";
import { afterEach, describe, expect, it } from "vitest";
import app from "./app.js";
import { BUCKETS } from "./rate-limit/buckets.js";
import {
__resetBackendForTests,
} from "./rate-limit/index.js";
import type {
RateLimitBackend,
RateLimitCheckArgs,
RateLimitDecision,
} from "./rate-limit/types.js";

describe("api", () => {
it("returns health status", async () => {
Expand Down Expand Up @@ -458,4 +467,203 @@ describe("api", () => {
});
expect(response.headers.get("X-Request-ID")).toBe(customId);
});

// ─── A6 rate-limit contract tests ──────────────────────────────
//
// These four cases are named verbatim in design.md § 8 "Testing strategy"
// and pin down the behavior the factory rewrite in
// `apps/api/src/rate-limit/index.ts` has to preserve. Every case is
// self-isolating: each one installs its own backend via
// `__resetBackendForTests` and restores the default in `afterEach` so a
// failing case cannot bleed counters into the next.

describe("rate-limit contract", () => {
afterEach(() => {
__resetBackendForTests(null);
});

/**
* Build a `RateLimitBackend` that records every `check()` call it
* receives into the returned `calls` array. Always returns an allow
* decision with full remaining quota so the admin/exempt-route checks
* can focus on "was this called at all?".
*/
function createSpyBackend(): {
backend: RateLimitBackend;
calls: RateLimitCheckArgs[];
} {
const calls: RateLimitCheckArgs[] = [];
const backend: RateLimitBackend = {
name: "memory",
async check(args: RateLimitCheckArgs): Promise<RateLimitDecision> {
calls.push(args);
return {
allowed: true,
remaining: BUCKETS[args.bucket].limit,
limit: BUCKETS[args.bucket].limit,
resetAtEpochMs: Date.now() + BUCKETS[args.bucket].windowMs,
};
},
};
return { backend, calls };
}

/**
* Build a `RateLimitBackend` that delegates to a shared Map. Used by
* the "bucket count survives backend swap" case to prove that two
* wrapper instances sharing the same underlying state keep accounting
* consistent across a simulated restart. Semantics mirror the real
* memory backend: count is incremented on every call, `allowed` is
* `count <= limit`, and the window resets lazily at `resetAtEpochMs`.
*/
function createSharedStateBackend(
store: Map<string, { count: number; resetAtEpochMs: number }>,
): RateLimitBackend {
return {
name: "upstash",
async check({ bucket, clientId }): Promise<RateLimitDecision> {
const key = `${bucket}:${clientId}`;
const config = BUCKETS[bucket];
const now = Date.now();
let entry = store.get(key);
if (!entry || now >= entry.resetAtEpochMs) {
entry = { count: 1, resetAtEpochMs: now + config.windowMs };
store.set(key, entry);
} else {
entry.count += 1;
}
return {
allowed: entry.count <= config.limit,
remaining: Math.max(0, config.limit - entry.count),
limit: config.limit,
resetAtEpochMs: entry.resetAtEpochMs,
};
},
};
}

it("admin 401 before rate-limit counter increments (R8.1)", async () => {
const { backend, calls } = createSpyBackend();
__resetBackendForTests(backend);

const response = await app.request("/admin/tools/import", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ tools: [{ id: "whatever" }] }),
});

expect(response.status).toBe(401);
// The admin middleware rejects before any downstream middleware runs.
// `/admin/*` also isn't under `/api/v1/*`, so the rate-limit
// middleware cannot match it — the backend is never consulted.
expect(calls).toHaveLength(0);
});

it("Retry-After only on 429 (R4.7, R4.8)", async () => {
const store = new Map<string, { count: number; resetAtEpochMs: number }>();
__resetBackendForTests(createSharedStateBackend(store));

const clientId = "retry-after-test";
const readLimit = BUCKETS.read.limit;

// Requests 1..limit should all be 200 and MUST NOT include a
// Retry-After header. We collect the Retry-After values to assert
// they are all null in one shot.
const retryAfterDuringAllowed: (string | null)[] = [];
for (let i = 0; i < readLimit; i += 1) {
const ok = await app.request("/api/v1/tools/search", {
headers: { "x-forwarded-for": clientId },
});
expect(ok.status).toBe(200);
retryAfterDuringAllowed.push(ok.headers.get("Retry-After"));
}
expect(retryAfterDuringAllowed.every((v) => v === null)).toBe(true);

// Request limit+1 trips the rate limit: must return 429 and a
// positive-integer Retry-After header.
const blocked = await app.request("/api/v1/tools/search", {
headers: { "x-forwarded-for": clientId },
});
expect(blocked.status).toBe(429);
const retryAfter = blocked.headers.get("Retry-After");
expect(retryAfter).not.toBeNull();
const seconds = Number(retryAfter);
expect(Number.isInteger(seconds)).toBe(true);
expect(seconds).toBeGreaterThan(0);
});

it("exempt routes never counted (R4.9)", async () => {
const { backend, calls } = createSpyBackend();
__resetBackendForTests(backend);

for (let i = 0; i < 5; i += 1) {
const health = await app.request("/health");
expect(health.status).toBe(200);
}
for (let i = 0; i < 5; i += 1) {
const openapi = await app.request("/openapi.json");
expect(openapi.status).toBe(200);
}

expect(calls).toHaveLength(0);
});

it("bucket count survives backend swap (R6.4)", async () => {
const store = new Map<string, { count: number; resetAtEpochMs: number }>();
__resetBackendForTests(createSharedStateBackend(store));

const clientId = "backend-swap-test";
const generationLimit = BUCKETS.generation.limit;
const preSwap = generationLimit - 10; // 20 of the 30-quota window

// Fire `preSwap` generation-bucket requests. These exercise both
// rate-limit middlewares (generation + /api/v1/*), so we just look
// at the generation counter in the shared store.
for (let i = 0; i < preSwap; i += 1) {
const response = await app.request("/api/v1/blueprints", {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-forwarded-for": clientId,
},
// Empty body intentionally: we want the rate-limit middleware
// to count every request, but the blueprint handler to short-
// circuit quickly with a 400 so the test stays fast.
body: "{}",
});
expect(response.status).not.toBe(429);
}
expect(
store.get(`generation:${clientId}`)?.count,
).toBe(preSwap);

// Swap the backend wrapper but keep the SAME shared store — this
// simulates an API service restart where the underlying Redis
// keyspace survives. If accounting resets, the next 11 requests
// would all be 200 and the invariant breaks.
__resetBackendForTests(createSharedStateBackend(store));

const postSwapStatuses: number[] = [];
for (let i = 0; i < 11; i += 1) {
const response = await app.request("/api/v1/blueprints", {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-forwarded-for": clientId,
},
body: "{}",
});
postSwapStatuses.push(response.status);
}

// 20 pre-swap + first 10 post-swap = 30 (still allowed). Request 31
// (the 11th post-swap) must be 429 because the shared store tracks
// the cumulative count across the restart.
expect(postSwapStatuses.slice(0, 10).every((s) => s !== 429)).toBe(true);
expect(postSwapStatuses[10]).toBe(429);
expect(
store.get(`generation:${clientId}`)?.count,
).toBe(generationLimit + 1);
});
});
});
38 changes: 7 additions & 31 deletions apps/api/src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type { MiddlewareHandler } from "hono/types";
import { z } from "zod";
import { openApiDocument } from "./openapi.js";
import { getAuth, requireSession, optionalSession } from "./middleware/auth.js";
import { createRateLimitMiddleware } from "./rate-limit/index.js";

type Bindings = {
ADMIN_API_KEY?: string;
Expand Down Expand Up @@ -52,11 +53,6 @@ const EnrichToolSchema = z.object({
force: z.boolean().optional(),
});

const GENERATION_LIMIT = 30;
const READ_LIMIT = 100;
const WINDOW_MS = 60_000;
export const rateLimitBuckets = new Map<string, { count: number; resetAt: number }>();

const catalogLoader = new CatalogLoader();
const configuredCorsOrigin = process.env.CORS_ORIGIN ?? process.env.WEB_ORIGIN ?? "http://localhost:5173";

Expand Down Expand Up @@ -94,9 +90,12 @@ app.use("*", async (c, next) => {
});

// --- Rate limiting ---
app.use("/api/v1/blueprints", rateLimit("generation", GENERATION_LIMIT));
app.use("/api/v1/scaffolds", rateLimit("generation", GENERATION_LIMIT));
app.use("/api/v1/*", rateLimit("read", READ_LIMIT));
// `/health` and `/openapi.json` are registered as top-level routes without any
// `/api/v1/*` prefix, so the rate-limit middleware below never matches them
// (R4.9: exempt routes never counted).
app.use("/api/v1/blueprints", createRateLimitMiddleware("generation"));
app.use("/api/v1/scaffolds", createRateLimitMiddleware("generation"));
app.use("/api/v1/*", createRateLimitMiddleware("read"));

// --- Auth middleware ---
app.use("/api/v1/blueprints", requireSession());
Expand Down Expand Up @@ -346,29 +345,6 @@ function resolveTools(toolIds: string[]): Tool[] {
return tools as Tool[];
}

function rateLimit(bucket: string, limit: number): MiddlewareHandler<{ Bindings: Bindings; Variables: Variables }> {
return async (c, next) => {
const clientId = c.req.header("x-forwarded-for") ?? c.req.header("cf-connecting-ip") ?? "local";
const key = `${bucket}:${clientId}`;
const now = Date.now();
const current = rateLimitBuckets.get(key);

if (!current || current.resetAt <= now) {
rateLimitBuckets.set(key, { count: 1, resetAt: now + WINDOW_MS });
await next();
return;
}

if (current.count >= limit) {
c.header("Retry-After", String(Math.ceil((current.resetAt - now) / 1000)));
return c.json({ error: "Rate limit exceeded", requestId: c.get("requestId") }, 429);
}

current.count += 1;
await next();
};
}

function requireAdminApiKey(): MiddlewareHandler<{ Bindings: Bindings; Variables: Variables }> {
return async (c, next) => {
const configuredKey = c.env?.ADMIN_API_KEY ?? process.env.ADMIN_API_KEY;
Expand Down
19 changes: 1 addition & 18 deletions apps/api/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { serve } from "@hono/node-server";
import app, { rateLimitBuckets } from "./app.js";
import app from "./app.js";

const port = Number(process.env.PORT ?? 3000);

Expand All @@ -10,21 +10,4 @@ serve({

console.log(`Stackfast API listening on http://localhost:${port}`);

// --- Rate limit stale key cleanup (in-memory, single-process) ---
// TODO Phase 8: Replace with Upstash/Redis-backed rate limiting for multi-instance support
const RATE_LIMIT_CLEANUP_INTERVAL_MS = 5 * 60_000; // every 5 minutes
setInterval(() => {
const now = Date.now();
let cleaned = 0;
for (const [key, bucket] of rateLimitBuckets) {
if (bucket.resetAt <= now) {
rateLimitBuckets.delete(key);
cleaned++;
}
}
if (cleaned > 0) {
console.log(`[rate-limit] Cleaned ${cleaned} stale bucket(s)`);
}
}, RATE_LIMIT_CLEANUP_INTERVAL_MS);

export default app;
24 changes: 24 additions & 0 deletions apps/api/src/rate-limit/buckets.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { describe, expect, it } from "vitest";
import { BUCKETS, BUCKET_NAMES, type BucketName } from "./buckets.js";

describe("rate-limit buckets", () => {
it("configures the generation bucket at 30 requests per 60s (R4.2)", () => {
expect(BUCKETS.generation).toEqual({ limit: 30, windowMs: 60_000 });
});

it("configures the read bucket at 100 requests per 60s (R4.3)", () => {
expect(BUCKETS.read).toEqual({ limit: 100, windowMs: 60_000 });
});

it("exposes exactly the two bucket names used by the middleware", () => {
expect(BUCKET_NAMES).toEqual(["generation", "read"]);
const keys = Object.keys(BUCKETS) as BucketName[];
expect(new Set(keys)).toEqual(new Set(["generation", "read"]));
});

it("uses a 60 second window for every bucket", () => {
for (const name of BUCKET_NAMES) {
expect(BUCKETS[name].windowMs).toBe(60_000);
}
});
});
27 changes: 27 additions & 0 deletions apps/api/src/rate-limit/buckets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* Rate-limit bucket configuration.
*
* Two buckets per R4.2 / R4.3:
* - `generation` — 30 requests per 60s, applied to POST /api/v1/blueprints
* and POST /api/v1/scaffolds.
* - `read` — 100 requests per 60s, applied to the remaining /api/v1/*
* routes.
*
* Kept as a pure module so both backends (memory and Upstash, landing in A2
* and A4) and the fail-open wrapper (A3) can import it without pulling in
* Hono, Redis, or any other runtime dependency.
*/

export type BucketName = "generation" | "read";

export interface BucketConfig {
readonly limit: number;
readonly windowMs: number;
}

export const BUCKETS: Readonly<Record<BucketName, BucketConfig>> = {
generation: { limit: 30, windowMs: 60_000 },
read: { limit: 100, windowMs: 60_000 },
} as const;

export const BUCKET_NAMES: readonly BucketName[] = ["generation", "read"] as const;
Loading
Loading