5 changes: 5 additions & 0 deletions .changeset/fix-flaky-e2e-tests.md
@@ -0,0 +1,5 @@
---
"@workflow/core": patch
---

Fix flaky E2E tests: widen promiseAny timing gaps, reduce stream chunk delay, add health checks and increase dev test timeouts
46 changes: 43 additions & 3 deletions .github/workflows/tests.yml
@@ -328,7 +328,21 @@ jobs:
run: |
cd workbench/${{ matrix.app.name }} && pnpm dev &
echo "starting tests in 10 seconds" && sleep 10
pnpm vitest run packages/core/e2e/dev.test.ts; sleep 10
pnpm vitest run packages/core/e2e/dev.test.ts
echo "Waiting for server to stabilize..."
health_ok=false
for i in $(seq 1 60); do
if curl -sf --max-time 5 "$DEPLOYMENT_URL/.well-known/workflow/v1/manifest.json" > /dev/null 2>&1; then
echo "Server healthy after ${i}s"
health_ok=true
break
fi
sleep 1
done
Comment on lines 334 to 341
Copilot AI (Feb 6, 2026)

The new manifest polling loop will break when healthy, but if it never becomes healthy within 60 iterations the script still continues to run the E2E suite, reintroducing flakiness and making failures harder to diagnose. After the loop, add an explicit failure (non-zero exit) if the health check never succeeded; also consider adding a short curl --max-time so a single request can’t hang the job indefinitely.

Suggested change
for i in $(seq 1 60); do
if curl -sf "$DEPLOYMENT_URL/.well-known/workflow/v1/manifest.json" > /dev/null 2>&1; then
echo "Server healthy after ${i}s"
break
fi
sleep 1
done
health_ok=0
for i in $(seq 1 60); do
if curl -sf --max-time 5 "$DEPLOYMENT_URL/.well-known/workflow/v1/manifest.json" > /dev/null 2>&1; then
echo "Server healthy after ${i}s"
health_ok=1
break
fi
sleep 1
done
if [ "$health_ok" -ne 1 ]; then
echo "Server failed to become healthy after 60 seconds"
exit 1
fi

if [ "$health_ok" != "true" ]; then
echo "Server did not become healthy within 60s"
exit 1
fi
pnpm run test:e2e --reporter=default --reporter=json --outputFile=e2e-local-dev-${{ matrix.app.name }}-${{ matrix.app.canary && 'canary' || 'stable' }}.json
env:
NODE_OPTIONS: "--enable-source-maps"
@@ -395,7 +409,20 @@ jobs:
- name: Run E2E Tests
run: |
cd workbench/${{ matrix.app.name }} && pnpm start &
echo "starting tests in 10 seconds" && sleep 10
echo "Waiting for server to be ready..."
health_ok=false
for i in $(seq 1 60); do
if curl -sf --max-time 5 "$DEPLOYMENT_URL/.well-known/workflow/v1/manifest.json" > /dev/null 2>&1; then
echo "Server healthy after ${i}s"
health_ok=true
break
fi
sleep 1
done
if [ "$health_ok" != "true" ]; then
echo "Server did not become healthy within 60s"
exit 1
fi
pnpm run test:e2e --reporter=default --reporter=json --outputFile=e2e-local-prod-${{ matrix.app.name }}-${{ matrix.app.canary && 'canary' || 'stable' }}.json
env:
NODE_OPTIONS: "--enable-source-maps"
@@ -481,7 +508,20 @@ jobs:
- name: Run E2E Tests
run: |
cd workbench/${{ matrix.app.name }} && pnpm start &
echo "starting tests in 10 seconds" && sleep 10
echo "Waiting for server to be ready..."
health_ok=false
for i in $(seq 1 60); do
if curl -sf --max-time 5 "$DEPLOYMENT_URL/.well-known/workflow/v1/manifest.json" > /dev/null 2>&1; then
echo "Server healthy after ${i}s"
health_ok=true
break
fi
sleep 1
done
if [ "$health_ok" != "true" ]; then
echo "Server did not become healthy within 60s"
exit 1
fi
pnpm run test:e2e --reporter=default --reporter=json --outputFile=e2e-local-postgres-${{ matrix.app.name }}-${{ matrix.app.canary && 'canary' || 'stable' }}.json
env:
NODE_OPTIONS: "--enable-source-maps"
6 changes: 3 additions & 3 deletions packages/core/e2e/dev.test.ts
@@ -57,7 +57,7 @@
restoreFiles.length = 0;
});

test('should rebuild on workflow change', { timeout: 30_000 }, async () => {
test('should rebuild on workflow change', { timeout: 60_000 }, async () => {

Check failure on line 60 in packages/core/e2e/dev.test.ts (GitHub Actions / E2E Windows Tests)
packages/core/e2e/dev.test.ts > dev e2e > should rebuild on workflow change
Error: Test timed out in 60000ms. If this is a long-running test, pass a timeout value as the last argument or configure it globally with "testTimeout". ❯ packages/core/e2e/dev.test.ts:60:5
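As the timeout error suggests, the per-test { timeout: 60_000 } overrides in this file could instead be set once globally via Vitest's testTimeout option. A minimal sketch, assuming a vitest.config.ts at the package root (hypothetical, not part of this PR):

// Hypothetical vitest.config.ts sketch (not part of this PR): raise the test
// and hook timeouts globally instead of passing { timeout } to each test.
import { defineConfig } from 'vitest/config';

export default defineConfig({
  test: {
    // Applies to every test unless a test passes its own timeout option.
    testTimeout: 60_000,
    // Covers beforeAll/afterAll hooks, e.g. the health-check beforeAll added in e2e.test.ts.
    hookTimeout: 60_000,
  },
});
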
const workflowFile = path.join(appPath, workflowsDir, testWorkflowFile);

const content = await fs.readFile(workflowFile, 'utf8');
@@ -85,7 +85,7 @@
}
});

test('should rebuild on step change', { timeout: 30_000 }, async () => {
test('should rebuild on step change', { timeout: 60_000 }, async () => {

Check failure on line 88 in packages/core/e2e/dev.test.ts (GitHub Actions / E2E Windows Tests)
packages/core/e2e/dev.test.ts > dev e2e > should rebuild on step change
Error: Test timed out in 60000ms. If this is a long-running test, pass a timeout value as the last argument or configure it globally with "testTimeout". ❯ packages/core/e2e/dev.test.ts:88:5
const stepFile = path.join(appPath, workflowsDir, testWorkflowFile);

const content = await fs.readFile(stepFile, 'utf8');
@@ -115,7 +115,7 @@

test(
'should rebuild on adding workflow file',
{ timeout: 30_000 },
{ timeout: 60_000 },
async () => {
const workflowFile = path.join(
appPath,
31 changes: 30 additions & 1 deletion packages/core/e2e/e2e.test.ts
@@ -1,7 +1,7 @@
import { withResolvers } from '@workflow/utils';
import fs from 'fs';
import path from 'path';
import { afterAll, assert, describe, expect, test } from 'vitest';
import { afterAll, assert, beforeAll, describe, expect, test } from 'vitest';
import { dehydrateWorkflowArguments } from '../src/serialization';
import {
cliHealthJson,
@@ -136,6 +136,35 @@ async function getWorkflowReturnValue(runId: string) {
// NOTE: Temporarily disabling concurrent tests to avoid flakiness.
// TODO: Re-enable concurrent tests after conf when we have more time to investigate.
describe('e2e', () => {
// Wait for the deployment to be healthy before running tests
beforeAll(async () => {
const manifestUrl = new URL(
'/.well-known/workflow/v1/manifest.json',
deploymentUrl
);
for (let i = 1; i <= 60; i++) {
try {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 5_000);
const res = await fetch(manifestUrl, {
headers: getProtectionBypassHeaders(),
signal: controller.signal,
});
clearTimeout(timeout);
if (res.ok) {
console.log(`Server healthy after ${i}s`);
return;
}
} catch {
// Server not ready yet
}
await new Promise((resolve) => setTimeout(resolve, 1_000));
}
throw new Error(
`Server at ${deploymentUrl} did not become healthy within 60s`
Comment on lines 145 to 164
Copilot AI (Feb 6, 2026)

The new beforeAll health check polls fetch(deploymentUrl) without getProtectionBypassHeaders(). On Vercel runs with Deployment Protection enabled, this request can stay non-2xx even though the workflow endpoints are reachable with bypass headers, causing an unnecessary 60s delay/failure. Consider polling a known workflow endpoint (e.g. /.well-known/workflow/v1/manifest.json or the ?__health endpoints) and include the protection-bypass headers (and ideally a per-request timeout via AbortController) so the check reflects actual E2E readiness.

Suggested change
for (let i = 1; i <= 60; i++) {
try {
const res = await fetch(deploymentUrl);
if (res.ok) {
console.log(`Server healthy after ${i}s`);
return;
}
} catch {
// Server not ready yet
}
await new Promise((resolve) => setTimeout(resolve, 1_000));
}
throw new Error(
`Server at ${deploymentUrl} did not become healthy within 60s`
const healthUrl = new URL(
'/.well-known/workflow/v1/manifest.json',
deploymentUrl
).toString();
const headers = getProtectionBypassHeaders?.();
for (let i = 1; i <= 60; i++) {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 5_000);
try {
const res = await fetch(healthUrl, {
headers,
signal: controller.signal,
});
if (res.ok) {
console.log(`Server healthy after ${i}s`);
return;
}
} catch {
// Server not ready yet or request timed out
} finally {
clearTimeout(timeoutId);
}
await new Promise((resolve) => setTimeout(resolve, 1_000));
}
throw new Error(
`Server at ${healthUrl} did not become healthy within 60s`

);
}, 60_000);

// Write E2E metadata file with runIds for observability links
afterAll(() => {
writeE2EMetadata();
6 changes: 3 additions & 3 deletions workbench/example/workflows/99_e2e.ts
@@ -78,8 +78,8 @@ export async function promiseAnyWorkflow() {
'use workflow';
const winner = await Promise.any([
stepThatFails(),
specificDelay(1000, 'b'), // "b" should always win
specificDelay(3000, 'c'),
specificDelay(100, 'b'), // "b" should always win
specificDelay(10000, 'c'),
]);
return winner;
}
@@ -96,7 +96,7 @@ async function genReadableStream() {
for (let i = 0; i < 10; i++) {
console.log('enqueueing', i);
controller.enqueue(encoder.encode(`${i}\n`));
await new Promise((resolve) => setTimeout(resolve, 1000));
await new Promise((resolve) => setTimeout(resolve, 500));
}
console.log('closing controller');
controller.close();