From 42006899122a6d4240dea5ab65e4690ae728ce70 Mon Sep 17 00:00:00 2001 From: Andy Jordan <2226434+andyleejordan@users.noreply.github.com> Date: Tue, 23 Jun 2026 15:17:33 -0700 Subject: [PATCH] Collect a hang dump on the wedging Windows PowerShell CI leg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `windows-latest` `CI Tests` leg still intermittently rides its `timeout-minutes` cap to a cancelled run even after #2318's skips. Pulling the test-result artifacts from the last two hung runs shows they stop right after `TestE2EPwsh` and never emit a `TestPS51` `.trx`, so the wedge is now in the net462 / Windows PowerShell 5.1 unit leg — not the E2E server path #2318 skipped. The earlier assumption in #2323 that `TestPS51` was unaffected no longer holds, and `dotnetTestArgs` had no `--blame-hang`, so a stuck unit host produced no dump and no test name; it just burned the hour. This doesn't skip anything — it instruments CI so the next hang is actionable: - Add `--blame-hang --blame-hang-timeout 10m --blame-hang-dump-type full` to the CI `dotnet test` invocations (gated to `GITHUB_ACTIONS`, so local runs are byte-identical). Any single test that wedges past 10 minutes is dumped and its host tree terminated, failing the leg fast and naming the test. - Install ProcDump (best-effort) on the Windows leg. VSTest's built-in hang dumper only handles .NET Core hosts, so dumping the net462 host needs ProcDump, which isn't on the runner image. A download failure only warns. - Upload `**/*.dmp` and `**/*_Sequence.xml` alongside the `.trx`. Caveat: if the wedge is in host startup/discovery rather than a running test, the per-test timer may not fire — but `_Sequence.xml` still shows how far discovery got. Once a hung run names the net462 test, we can give it the same targeted treatment as #2307 and #2314 instead of skipping the whole leg. Drafted by Copilot (Claude Opus 4.8). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/ci-test.yml | 26 +++++++++++++++++++++++++- PowerShellEditorServices.build.ps1 | 5 +++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml index bac41ddc9..4b69ce6bf 100644 --- a/.github/workflows/ci-test.yml +++ b/.github/workflows/ci-test.yml @@ -52,6 +52,27 @@ jobs: shell: pwsh run: ./pwsh/tools/install-powershell.ps1 -Preview -Destination ./preview + - name: Install ProcDump for Windows PowerShell hang dumps + if: runner.os == 'Windows' + shell: pwsh + run: | + # VSTest's built-in hang dumper only handles .NET Core test hosts, so + # capturing a dump of a wedged .NET Framework (net462 / Windows + # PowerShell 5.1) test host requires ProcDump, which isn't on the + # runner image. Install is best-effort: a failure must not fail CI + # (the net8 legs dump without it, and `--blame-hang` still terminates + # the host and writes a sequence file naming the hung test). See #2323. + try { + $zip = Join-Path $env:RUNNER_TEMP 'Procdump.zip' + $dir = Join-Path $env:RUNNER_TEMP 'procdump' + Invoke-WebRequest -Uri 'https://download.sysinternals.com/files/Procdump.zip' -OutFile $zip + Expand-Archive -Path $zip -DestinationPath $dir -Force + "PROCDUMP_PATH=$dir" | Out-File -FilePath $env:GITHUB_ENV -Append + Write-Host "ProcDump installed to $dir" + } catch { + Write-Warning "ProcDump install failed; net462 hang dumps will be unavailable: $_" + } + - name: Build and test shell: pwsh run: Invoke-Build -Configuration Release TestFull @@ -79,4 +100,7 @@ jobs: if: always() with: name: PowerShellEditorServices-test-results-${{ matrix.os }} - path: '**/*.trx' + path: | + **/*.trx + **/*.dmp + **/*_Sequence.xml diff --git a/PowerShellEditorServices.build.ps1 b/PowerShellEditorServices.build.ps1 index 62cb53369..9cfbf36f1 100644 --- a/PowerShellEditorServices.build.ps1 +++ b/PowerShellEditorServices.build.ps1 @@ -32,6 +32,11 @@ $script:dotnetBuildArgs = @( $script:dotnetTestArgs = @("test") + $script:dotnetBuildArgs + $TestArgs + @( if ($TestFilter) { "--filter", $TestFilter } + # In CI, collect a hang dump and fail fast if any single test wedges instead + # of riding the job's `timeout-minutes` cap blind. The Windows PowerShell 5.1 + # (net462) unit leg intermittently hangs under the 20260614 runner image; the + # dump names the offending test and captures its stacks. See #2323. + if ($env:GITHUB_ACTIONS) { "--blame-hang", "--blame-hang-timeout", "10m", "--blame-hang-dump-type", "full" } "--framework" )