From a230cf8207805b93da7342fe3492c200e8b05141 Mon Sep 17 00:00:00 2001 From: Shayne Boyer <7681382+spboyer@users.noreply.github.com> Date: Wed, 27 May 2026 17:45:51 -0400 Subject: [PATCH 1/4] Close guest process stdin to avoid TTY hang on macOS ProcessGuestLauncher and NpmRunner spawn child processes (npm/pnpm/yarn/bun install, plus the guest AppHost itself) with stdout/stderr redirected but left stdin inheriting the parent CLI's TTY. On macOS/Linux, if any child (e.g. an npm postinstall script, husky, or a package-manager permission prompt) reads from stdin, it blocks indefinitely waiting on the terminal, making 'aspire new' for the TypeScript starter (and 'aspire init/add/ restore') appear to stall with no output and ~0% CPU. Redirect stdin and close it immediately after Process.Start() so any child read surfaces as EOF instead of blocking. We never write to the guest process or npm stdin, so closing is safe. dotnet-based invocations already redirect stdin via ProcessExecutionFactory. Add a regression test in GuestRuntimeTests that launches a shell script which reads stdin and asserts it observes EOF and exits within 10s. Fixes #16791 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/Aspire.Cli/Npm/NpmRunner.cs | 16 +++++++ .../Projects/ProcessGuestLauncher.cs | 18 +++++++ .../Projects/GuestRuntimeTests.cs | 47 +++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/src/Aspire.Cli/Npm/NpmRunner.cs b/src/Aspire.Cli/Npm/NpmRunner.cs index 96b47a06b94..3094eb6a212 100644 --- a/src/Aspire.Cli/Npm/NpmRunner.cs +++ b/src/Aspire.Cli/Npm/NpmRunner.cs @@ -276,6 +276,11 @@ internal static ProcessStartInfo CreateNpmProcessStartInfo(string npmPath, strin { var startInfo = new ProcessStartInfo { + // Redirect stdin so the child npm process (and any lifecycle scripts it invokes) + // does not inherit the CLI's TTY. The caller closes stdin immediately after Start() + // so any read surfaces as EOF instead of hanging waiting on the terminal. 
NpmRunner + // is intended to be fully non-interactive. See https://github.com/microsoft/aspire/issues/16791. + RedirectStandardInput = true, RedirectStandardOutput = true, RedirectStandardError = true, UseShellExecute = false, @@ -357,6 +362,17 @@ internal static bool TryExtractLastVersion(string npmOutput, [NotNullWhen(true)] using var process = new Process { StartInfo = startInfo }; using var activity = profilingTelemetry.StartNpmCommand(npmPath, args, workingDirectory); process.Start(); + // Close stdin so any npm lifecycle script that tries to read terminal input + // sees EOF instead of blocking on the inherited TTY. See ProcessGuestLauncher + // and https://github.com/microsoft/aspire/issues/16791. + try + { + process.StandardInput.Close(); + } + catch (IOException) + { + // The child may have already closed its stdin; ignore. + } activity.SetProcessId(process.Id); var outputTask = process.StandardOutput.ReadToEndAsync(cancellationToken); diff --git a/src/Aspire.Cli/Projects/ProcessGuestLauncher.cs b/src/Aspire.Cli/Projects/ProcessGuestLauncher.cs index fdf137100f7..75500ebb8f0 100644 --- a/src/Aspire.Cli/Projects/ProcessGuestLauncher.cs +++ b/src/Aspire.Cli/Projects/ProcessGuestLauncher.cs @@ -57,6 +57,13 @@ public ProcessGuestLauncher(string language, ILogger logger, FileLoggerProvider? { FileName = resolvedCommandPath, WorkingDirectory = workingDirectory.FullName, + // Redirect stdin so the child does not inherit the CLI's TTY. Without this, on macOS/Linux + // any child (e.g. `npm install` postinstall scripts, husky, package-manager permission + // prompts) that reads from stdin will block forever waiting on the terminal, making + // `aspire new`/`init`/`add`/`restore` appear to stall with no output. We close stdin + // immediately after Start() below so a reader sees EOF instead of hanging. + // See https://github.com/microsoft/aspire/issues/16791. 
+ RedirectStandardInput = true, RedirectStandardOutput = true, RedirectStandardError = true, UseShellExecute = false, @@ -123,6 +130,17 @@ public ProcessGuestLauncher(string language, ILogger logger, FileLoggerProvider? AddEvent(activity, ProfilingTelemetry.Events.GuestProcessStart); process.Start(); + // Close the redirected stdin pipe immediately so any read attempt in the child surfaces + // as EOF rather than blocking on an empty pipe. We never write to the guest process + // stdin, so this is safe. + try + { + process.StandardInput.Close(); + } + catch (IOException) + { + // The child may have already closed its stdin; ignore. + } activity?.SetTag(TelemetryConstants.Tags.ProcessPid, process.Id); AddEvent(activity, ProfilingTelemetry.Events.GuestProcessStarted, TelemetryConstants.Tags.ProcessPid, process.Id); if (afterLaunchAsync is not null) diff --git a/tests/Aspire.Cli.Tests/Projects/GuestRuntimeTests.cs b/tests/Aspire.Cli.Tests/Projects/GuestRuntimeTests.cs index 4b8a5607d88..55848ddb7ab 100644 --- a/tests/Aspire.Cli.Tests/Projects/GuestRuntimeTests.cs +++ b/tests/Aspire.Cli.Tests/Projects/GuestRuntimeTests.cs @@ -705,6 +705,53 @@ public async Task ProcessGuestLauncher_AnnotatesAmbientGuestProfilingActivity() Assert.Contains(activity.Events, @event => @event.Name == ProfilingTelemetry.Events.GuestProcessExited); } + [Fact] + public async Task ProcessGuestLauncher_ClosesChildStdinSoReadsObserveEof() + { + // Regression coverage for https://github.com/microsoft/aspire/issues/16791. + // Before this fix, ProcessGuestLauncher did not redirect/close stdin, so a child + // process (e.g. `npm install` postinstall scripts on macOS) inherited the parent + // CLI's TTY and any stdin read blocked forever - making `aspire new` for the + // TypeScript starter appear to stall. 
+ var launcher = new ProcessGuestLauncher( + "test", + _loggerFactory.CreateLogger()); + + string command; + string[] args; + if (OperatingSystem.IsWindows()) + { + // `set /p` reads a line from stdin. With redirected+closed stdin it sees EOF and + // exits immediately. With an inherited or open-empty stdin it would block. + command = "cmd.exe"; + args = ["/c", "set /p line=(), + cts.Token); + + stopwatch.Stop(); + + Assert.False(cts.IsCancellationRequested, + $"Child process did not exit on its own within 10s - stdin may not have been closed. Elapsed: {stopwatch.Elapsed}."); + Assert.Equal(0, exitCode); + var lines = output?.GetLines().Select(l => l.Line).ToArray() ?? []; + Assert.Contains(lines, l => l.Contains("eof", StringComparison.OrdinalIgnoreCase)); + } + [Fact] public async Task ProcessGuestLauncher_KillsProcessAndReturnsOnCancellation() { From 23af241a4d7cbfb999414c5ffac6f00c5cbf7b46 Mon Sep 17 00:00:00 2001 From: Shayne Boyer <7681382+spboyer@users.noreply.github.com> Date: Wed, 27 May 2026 17:57:01 -0400 Subject: [PATCH 2/4] Also redirect+close stdin for AppHost server processes Extend the TTY-hang fix to the two AppHost server launch paths used by BuildAndGenerateSdkAsync during 'aspire new'/'init'/'add'/'restore': - DotNetBasedAppHostServerProject.Run (dev/source-based AppHost server) - PrebuiltAppHostServer (shipped AppHost server) Both previously redirected stdout/stderr but left stdin inheriting the parent CLI's TTY. The CLI communicates with the server over a Unix socket (REMOTE_APP_HOST_SOCKET_PATH), not stdin, so closing the redirected stdin pipe immediately after Process.Start() is safe and ensures any stdin read in the server (or a library it loads) surfaces as EOF instead of blocking. Combined with the earlier ProcessGuestLauncher / NpmRunner changes, this covers every child process spawned during the TypeScript starter scaffolding flow that previously inherited the parent TTY. 
Refs #16791 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Projects/DotNetBasedAppHostServerProject.cs | 13 +++++++++++++ src/Aspire.Cli/Projects/PrebuiltAppHostServer.cs | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/Aspire.Cli/Projects/DotNetBasedAppHostServerProject.cs b/src/Aspire.Cli/Projects/DotNetBasedAppHostServerProject.cs index 1d431a973a6..f1c95bd3af4 100644 --- a/src/Aspire.Cli/Projects/DotNetBasedAppHostServerProject.cs +++ b/src/Aspire.Cli/Projects/DotNetBasedAppHostServerProject.cs @@ -518,10 +518,23 @@ public async Task PrepareAsync( _logger.LogDebug("Enabling debug logging for AppHostServer"); } + startInfo.RedirectStandardInput = true; startInfo.RedirectStandardOutput = true; startInfo.RedirectStandardError = true; var process = Process.Start(startInfo)!; + // Close the redirected stdin pipe immediately so the AppHost server process — and any + // child/library it loads — observes EOF rather than blocking on the parent CLI's TTY + // if it ever reads from stdin. The CLI communicates with the server over a Unix socket + // (REMOTE_APP_HOST_SOCKET_PATH), not stdin. See https://github.com/microsoft/aspire/issues/16791. + try + { + process.StandardInput.Close(); + } + catch (IOException) + { + // The child may have already closed its stdin; ignore. 
+ } var outputCollector = new OutputCollector(); process.OutputDataReceived += (sender, e) => diff --git a/src/Aspire.Cli/Projects/PrebuiltAppHostServer.cs b/src/Aspire.Cli/Projects/PrebuiltAppHostServer.cs index d87fc185867..81b0c99591e 100644 --- a/src/Aspire.Cli/Projects/PrebuiltAppHostServer.cs +++ b/src/Aspire.Cli/Projects/PrebuiltAppHostServer.cs @@ -855,6 +855,18 @@ private static string GetRestoreVersion(string packageName, string version, bool var startInfo = CreateStartInfo(hostPid, environmentVariables, additionalArgs, debug); var process = Process.Start(startInfo)!; + // Close the redirected stdin pipe immediately so the AppHost server process — and any + // child/library it loads — observes EOF rather than blocking on the parent CLI's TTY + // if it ever reads from stdin. The CLI communicates with the server over a Unix socket + // (REMOTE_APP_HOST_SOCKET_PATH), not stdin. See https://github.com/microsoft/aspire/issues/16791. + try + { + process.StandardInput.Close(); + } + catch (IOException) + { + // The child may have already closed its stdin; ignore. + } var outputCollector = new OutputCollector(); process.OutputDataReceived += (_, e) => @@ -980,6 +992,7 @@ internal ProcessStartInfo CreateStartInfo( startInfo.Environment[KnownConfigNames.AspireLogLevel] = "Debug"; } + startInfo.RedirectStandardInput = true; startInfo.RedirectStandardOutput = true; startInfo.RedirectStandardError = true; From 9c38ba53fc13a861cf0d7c6d4d7dcb0dad520d48 Mon Sep 17 00:00:00 2001 From: Shayne Boyer <7681382+spboyer@users.noreply.github.com> Date: Thu, 28 May 2026 12:38:07 -0400 Subject: [PATCH 3/4] Fail fast in AspireStopAsync E2E helper on aspire stop errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The E2E helper AspireStopAsync was calling WaitForSuccessPromptAsync, which waits up to 500s (default) for [N OK] $. 
When aspire stop returns non-zero (for example the documented FailedToDotnetRunAppHost flake in #16643), the prompt arrives as [N ERR:2] $ and the test then sits idle for ~8:20 before failing with a useless 'didn't see OK' timeout. The recent failure on this PR's CI was exactly this shape: aspire stop exited within seconds with ERR:2, but the test wasted 8m20s waiting for an OK that would never come. Switch the helper to WaitForSuccessPromptFailFastAsync so any ERR prompt fails the test immediately with the captured error context. All 20 callers are happy-path tests that expect aspire stop to succeed, so this is a pure test-diagnostic improvement — no product behavior change. Refs #16643 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Helpers/CliE2EAutomatorHelpers.cs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/Aspire.Cli.EndToEnd.Tests/Helpers/CliE2EAutomatorHelpers.cs b/tests/Aspire.Cli.EndToEnd.Tests/Helpers/CliE2EAutomatorHelpers.cs index 579eee358b5..502b1dd146b 100644 --- a/tests/Aspire.Cli.EndToEnd.Tests/Helpers/CliE2EAutomatorHelpers.cs +++ b/tests/Aspire.Cli.EndToEnd.Tests/Helpers/CliE2EAutomatorHelpers.cs @@ -878,13 +878,21 @@ await auto.TypeAsync( /// /// Stops a running Aspire AppHost with aspire stop. /// + /// + /// Uses <see cref="WaitForSuccessPromptFailFastAsync"/> so that a + /// non-zero exit from aspire stop (for example the documented FailedToDotnetRunAppHost + /// flake in https://github.com/microsoft/aspire/issues/16643) surfaces immediately with a + /// useful diagnostic rather than the default 500-second wait for the success prompt. aspire stop + /// is invoked at the end of E2E tests on the happy path; any error result is a real failure to + /// surface, not something the test should silently sit on. 
+ /// internal static async Task AspireStopAsync( this Hex1bTerminalAutomator auto, SequenceCounter counter) { await auto.TypeAsync("aspire stop"); await auto.EnterAsync(); - await auto.WaitForSuccessPromptAsync(counter); + await auto.WaitForSuccessPromptFailFastAsync(counter); } /// From fc4bf939248276de732f966433e8bc1c5ee8fc7d Mon Sep 17 00:00:00 2001 From: Shayne Boyer <7681382+spboyer@users.noreply.github.com> Date: Thu, 28 May 2026 13:44:27 -0400 Subject: [PATCH 4/4] Give process output readers more time after exit ProcessCaptureRunner bounded post-exit stdout/stderr capture at 250ms. On loaded Windows CI, short-lived cmd.exe wrappers can exit before the async pipe readers get enough CPU to observe EOF, causing callers to receive an empty capture even though the process wrote output. The PeerInstallProbe failure on this PR had that shape: the fake peer.cmd printed the expected --version output, but the probe reported no usable output. Increase the bounded post-exit capture window to 2s. This remains far below the full process timeout, but gives enough scheduling slack for Windows pipe readers under CI load. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/Aspire.Cli/Utils/ProcessCaptureRunner.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Aspire.Cli/Utils/ProcessCaptureRunner.cs b/src/Aspire.Cli/Utils/ProcessCaptureRunner.cs index e47c75762e4..74d7e38a2d0 100644 --- a/src/Aspire.Cli/Utils/ProcessCaptureRunner.cs +++ b/src/Aspire.Cli/Utils/ProcessCaptureRunner.cs @@ -15,7 +15,7 @@ internal static class ProcessCaptureRunner // abandon the process. 2s is well above the <100ms typical post-kill exit // latency but small enough not to noticeably stall the caller. 
private static readonly TimeSpan s_postKillExitWaitBound = TimeSpan.FromSeconds(2); - private static readonly TimeSpan s_postKillCaptureWaitBound = TimeSpan.FromMilliseconds(250); + private static readonly TimeSpan s_postKillCaptureWaitBound = TimeSpan.FromSeconds(2); public static async Task> RunAsync( ProcessStartInfo startInfo, @@ -137,6 +137,10 @@ public static async Task> RunAsync( // budget (potentially several seconds for a peer that exited in // milliseconds). Cap the post-exit drain at the same bound we use after a // kill so the success path doesn't pay the full timeout for that scenario. + // Keep this comfortably above a scheduling quantum: on loaded Windows CI, + // short-lived cmd.exe wrappers can exit before the async pipe readers get + // enough CPU to observe EOF, and treating that as empty output makes + // otherwise-successful probes flaky. var capture = await SwallowCaptureAsync(captureTask, createEmptyCapture, logger, s_postKillCaptureWaitBound).ConfigureAwait(false); var exitCode = process.ExitCode;