Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ public class ExternalAgentLauncher implements Closeable {
public ExternalAgentLauncher(Config config) {
if (config.isAzureAppServices()) {
if (config.getTraceAgentPath() != null) {
log.info(
"[aas-repro] ExternalAgentLauncher: spawning trace-agent — path={} pipe={} jvm_pid={}",
config.getTraceAgentPath(),
config.getAgentNamedPipe(),
ProcessHandle.current().pid());
ProcessBuilder traceProcessBuilder = new ProcessBuilder(config.getTraceAgentPath());
traceProcessBuilder.redirectOutput(DISCARD);
traceProcessBuilder.redirectError(DISCARD);
Expand All @@ -38,6 +43,11 @@ public ExternalAgentLauncher(Config config) {
}

if (config.getDogStatsDPath() != null) {
log.info(
"[aas-repro] ExternalAgentLauncher: spawning dogstatsd — path={} pipe={} jvm_pid={}",
config.getDogStatsDPath(),
config.getDogStatsDNamedPipe(),
ProcessHandle.current().pid());
ProcessBuilder dogStatsDProcessBuilder = new ProcessBuilder(config.getDogStatsDPath());
dogStatsDProcessBuilder.redirectOutput(DISCARD);
dogStatsDProcessBuilder.redirectError(DISCARD);
Expand Down Expand Up @@ -71,6 +81,7 @@ private static ProcessSupervisor.HealthCheck healthCheck(String pipeName) {

static final class NamedPipeHealthCheck implements ProcessSupervisor.HealthCheck {
private static final String NAMED_PIPE_PREFIX = "\\\\.\\pipe\\";
private static final Logger log = LoggerFactory.getLogger(NamedPipeHealthCheck.class);

private final File pipe;

Expand All @@ -88,10 +99,15 @@ public ProcessSupervisor.Health run(ProcessSupervisor.Health previousHealth)

// first-time round do a more detailed check for existing bound named-pipe
if (previousHealth == NEVER_CHECKED) {
log.info("[aas-repro] NamedPipeHealthCheck first-check pipe={}", pipe);

double delayMillis = 50;
for (int retries = 0; retries < 7; retries++) {
if (!pipe.exists()) {
log.info(
"[aas-repro] NamedPipeHealthCheck pipe={} not found on retry {} → READY_TO_START",
pipe,
retries);
return READY_TO_START; // no longer bound, start our own external process
}

Expand All @@ -100,13 +116,19 @@ public ProcessSupervisor.Health run(ProcessSupervisor.Health previousHealth)
delayMillis = delayMillis * 1.75;
}

// Pipe survived all retries — something already owns it
log.warn(
"[aas-repro] NamedPipeHealthCheck pipe={} still bound after all retries"
+ " — assuming existing process is healthy (orphan risk!)",
pipe);
return HEALTHY; // use existing external process
}

// otherwise just check that the pipe is still bound
if (pipe.exists()) {
return HEALTHY; // keep using external process
} else {
log.info("[aas-repro] NamedPipeHealthCheck pipe={} gone → READY_TO_START", pipe);
return READY_TO_START; // start our own process
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,34 @@ private void mainLoop() {
try {
while (!stopping) {
if (currentHealth == FAULTED && ++faults >= MAX_FAULTS) {
log.warn("Failed to start process [{}] after {} attempts", imageName, faults);
log.warn(
"[aas-repro] [{}] Terminal: failed to start after {} faults — supervisor exiting",
imageName,
faults);
break;
}
try {
long delayMillis = nextCheckMillis - System.currentTimeMillis();
if (delayMillis > 0) {
Thread.sleep(delayMillis);
}
Health prevHealth = currentHealth;
currentHealth = healthCheck.run(currentHealth);
if (currentHealth != prevHealth) {
log.info(
"[aas-repro] [{}] Health {} → {} (faults={})",
imageName,
prevHealth,
currentHealth,
faults);
}
if (currentHealth == READY_TO_START) {
startProcessAndWait();
}
} catch (InterruptedException e) {
currentHealth = INTERRUPTED;
} catch (Throwable e) {
log.warn("Exception starting process: [{}]", imageName, e);
log.warn("[aas-repro] [{}] Exception in supervisor loop", imageName, e);
currentHealth = FAULTED;
}
scheduleNextHealthCheck();
Expand All @@ -108,26 +120,37 @@ private void scheduleNextHealthCheck() {

/**
 * Starts the supervised process if none is currently tracked, then blocks until it exits.
 *
 * <p>When a new process is started, {@code currentHealth} is set to {@code HEALTHY} and
 * {@code faults} is reset to 0. If {@code currentProcess} is already non-null, nothing is
 * started and the method only waits on the existing process.
 *
 * <p>After the process exits, {@code currentHealth} becomes {@code INTERRUPTED} for exit code 0
 * and {@code FAULTED} for any other code, and {@code currentProcess} is cleared.
 *
 * <p>NOTE(review): the paired debug/info log statements below look like the removed and added
 * sides of a diff rendered together — confirm which ones are meant to remain.
 *
 * @throws Exception if starting the process or waiting for it fails. If {@code waitFor()} throws,
 *     the statements after it do not run, so {@code currentProcess} stays set.
 */
private void startProcessAndWait() throws Exception {
if (currentProcess == null) {
log.debug("Starting process: [{}]", imageName);
// NOTE(review): ProcessHandle and Process.pid() need Java 9+ — confirm this module's target.
log.info(
"[aas-repro] [{}] Spawning — jvm_pid={}",
imageName,
ProcessHandle.current().pid());
// The scope returned by muteTracing() is closed as soon as start() returns or throws;
// presumably this keeps the process launch itself from being traced — verify.
try (TraceScope ignored = AgentTracer.get().muteTracing()) {
currentProcess = processBuilder.start();
}
// Reached only when start() did not throw: mark healthy and clear the fault counter.
currentHealth = HEALTHY;
faults = 0;
log.info(
"[aas-repro] [{}] Started — child_pid={} faults_reset_to_0",
imageName,
currentProcess.pid());
}

// Block until the process exits
int code = currentProcess.waitFor();
log.debug("Process [{}] has exited with code {}", imageName, code);
// Exit code 0 maps to INTERRUPTED, any other code to FAULTED.
currentHealth = code == 0 ? INTERRUPTED : FAULTED;
log.info(
"[aas-repro] [{}] Exited — code={} → health={}",
imageName,
code,
currentHealth);

// Process is dead, no longer needs to be tracked
currentProcess = null;
}

private void stopProcess() {
if (currentProcess != null) {
log.debug("Stopping process: [{}]", imageName);
log.info("[aas-repro] [{}] Stopping (supervisor closing)", imageName);
currentProcess.destroy();
if (currentProcess.isAlive()) {
currentProcess.destroyForcibly();
Expand Down
Loading