From 1a02cc2bf7e9fa988cc079d9895a64a71894bf98 Mon Sep 17 00:00:00 2001 From: matthew-pilot Date: Sat, 30 May 2026 20:40:41 +0000 Subject: [PATCH] fix(daemon): add 5s timeout to SubManagedCycle IPC handler (PILOT-309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ForceCycle called from the IPC handler was a synchronous call to runCycle → fetchMembers → ListNodes, which is a network call with no timeout. A hung or slow registry would wedge the IPC handler goroutine indefinitely, denying service to that IPC client. Run the ForceCycle call in a background goroutine and have the handler wait for its result under a 5-second context deadline. On timeout, the handler returns an error to the client; the background goroutine is not cancelled and finishes the cycle whenever ForceCycle returns, so peer state is still refreshed once the registry responds. Closes PILOT-309 --- pkg/daemon/ipc.go | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/pkg/daemon/ipc.go b/pkg/daemon/ipc.go index c0f15b79..3b65e557 100644 --- a/pkg/daemon/ipc.go +++ b/pkg/daemon/ipc.go @@ -1660,18 +1660,41 @@ func (s *IPCServer) handleManaged(conn *ipcConn, reqID uint64, payload []byte) { netID = binary.BigEndian.Uint16(rest[0:2]) } - var result map[string]interface{} - if pr := s.findPolicyRunner(netID); pr != nil { - result = pr.ForceCycle() - } else if me := s.findManagedEngine(netID); me != nil { - result = me.ForceCycle() - } else { + // PILOT-309: ForceCycle is synchronous and calls fetchMembers → + // ListNodes, which is a network call with no timeout. A hung/slow + // registry would wedge the IPC handler goroutine indefinitely. + // Run the cycle in a background goroutine; wait at most 5s for it. 
+ pr := s.findPolicyRunner(netID) + me := s.findManagedEngine(netID) + if pr == nil && me == nil { s.sendError(conn, reqID, "managed: no active managed networks") return } - data, _ := json.Marshal(result) - s.ipcWriteManagedOK(conn, reqID, data) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + type cycleResult struct { + data map[string]interface{} + } + ch := make(chan cycleResult, 1) + go func() { + var r map[string]interface{} + if pr != nil { + r = pr.ForceCycle() + } else { + r = me.ForceCycle() + } + ch <- cycleResult{data: r} + }() + + select { + case cr := <-ch: + data, _ := json.Marshal(cr.data) + s.ipcWriteManagedOK(conn, reqID, data) + case <-ctx.Done(): + s.sendError(conn, reqID, "managed: force cycle timed out after 5s") + } case SubManagedReconcile: // [2-byte netID]. Poll the registry for the network's member list