From d2797e6cecb378671b041d1f385e85ff9b5a4c6f Mon Sep 17 00:00:00 2001 From: matthew-pilot Date: Sun, 31 May 2026 23:24:34 +0000 Subject: [PATCH] fix(daemon): add IPC whitelist to bypass per-client dial quota (PILOT-346) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add IPCWhitelist config field (process comm names). On Linux, the peer PID from SO_PEERCRED is resolved to a process name via /proc/[pid]/comm; whitelisted clients skip the MaxConnsPerIPCClient (4096) per-client dial quota check. Darwin stubs return PID=0. Changes: - Config: IPCWhitelist []string - checkPeerUID now returns (int32, error) — peer PID - resolveProcessName(pid) on Linux, no-op elsewhere - ipcConn gets peerPID + whitelisted bool - handleDial respects whitelisted flag - Updated 7 test files for new signatures --- pkg/daemon/daemon.go | 1 + pkg/daemon/ipc.go | 45 ++++++++++++++++--- pkg/daemon/ipc_peercred_darwin.go | 21 +++++---- pkg/daemon/ipc_peercred_linux.go | 33 +++++++++----- pkg/daemon/ipc_peercred_other.go | 12 +++-- pkg/daemon/zz_ipc_async_write_test.go | 8 ++-- pkg/daemon/zz_ipc_conncount_stale_bug_test.go | 2 +- pkg/daemon/zz_ipc_dialcancel_leak_bug_test.go | 2 +- pkg/daemon/zz_ipc_helpers_test.go | 4 +- pkg/daemon/zz_ipc_socket_lifecycle_test.go | 4 +- pkg/daemon/zz_ipc_test.go | 2 +- pkg/daemon/zz_ipc_write_deadline_test.go | 4 +- 12 files changed, 96 insertions(+), 42 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index c1219a04..b5eae93d 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -73,6 +73,7 @@ type Config struct { BeaconAddr string ListenAddr string // UDP listen address for tunnel traffic SocketPath string // Unix socket path for IPC + IPCWhitelist []string // process names (comm) trusted to bypass per-client dial quota (PILOT-346) Encrypt bool // enable tunnel-layer encryption (X25519 + AES-256-GCM) RegistryTLS bool // use TLS for registry connection RegistryFingerprint string // hex SHA-256 
fingerprint for TLS cert pinning diff --git a/pkg/daemon/ipc.go b/pkg/daemon/ipc.go index d7a307d7..243a47ca 100644 --- a/pkg/daemon/ipc.go +++ b/pkg/daemon/ipc.go @@ -149,6 +149,11 @@ type ipcConn struct { closeOnce sync.Once writeDone chan struct{} + // peerPID is the PID of the connected process (Linux SO_PEERCRED, + // 0 on Darwin/other). Used for IPC whitelist matching (PILOT-346). + peerPID int32 + whitelisted bool // bypasses per-client dial quota (PILOT-346) + // dialCancels holds cancel funcs for in-flight DialConnection calls // this client started. On Close() we fire them all so the daemon's // dial loops bail out immediately instead of grinding to their full @@ -229,14 +234,17 @@ const MaxConnsPerIPCClient = 4096 // newIPCConn wraps a net.Conn and starts the per-conn writer goroutine. // All callers must use this constructor (not &ipcConn{...}) so the writer -// is properly initialized. -func newIPCConn(c net.Conn) *ipcConn { +// is properly initialized. peerPID is the PID of the connected process +// (0 on non-Linux); whitelisted bypasses the per-client dial quota. +func newIPCConn(c net.Conn, peerPID int32, whitelisted bool) *ipcConn { ic := &ipcConn{ Conn: c, sendCh: make(chan []byte, ipcSendBuffer), done: make(chan struct{}), writeDone: make(chan struct{}), dialCancels: make(map[uint64]context.CancelFunc), + peerPID: peerPID, + whitelisted: whitelisted, } go ic.writeLoop() return ic @@ -511,15 +519,34 @@ func (s *IPCServer) acceptLoop() { // PILOT-246: Reject connections from other UIDs — only same-UID // processes may issue IPC commands. Without this, any local // process can connect and control the daemon. - if err := checkPeerUID(conn); err != nil { + peerPID, err := checkPeerUID(conn) + if err != nil { slog.Warn("IPC rejected cross-UID connection", "err", err) conn.Close() continue } + + // PILOT-346: Check if the connecting process is in the IPC + // whitelist. 
Whitelisted clients bypass the per-client dial + // connection quota (MaxConnsPerIPCClient). + var whitelisted bool + if peerPID > 0 && len(s.daemon.config.IPCWhitelist) > 0 { + name := resolveProcessName(peerPID) + if name != "" { + for _, w := range s.daemon.config.IPCWhitelist { + if name == w { + whitelisted = true + slog.Info("IPC whitelisted client connected", "pid", peerPID, "name", name) + break + } + } + } + } + s.mu.Lock() full := len(s.clients) >= MaxIPCClients if !full { - ic := newIPCConn(conn) + ic := newIPCConn(conn, peerPID, whitelisted) s.clients[ic] = true s.mu.Unlock() go s.handleClient(ic) @@ -757,9 +784,13 @@ func (s *IPCServer) handleDial(conn *ipcConn, reqID uint64, payload []byte) { // P2-002: reject dial if this client already owns MaxConnsPerIPCClient // connections. Avoids the single-client DoS where one buggy driver // exhausts the global connection table. - if n := conn.connCount(); n >= MaxConnsPerIPCClient { - s.sendError(conn, reqID, fmt.Sprintf("dial: per-client connection quota (%d) reached", MaxConnsPerIPCClient)) - return + // PILOT-346: whitelisted clients (trusted integrations) bypass the + // per-client quota limit. + if !conn.whitelisted { + if n := conn.connCount(); n >= MaxConnsPerIPCClient { + s.sendError(conn, reqID, fmt.Sprintf("dial: per-client connection quota (%d) reached", MaxConnsPerIPCClient)) + return + } } dstAddr := protocol.UnmarshalAddr(payload[0:protocol.AddrSize]) diff --git a/pkg/daemon/ipc_peercred_darwin.go b/pkg/daemon/ipc_peercred_darwin.go index 58d482f8..2a1f4491 100644 --- a/pkg/daemon/ipc_peercred_darwin.go +++ b/pkg/daemon/ipc_peercred_darwin.go @@ -10,17 +10,22 @@ import ( "golang.org/x/sys/unix" ) +// resolveProcessName is a no-op on Darwin — Xucred doesn't expose PID. +// IPC whitelist is Linux-only (PILOT-346). +func resolveProcessName(pid int32) string { return "" } + // checkPeerUID — Darwin variant. 
Uses LOCAL_PEERCRED + GetsockoptXucred, // the BSD equivalent of Linux SO_PEERCRED, to retrieve the effective UID -// of the connected peer. -func checkPeerUID(conn net.Conn) error { +// of the connected peer. Returns 0 for PID — Darwin Xucred does not +// expose the peer PID, so IPC whitelist is Linux-only (PILOT-346). +func checkPeerUID(conn net.Conn) (int32, error) { unixConn, ok := conn.(*net.UnixConn) if !ok { - return fmt.Errorf("IPC: not a unix socket") + return 0, fmt.Errorf("IPC: not a unix socket") } rawConn, err := unixConn.SyscallConn() if err != nil { - return fmt.Errorf("IPC: SyscallConn: %w", err) + return 0, fmt.Errorf("IPC: SyscallConn: %w", err) } var xucred *unix.Xucred var getErr error @@ -28,13 +33,13 @@ func checkPeerUID(conn net.Conn) error { xucred, getErr = unix.GetsockoptXucred(int(fd), unix.SOL_LOCAL, unix.LOCAL_PEERCRED) }) if ctrlErr != nil { - return fmt.Errorf("IPC: Control: %w", ctrlErr) + return 0, fmt.Errorf("IPC: Control: %w", ctrlErr) } if getErr != nil { - return fmt.Errorf("IPC: LOCAL_PEERCRED: %w", getErr) + return 0, fmt.Errorf("IPC: LOCAL_PEERCRED: %w", getErr) } if xucred.Uid != uint32(os.Getuid()) { - return fmt.Errorf("IPC: peer UID %d != daemon UID %d", xucred.Uid, os.Getuid()) + return 0, fmt.Errorf("IPC: peer UID %d != daemon UID %d", xucred.Uid, os.Getuid()) } - return nil + return 0, nil } diff --git a/pkg/daemon/ipc_peercred_linux.go b/pkg/daemon/ipc_peercred_linux.go index d9767830..aeb6c6c5 100644 --- a/pkg/daemon/ipc_peercred_linux.go +++ b/pkg/daemon/ipc_peercred_linux.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "os" + "strings" "golang.org/x/sys/unix" ) @@ -13,17 +14,18 @@ import ( // checkPeerUID verifies that a Unix-domain socket connection comes from // the same Unix UID as the daemon. Linux variant: SO_PEERCRED + Ucred. // -// Returns nil if the peer UID matches the daemon's UID, or an error -// if the socket is not Unix-domain, the syscall failed, or the peer -// UID differs. 
This is the primary IPC access control for PILOT-246. -func checkPeerUID(conn net.Conn) error { +// Returns the peer PID (for whitelist checks) and nil if the peer UID +// matches the daemon's UID, or an error if the socket is not Unix-domain, +// the syscall failed, or the peer UID differs. This is the primary IPC +// access control for PILOT-246. +func checkPeerUID(conn net.Conn) (int32, error) { unixConn, ok := conn.(*net.UnixConn) if !ok { - return fmt.Errorf("IPC: not a unix socket") + return 0, fmt.Errorf("IPC: not a unix socket") } rawConn, err := unixConn.SyscallConn() if err != nil { - return fmt.Errorf("IPC: SyscallConn: %w", err) + return 0, fmt.Errorf("IPC: SyscallConn: %w", err) } var ucred *unix.Ucred var getErr error @@ -31,13 +33,24 @@ func checkPeerUID(conn net.Conn) error { ucred, getErr = unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED) }) if ctrlErr != nil { - return fmt.Errorf("IPC: Control: %w", ctrlErr) + return 0, fmt.Errorf("IPC: Control: %w", ctrlErr) } if getErr != nil { - return fmt.Errorf("IPC: SO_PEERCRED: %w", getErr) + return 0, fmt.Errorf("IPC: SO_PEERCRED: %w", getErr) } if ucred.Uid != uint32(os.Getuid()) { - return fmt.Errorf("IPC: peer UID %d != daemon UID %d", ucred.Uid, os.Getuid()) + return 0, fmt.Errorf("IPC: peer UID %d != daemon UID %d", ucred.Uid, os.Getuid()) } - return nil + return int32(ucred.Pid), nil +} + +// resolveProcessName reads /proc/[pid]/comm and returns the process name +// (trimmed). Returns empty string on any error (process gone, permission +// denied, etc.). Used for IPC whitelist matching (PILOT-346). 
+func resolveProcessName(pid int32) string { + data, err := os.ReadFile(fmt.Sprintf("/proc/%d/comm", pid)) + if err != nil { + return "" + } + return strings.TrimSpace(string(data)) } diff --git a/pkg/daemon/ipc_peercred_other.go b/pkg/daemon/ipc_peercred_other.go index 255b7a3a..3cf5c34d 100644 --- a/pkg/daemon/ipc_peercred_other.go +++ b/pkg/daemon/ipc_peercred_other.go @@ -7,12 +7,16 @@ import ( "net" ) +// resolveProcessName is a no-op on unsupported platforms. +// IPC whitelist is Linux-only (PILOT-346). +func resolveProcessName(pid int32) string { return "" } + // checkPeerUID — fallback for non-Linux, non-Darwin builds. Pilot does // not officially support these platforms; the IPC peer-UID check is a -// no-op so the build keeps compiling. -func checkPeerUID(conn net.Conn) error { +// no-op so the build keeps compiling. Returns 0 for PID. +func checkPeerUID(conn net.Conn) (int32, error) { if _, ok := conn.(*net.UnixConn); !ok { - return fmt.Errorf("IPC: not a unix socket") + return 0, fmt.Errorf("IPC: not a unix socket") } - return nil + return 0, nil } diff --git a/pkg/daemon/zz_ipc_async_write_test.go b/pkg/daemon/zz_ipc_async_write_test.go index a7943510..e18b9f9e 100644 --- a/pkg/daemon/zz_ipc_async_write_test.go +++ b/pkg/daemon/zz_ipc_async_write_test.go @@ -31,7 +31,7 @@ func pairedConn(t *testing.T) (server, client net.Conn) { func TestIPCConnAsyncWriteSerializesConcurrent(t *testing.T) { t.Parallel() server, client := pairedConn(t) - conn := newIPCConn(server) + conn := newIPCConn(server, 0, false) defer conn.Close() const writers = 16 @@ -106,7 +106,7 @@ func TestIPCConnAsyncWriteSerializesConcurrent(t *testing.T) { func TestIPCConnAsyncWriteRejectsAfterClose(t *testing.T) { t.Parallel() server, _ := pairedConn(t) - conn := newIPCConn(server) + conn := newIPCConn(server, 0, false) conn.Close() err := conn.ipcWrite([]byte("late")) @@ -130,7 +130,7 @@ func TestIPCConnAsyncWriteRejectsAfterClose(t *testing.T) { func 
TestIPCConnAsyncWriteBlocksUntilClose(t *testing.T) { t.Parallel() server, client := pairedConn(t) - conn := newIPCConn(server) + conn := newIPCConn(server, 0, false) defer client.Close() // intentionally do NOT read — block forever // Fill the buffer + writer's in-flight slot. ipcSendBuffer + 1 @@ -191,7 +191,7 @@ func TestIPCConnAsyncWriteBlocksUntilClose(t *testing.T) { func TestIPCConnCloseDrainsBufferedMessages(t *testing.T) { t.Parallel() server, client := pairedConn(t) - conn := newIPCConn(server) + conn := newIPCConn(server, 0, false) const N = 20 for i := 0; i < N; i++ { diff --git a/pkg/daemon/zz_ipc_conncount_stale_bug_test.go b/pkg/daemon/zz_ipc_conncount_stale_bug_test.go index 28bdb014..51cebfea 100644 --- a/pkg/daemon/zz_ipc_conncount_stale_bug_test.go +++ b/pkg/daemon/zz_ipc_conncount_stale_bug_test.go @@ -32,7 +32,7 @@ func TestIPCConnCountIncludesClosedConns(t *testing.T) { clientConn, serverConn := net.Pipe() t.Cleanup(func() { clientConn.Close(); serverConn.Close() }) - ic := newIPCConn(serverConn) + ic := newIPCConn(serverConn, 0, false) t.Cleanup(func() { ic.Close() }) pm := NewPortManager() diff --git a/pkg/daemon/zz_ipc_dialcancel_leak_bug_test.go b/pkg/daemon/zz_ipc_dialcancel_leak_bug_test.go index ac0daf3c..3a217469 100644 --- a/pkg/daemon/zz_ipc_dialcancel_leak_bug_test.go +++ b/pkg/daemon/zz_ipc_dialcancel_leak_bug_test.go @@ -30,7 +30,7 @@ func TestIPCDialCancelsLeakOnCompletedDials(t *testing.T) { clientConn, serverConn := net.Pipe() t.Cleanup(func() { clientConn.Close(); serverConn.Close() }) - ic := newIPCConn(serverConn) + ic := newIPCConn(serverConn, 0, false) t.Cleanup(func() { ic.Close() }) const N = 1000 diff --git a/pkg/daemon/zz_ipc_helpers_test.go b/pkg/daemon/zz_ipc_helpers_test.go index eba0f24e..1784fa90 100644 --- a/pkg/daemon/zz_ipc_helpers_test.go +++ b/pkg/daemon/zz_ipc_helpers_test.go @@ -19,7 +19,7 @@ import ( func newPipePair(t *testing.T) (*ipcConn, net.Conn) { t.Helper() client, server := net.Pipe() - ic := 
newIPCConn(server) + ic := newIPCConn(server, 0, false) t.Cleanup(func() { ic.Close() client.Close() @@ -222,7 +222,7 @@ func TestIPCServerCloseClosesClients(t *testing.T) { client, server := net.Pipe() t.Cleanup(func() { client.Close() }) - ic := newIPCConn(server) + ic := newIPCConn(server, 0, false) s.clients[ic] = true if err := s.Close(); err != nil { diff --git a/pkg/daemon/zz_ipc_socket_lifecycle_test.go b/pkg/daemon/zz_ipc_socket_lifecycle_test.go index 27c83758..8465f422 100644 --- a/pkg/daemon/zz_ipc_socket_lifecycle_test.go +++ b/pkg/daemon/zz_ipc_socket_lifecycle_test.go @@ -420,7 +420,7 @@ func TestCheckPeerUIDRejectsNonUnixSocket(t *testing.T) { defer server.Close() defer client.Close() - if err := checkPeerUID(server); err == nil { + if _, err := checkPeerUID(server); err == nil { t.Fatal("checkPeerUID should reject non-Unix conn") } } @@ -440,7 +440,7 @@ func TestCheckPeerUIDAcceptsSameUIDUnixSocket(t *testing.T) { } defer conn.Close() - if err := checkPeerUID(conn); err != nil { + if _, err := checkPeerUID(conn); err != nil { t.Fatalf("checkPeerUID should accept same-UID connection: %v", err) } } diff --git a/pkg/daemon/zz_ipc_test.go b/pkg/daemon/zz_ipc_test.go index 82f8ad52..c6d8ee3f 100644 --- a/pkg/daemon/zz_ipc_test.go +++ b/pkg/daemon/zz_ipc_test.go @@ -22,7 +22,7 @@ import ( func newIPCTestConn(t *testing.T) (*ipcConn, net.Conn) { t.Helper() server, client := net.Pipe() - ic := newIPCConn(server) + ic := newIPCConn(server, 0, false) t.Cleanup(func() { _ = ic.Close() // signals writer goroutine to drain + exit _ = client.Close() diff --git a/pkg/daemon/zz_ipc_write_deadline_test.go b/pkg/daemon/zz_ipc_write_deadline_test.go index f5dab991..d9c594d8 100644 --- a/pkg/daemon/zz_ipc_write_deadline_test.go +++ b/pkg/daemon/zz_ipc_write_deadline_test.go @@ -59,7 +59,7 @@ func TestWriteLoopExitsOnWriteDeadline(t *testing.T) { } server := res.conn - ic := newIPCConn(server) + ic := newIPCConn(server, 0, false) defer func() { ic.Close() 
client.Close() @@ -114,7 +114,7 @@ func TestHealthHandlerInlineDispatch(t *testing.T) { defer server.Close() defer client.Close() - ic := newIPCConn(server) + ic := newIPCConn(server, 0, false) defer ic.Close() // Minimal daemon — handleHealth only needs Info().