Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,9 @@ Instead, please report via one of:
- **Decentralized trust** (no central authority)
- **Org PKI** for fleet trust
- **Key rotation** every 120 seconds
- **RX cryptokey routing**: decrypted packets must carry an inner source IP belonging to the sending peer
- **Authenticated org control plane**: revoke/alias/vouch messages require a verified org Ed25519 signature
- **Confirmed failure detection**: gossiped death only suspects a peer locally; eviction requires our own probe to fail
- **Inbound-handshake rate limiting**: per-source and global token buckets are checked before the X25519 computation

See [docs/concepts/security.md](docs/concepts/security.md) for full security model documentation.
170 changes: 159 additions & 11 deletions src/discovery/membership.zig
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,29 @@ pub const Peer = struct {
cert_expires_at: ?i64 = null,
};

/// Hard cap on tracked peers. Without this, unauthenticated SWIM gossip (open
/// trust by default) lets a remote attacker inject unbounded fake members until
/// OOM (plus quadratic per-tick CPU). 4096 bounds memory to ~1 MB while staying
/// well above any realistic mesh size. Once the cap is reached, `upsert` admits
/// a new peer only by evicting a dead/left/suspected entry; if no such entry
/// exists the new peer is rejected with `error.MembershipFull`.
pub const MAX_MEMBERS: usize = 4096;

pub const MembershipTable = struct {
allocator: std.mem.Allocator,
peers: std.AutoHashMap([32]u8, Peer),
/// Local node's Lamport clock
lamport: u64,
/// Suspicion timeout in nanoseconds
suspicion_timeout_ns: i128,
/// SECURITY (H7): guards `peers` against the single writer (the SWIM/event-loop
/// thread, via the mutating methods below) racing concurrent readers on other
/// threads (FFI host calls, data-plane workers). A `peers.put` rehash frees the
/// old buckets; an unsynchronized reader would then hit freed memory. Mutating
/// methods take the write lock; external readers must take the read lock via
/// `lock.lockShared()`. The SWIM thread's own direct reads need no lock (it is
/// the only writer). Methods release the lock before any handler callback fires,
/// so this lock is never held together with the WgDevice lock (no inversion).
lock: std.Io.RwLock = .init,

pub fn init(allocator: std.mem.Allocator, suspicion_timeout_ms: u32) MembershipTable {
return .{
Expand All @@ -97,16 +113,62 @@ pub const MembershipTable = struct {

/// Add or update a peer in the membership table.
///
/// Holds the write lock for the duration of the call.
///
/// - Known pubkey: the stored entry is replaced only when `peer.lamport` is
///   strictly greater than the stored Lamport timestamp. Stale or equal
///   updates are ignored and the call returns successfully.
/// - Unknown pubkey: the peer is inserted, subject to `MAX_MEMBERS`. When the
///   table is full, one dead/left/suspected entry is evicted to make room.
///
/// Errors: `error.MembershipFull` when the table is at capacity and holds no
/// reclaimable entry; any allocation error from inserting a new entry.
pub fn upsert(self: *MembershipTable, peer: Peer) !void {
    self.lock.lockUncancelable(zio());
    defer self.lock.unlock(zio());

    if (self.peers.getPtr(peer.pubkey)) |existing| {
        // Only update if the incoming info is newer (higher Lamport timestamp).
        if (peer.lamport <= existing.lamport) return;
        // Free the previous name allocation we're about to overwrite, unless
        // the caller is reusing the same buffer (avoids a leak and a later
        // double-free; names are owned by self.allocator like remove/deinit).
        if (existing.name.len > 0 and existing.name.ptr != peer.name.ptr) {
            self.allocator.free(existing.name);
        }
        // Overwrite the value in place. The key is unchanged, so no second
        // lookup or insert is needed and nothing fallible runs after the old
        // name has been freed.
        existing.* = peer;
        return;
    }

    // New peer: enforce the capacity bound. Try to reclaim a non-alive slot
    // before rejecting, so honest churn keeps working while a flood of fake
    // members cannot grow the table without limit.
    if (self.peers.count() >= MAX_MEMBERS and !self.evictOneReclaimable()) {
        return error.MembershipFull;
    }
    try self.peers.put(peer.pubkey, peer);
}

/// Evict one dead/left/suspected peer to make room under MAX_MEMBERS.
///
/// Preference order: the first dead or left entry encountered wins; only if
/// the table holds none is the first suspected entry evicted instead.
/// Returns true if a peer was removed. Never evicts an alive peer.
/// Caller must hold the write lock (called from upsert).
fn evictOneReclaimable(self: *MembershipTable) bool {
    var victim: ?[32]u8 = null;
    var fallback: ?[32]u8 = null;

    // Single pass: stop at the first dead/left entry, and remember the first
    // suspected entry seen along the way in case no dead/left entry exists.
    var iter = self.peers.iterator();
    while (iter.next()) |entry| {
        const state = entry.value_ptr.state;
        if (state == .dead or state == .left) {
            victim = entry.key_ptr.*;
            break;
        }
        if (state == .suspected and fallback == null) {
            fallback = entry.key_ptr.*;
        }
    }

    // The key was copied out above, so removing the entry after iteration
    // ends does not touch a live iterator.
    const chosen = victim orelse (fallback orelse return false);
    self.removeLocked(chosen);
    return true;
}

/// Mark a peer as suspected (failed to respond to ping).
pub fn suspect(self: *MembershipTable, pubkey: [32]u8) void {
self.lock.lockUncancelable(zio());
defer self.lock.unlock(zio());
if (self.peers.getPtr(pubkey)) |peer| {
if (peer.state == .alive) {
self.lamport += 1;
Expand All @@ -119,6 +181,8 @@ pub const MembershipTable = struct {

/// Mark a peer as alive (responded to ping).
pub fn markAlive(self: *MembershipTable, pubkey: [32]u8, rtt_ns: ?u64) void {
self.lock.lockUncancelable(zio());
defer self.lock.unlock(zio());
if (self.peers.getPtr(pubkey)) |peer| {
self.lamport += 1;
peer.state = .alive;
Expand All @@ -131,6 +195,13 @@ pub const MembershipTable = struct {

/// Mark a peer as dead (confirmed unreachable).
///
/// Locking entry point: acquires the write lock, delegates to
/// `markDeadLocked`, and releases the lock on return. Code that already holds
/// the write lock must call `markDeadLocked` directly instead.
pub fn markDead(self: *MembershipTable, pubkey: [32]u8) void {
self.lock.lockUncancelable(zio());
defer self.lock.unlock(zio());
self.markDeadLocked(pubkey);
}

/// markDead body without locking — caller must hold the write lock.
fn markDeadLocked(self: *MembershipTable, pubkey: [32]u8) void {
if (self.peers.getPtr(pubkey)) |peer| {
self.lamport += 1;
peer.state = .dead;
Expand All @@ -140,6 +211,13 @@ pub const MembershipTable = struct {

/// Remove a dead peer from the table entirely.
///
/// Locking entry point: acquires the write lock, delegates to
/// `removeLocked`, and releases the lock on return. Code that already holds
/// the write lock must call `removeLocked` directly instead.
pub fn remove(self: *MembershipTable, pubkey: [32]u8) void {
self.lock.lockUncancelable(zio());
defer self.lock.unlock(zio());
self.removeLocked(pubkey);
}

/// remove body without locking — caller must hold the write lock.
fn removeLocked(self: *MembershipTable, pubkey: [32]u8) void {
if (self.peers.fetchRemove(pubkey)) |kv| {
if (kv.value.name.len > 0) {
self.allocator.free(kv.value.name);
Expand Down Expand Up @@ -188,32 +266,39 @@ pub const MembershipTable = struct {
return null;
}

/// Check suspected peers and promote to dead if their suspicion timeout
/// expired. The expired pubkeys are written into the caller-provided `out`
/// buffer and the count is returned.
///
/// At most `out.len` peers are expired per call. Any further expired peers
/// are left in the suspected state and are picked up by a later call.
///
/// Holds the write lock for the duration of the call, so it uses
/// `markDeadLocked` rather than the locking `markDead`.
///
/// SECURITY (use-after-return): this used to return a slice into its own
/// stack frame, which the caller's per-element work (print/onPeerDead/
/// enqueueGossip) then overwrote. The buffer is now owned by the caller so
/// it stays live across that loop.
pub fn expireSuspected(self: *MembershipTable, out: [][32]u8) usize {
    self.lock.lockUncancelable(zio());
    defer self.lock.unlock(zio());
    const now = nowNs();
    var n: usize = 0;

    // Pass 1: collect the keys of peers to mark as dead. State changes are
    // deferred to a second pass so the map is not touched while iterating.
    var iter = self.peers.iterator();
    while (iter.next()) |entry| {
        // Buffer full: nothing more can be recorded this round.
        if (n == out.len) break;
        if (entry.value_ptr.state != .suspected) continue;
        const suspected_at = entry.value_ptr.suspected_at_ns orelse continue;
        if (now - suspected_at > self.suspicion_timeout_ns) {
            out[n] = entry.key_ptr.*;
            n += 1;
        }
    }

    // Pass 2: promote the collected peers to dead.
    for (out[0..n]) |pubkey| {
        self.markDeadLocked(pubkey);
    }

    return n;
}

/// Number of peers in a given state.
Expand Down Expand Up @@ -314,3 +399,66 @@ test "lamport ordering" {
const peer = table.peers.get(pubkey).?;
try std.testing.expectEqual(peer.state, .alive);
}

/// Build a minimal `Peer` fixture for the tests below.
///
/// Only the public key, state, and suspicion timestamp vary per call. Every
/// other field is a fixed placeholder. The name is the empty string, which the
/// table's `name.len > 0` guards skip when freeing names.
fn testPeer(pk: [32]u8, state: PeerState, suspected_at: ?i128) Peer {
    const fixture: Peer = .{
        // Caller-controlled fields.
        .pubkey = pk,
        .state = state,
        .suspected_at_ns = suspected_at,
        // Fixed placeholders.
        .name = "",
        .lamport = 1,
        .last_seen_ns = 0,
        .last_rtt_ns = null,
        .gossip_endpoint = null,
        .wg_pubkey = null,
        .mesh_ip = .{ 0, 0, 0, 0 },
        .wg_port = 0,
        .handshake_complete = false,
    };
    return fixture;
}

test "membership table caps growth and reclaims non-alive slots (H3 regression)" {
    const allocator = std.testing.allocator;
    var table = MembershipTable.init(allocator, 5000);
    defer table.deinit();

    // Fill the table to the cap with distinct alive peers. The loop index is
    // encoded into the first four key bytes so every pubkey is unique.
    for (0..MAX_MEMBERS) |i| {
        var pk = [_]u8{0} ** 32;
        std.mem.writeInt(u32, pk[0..4], @intCast(i), .little);
        try table.upsert(testPeer(pk, .alive, null));
    }
    try std.testing.expectEqual(MAX_MEMBERS, table.count());

    // A new peer past the cap with no reclaimable slot must be rejected.
    const overflow_pk = [_]u8{0xFF} ** 32;
    try std.testing.expectError(error.MembershipFull, table.upsert(testPeer(overflow_pk, .alive, null)));

    // Kill an existing peer, then the new peer is admitted by reclaiming it.
    const victim = [_]u8{0} ** 32; // i==0
    table.markDead(victim);
    try table.upsert(testPeer(overflow_pk, .alive, null));
    try std.testing.expectEqual(MAX_MEMBERS, table.count());
    try std.testing.expect(table.peers.get(overflow_pk) != null);
    // The slot must have come from the dead peer. Without this check the test
    // would also pass if an alive peer had been evicted instead.
    try std.testing.expect(table.peers.get(victim) == null);
}

test "expireSuspected writes to caller-owned buffer (H8 regression)" {
    const gpa = std.testing.allocator;
    var table = MembershipTable.init(gpa, 1000); // 1s timeout
    defer table.deinit();

    // Three peers already suspected at t = 1ns, i.e. long past the timeout.
    const suspects = [_][32]u8{ [_]u8{1} ** 32, [_]u8{2} ** 32, [_]u8{3} ** 32 };
    for (suspects) |pk| {
        try table.upsert(testPeer(pk, .suspected, 1));
    }

    var expired: [8][32]u8 = undefined;
    const expired_count = table.expireSuspected(&expired);
    try std.testing.expectEqual(@as(usize, 3), expired_count);

    // Each reported key has to resolve to a peer that is now dead. Before the
    // fix, the 2nd/3rd entries were garbage read from a reclaimed stack frame.
    for (expired[0..expired_count]) |pk| {
        const found = table.peers.get(pk);
        if (found == null) return error.TestUnexpectedResult;
        try std.testing.expectEqual(PeerState.dead, found.?.state);
    }
}
Loading
Loading