From 70e3460cb83dd44fadafa9f306c9370e2fca4757 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:38:37 -0300 Subject: [PATCH 1/3] Harden networking: auth, memory-safety, DoS, and thread-safety fixes Addresses the confirmed Critical/High issues (plus two low-risk Mediums) from an adversarial review of the networking stack. Each fix ships with a regression test; green on `zig build test`, Debug, ReleaseFast, ReleaseSafe. Authentication / spoofing - Verify the Ed25519 signature on org control messages (revoke/alias/vouch) and require a trusted org before acting on them (swim.zig, codec.zig). - Enforce RX cryptokey routing: a decrypted packet's inner source IP must belong to the sending peer, for IPv4 and IPv6 (device.zig). - Gossiped dead/leave/suspect about a third party now only marks it suspected locally and is confirmed by our own failure detector before any eviction; gossip can no longer clear local suspicion or resurrect a peer (swim.zig). - DNS bootstrap: random per-query transaction ID, connected socket for source filtering, and transaction-ID/question validation (dns.zig). Memory safety (safety checks are off in the shipped ReleaseFast build) - Bound the base64 token decode against the output buffer before decoding, fixing a stack out-of-bounds write on an over-long mg:// token (coordinated_punch.zig). - Return expired-peer keys via a caller-owned buffer instead of a slice into a reclaimed stack frame (membership.zig). Resource exhaustion / DoS - Cap the membership table and reclaim non-alive entries so unauthenticated gossip cannot grow it without bound (membership.zig). - Rate-limit inbound WireGuard handshake initiations (per-source + global token bucket) before the X25519 (device.zig). Thread safety - Add an RwLock to the WgDevice peer table (exclusive for mutation/handshake, shared for the data plane) so peer removal cannot zero key material while a worker is mid-encrypt (device.zig, main.zig). - Add an RwLock to the membership table; the single writer takes it exclusively for mutations, external readers (data plane, FFI) share it (membership.zig, main.zig, meshguard_ffi.zig). Service policy - Classify IPv6 packets in the service filter and fail closed on anything unclassifiable under a default-deny policy (policy.zig). --- SECURITY.md | 4 + src/discovery/membership.zig | 164 +++++++++++++++++++++++++-- src/discovery/swim.zig | 183 +++++++++++++++++++++++++----- src/main.zig | 70 +++++++----- src/meshguard_ffi.zig | 37 ++++++- src/nat/coordinated_punch.zig | 27 ++++- src/net/dns.zig | 98 ++++++++++++++++- src/protocol/codec.zig | 32 ++++++ src/services/policy.zig | 104 ++++++++++++++++- src/wireguard/device.zig | 202 +++++++++++++++++++++++++++++++++- 10 files changed, 834 insertions(+), 87 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 939454f..9e8e1a5 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -37,5 +37,9 @@ Instead, please report via one of: - **Decentralized trust** (no central authority) - **Org PKI** for fleet trust - **Key rotation** every 120 seconds +- **RX cryptokey routing**: decrypted packets must carry an inner source IP belonging to the sending peer +- **Authenticated org control plane**: revoke/alias/vouch messages require a verified org Ed25519 signature +- **Confirmed failure detection**: gossiped death only suspects a peer locally; eviction requires our own probe to fail +- **Inbound-handshake rate limiting**: per-source + global token bucket before the X25519 See [docs/concepts/security.md](docs/concepts/security.md) for full security model documentation. \ No newline at end of file diff --git a/src/discovery/membership.zig b/src/discovery/membership.zig index 15f31e5..ff14252 100644 --- a/src/discovery/membership.zig +++ b/src/discovery/membership.zig @@ -68,6 +68,13 @@ pub const Peer = struct { cert_expires_at: ?i64 = null, }; +/// Hard cap on tracked peers. Without this, unauthenticated SWIM gossip (open +/// trust by default) lets a remote attacker inject unbounded fake members until +/// OOM (plus quadratic per-tick CPU). 4096 bounds memory to ~1 MB while staying +/// well above any realistic mesh size; new peers past the cap reclaim a +/// dead/left/suspected slot or are dropped. +pub const MAX_MEMBERS: usize = 4096; + pub const MembershipTable = struct { allocator: std.mem.Allocator, peers: std.AutoHashMap([32]u8, Peer), @@ -75,6 +82,15 @@ pub const MembershipTable = struct { lamport: u64, /// Suspicion timeout in nanoseconds suspicion_timeout_ns: i128, + /// SECURITY (H7): guards `peers` against the single writer (the SWIM/event-loop + /// thread, via the mutating methods below) racing concurrent readers on other + /// threads (FFI host calls, data-plane workers). A `peers.put` rehash frees the + /// old buckets; an unsynchronized reader would then hit freed memory. Mutating + /// methods take the write lock; external readers must take the read lock via + /// `lock.lockShared()`. The SWIM thread's own direct reads need no lock (it is + /// the only writer). Methods release the lock before any handler callback fires, + /// so this lock is never held together with the WgDevice lock (no inversion). + lock: std.Io.RwLock = .init, pub fn init(allocator: std.mem.Allocator, suspicion_timeout_ms: u32) MembershipTable { return .{ @@ -97,16 +113,56 @@ pub const MembershipTable = struct { /// Add or update a peer in the membership table. pub fn upsert(self: *MembershipTable, peer: Peer) !void { + self.lock.lockUncancelable(zio()); + defer self.lock.unlock(zio()); const existing = self.peers.get(peer.pubkey); if (existing) |e| { // Only update if the incoming info is newer (higher Lamport timestamp) if (peer.lamport <= e.lamport) return; + try self.peers.put(peer.pubkey, peer); + return; + } + // New peer: enforce the capacity bound. Try to reclaim a non-alive slot + // before rejecting, so honest churn keeps working while a flood of fake + // members cannot grow the table without limit. + if (self.peers.count() >= MAX_MEMBERS and !self.evictOneReclaimable()) { + return error.MembershipFull; } try self.peers.put(peer.pubkey, peer); } + /// Evict one dead/left/suspected peer to make room under MAX_MEMBERS. + /// Returns true if a peer was removed. Never evicts an alive peer. + /// Caller must hold the write lock (called from upsert). + fn evictOneReclaimable(self: *MembershipTable) bool { + var victim: ?[32]u8 = null; + var iter = self.peers.iterator(); + while (iter.next()) |entry| { + if (entry.value_ptr.state == .dead or entry.value_ptr.state == .left) { + victim = entry.key_ptr.*; + break; + } + } + if (victim == null) { + var it2 = self.peers.iterator(); + while (it2.next()) |entry| { + if (entry.value_ptr.state == .suspected) { + victim = entry.key_ptr.*; + break; + } + } + } + if (victim) |v| { + self.removeLocked(v); + return true; + } + return false; + } + /// Mark a peer as suspected (failed to respond to ping). pub fn suspect(self: *MembershipTable, pubkey: [32]u8) void { + self.lock.lockUncancelable(zio()); + defer self.lock.unlock(zio()); if (self.peers.getPtr(pubkey)) |peer| { if (peer.state == .alive) { self.lamport += 1; @@ -119,6 +175,8 @@ pub const MembershipTable = struct { /// Mark a peer as alive (responded to ping). pub fn markAlive(self: *MembershipTable, pubkey: [32]u8, rtt_ns: ?u64) void { + self.lock.lockUncancelable(zio()); + defer self.lock.unlock(zio()); if (self.peers.getPtr(pubkey)) |peer| { self.lamport += 1; peer.state = .alive; @@ -131,6 +189,13 @@ pub const MembershipTable = struct { /// Mark a peer as dead (confirmed unreachable). pub fn markDead(self: *MembershipTable, pubkey: [32]u8) void { + self.lock.lockUncancelable(zio()); + defer self.lock.unlock(zio()); + self.markDeadLocked(pubkey); + } + + /// markDead body without locking β€” caller must hold the write lock. + fn markDeadLocked(self: *MembershipTable, pubkey: [32]u8) void { if (self.peers.getPtr(pubkey)) |peer| { self.lamport += 1; peer.state = .dead; @@ -140,6 +205,13 @@ pub const MembershipTable = struct { /// Remove a dead peer from the table entirely. pub fn remove(self: *MembershipTable, pubkey: [32]u8) void { + self.lock.lockUncancelable(zio()); + defer self.lock.unlock(zio()); + self.removeLocked(pubkey); + } + + /// remove body without locking β€” caller must hold the write lock. + fn removeLocked(self: *MembershipTable, pubkey: [32]u8) void { if (self.peers.fetchRemove(pubkey)) |kv| { if (kv.value.name.len > 0) { self.allocator.free(kv.value.name); @@ -188,32 +260,39 @@ pub const MembershipTable = struct { return null; } - /// Check suspected peers and promote to dead if timeout expired. - pub fn expireSuspected(self: *MembershipTable) [][32]u8 { + /// Check suspected peers and promote to dead if their suspicion timeout + /// expired. The expired pubkeys are written into the caller-provided `out` + /// buffer and the count is returned. + /// + /// SECURITY (use-after-return): this used to return a slice into its own + /// stack frame, which the caller's per-element work (print/onPeerDead/ + /// enqueueGossip) then overwrote. The buffer is now owned by the caller so + /// it stays live across that loop. + pub fn expireSuspected(self: *MembershipTable, out: [][32]u8) usize { + self.lock.lockUncancelable(zio()); + defer self.lock.unlock(zio()); const now = nowNs(); - // Collect keys of peers to mark as dead (can't modify map while iterating) - var to_kill_buf: [256][32]u8 = undefined; - var to_kill_count: usize = 0; + var n: usize = 0; var iter = self.peers.iterator(); while (iter.next()) |entry| { if (entry.value_ptr.state == .suspected) { if (entry.value_ptr.suspected_at_ns) |suspected_at| { if (now - suspected_at > self.suspicion_timeout_ns) { - if (to_kill_count < to_kill_buf.len) { - to_kill_buf[to_kill_count] = entry.key_ptr.*; - to_kill_count += 1; + if (n < out.len) { + out[n] = entry.key_ptr.*; + n += 1; } } } } } - for (to_kill_buf[0..to_kill_count]) |pubkey| { - self.markDead(pubkey); + for (out[0..n]) |pubkey| { + self.markDeadLocked(pubkey); } - return to_kill_buf[0..to_kill_count]; + return n; } /// Number of peers in a given state. @@ -314,3 +393,66 @@ test "lamport ordering" { const peer = table.peers.get(pubkey).?; try std.testing.expectEqual(peer.state, .alive); } + +fn testPeer(pk: [32]u8, state: PeerState, suspected_at: ?i128) Peer { + return .{ + .pubkey = pk, + .name = "", + .state = state, + .gossip_endpoint = null, + .wg_pubkey = null, + .mesh_ip = .{ 0, 0, 0, 0 }, + .wg_port = 0, + .lamport = 1, + .last_seen_ns = 0, + .suspected_at_ns = suspected_at, + .last_rtt_ns = null, + .handshake_complete = false, + }; +} + +test "membership table caps growth and reclaims non-alive slots (H3 regression)" { + const allocator = std.testing.allocator; + var table = MembershipTable.init(allocator, 5000); + defer table.deinit(); + + var i: usize = 0; + while (i < MAX_MEMBERS) : (i += 1) { + var pk = [_]u8{0} ** 32; + std.mem.writeInt(u32, pk[0..4], @intCast(i), .little); + try table.upsert(testPeer(pk, .alive, null)); + } + try std.testing.expectEqual(MAX_MEMBERS, table.count()); + + // A new peer past the cap with no reclaimable slot must be rejected. + const overflow_pk = [_]u8{0xFF} ** 32; + try std.testing.expectError(error.MembershipFull, table.upsert(testPeer(overflow_pk, .alive, null))); + + // Kill an existing peer, then the new peer is admitted by reclaiming it. + const victim = [_]u8{0} ** 32; // i==0 + table.markDead(victim); + try table.upsert(testPeer(overflow_pk, .alive, null)); + try std.testing.expectEqual(MAX_MEMBERS, table.count()); + try std.testing.expect(table.peers.get(overflow_pk) != null); +} + +test "expireSuspected writes to caller-owned buffer (H8 regression)" { + const allocator = std.testing.allocator; + var table = MembershipTable.init(allocator, 1000); // 1s timeout + defer table.deinit(); + + const keys = [_][32]u8{ [_]u8{1} ** 32, [_]u8{2} ** 32, [_]u8{3} ** 32 }; + for (keys) |k| { + try table.upsert(testPeer(k, .suspected, 1)); // suspected_at = 1ns β†’ long expired + } + + var buf: [8][32]u8 = undefined; + const n = table.expireSuspected(&buf); + try std.testing.expectEqual(@as(usize, 3), n); + // Every returned key must be a real, now-dead peer. Pre-fix, the 2nd/3rd + // entries were garbage read from a reclaimed stack frame. + for (buf[0..n]) |k| { + const p = table.peers.get(k) orelse return error.TestUnexpectedResult; + try std.testing.expectEqual(PeerState.dead, p.state); + } +} diff --git a/src/discovery/swim.zig b/src/discovery/swim.zig index 3a636e8..31f5344 100644 --- a/src/discovery/swim.zig +++ b/src/discovery/swim.zig @@ -11,6 +11,7 @@ const std = @import("std"); const Membership = @import("membership.zig"); const messages = @import("../protocol/messages.zig"); const codec = @import("../protocol/codec.zig"); +const keys = @import("../identity/keys.zig"); const Udp = @import("../net/udp.zig"); const Holepuncher = @import("../nat/holepunch.zig").Holepuncher; const log = std.log.scoped(.swim); @@ -293,8 +294,9 @@ pub const SwimProtocol = struct { self.checkTimeouts(); // 3. Expire suspected peers - const expired = self.membership.expireSuspected(); - for (expired) |pubkey| { + var expired_buf: [256][32]u8 = undefined; + const expired_n = self.membership.expireSuspected(&expired_buf); + for (expired_buf[0..expired_n]) |pubkey| { // Log the expiration std.debug.print(" peer expired (suspect timeout): {x:0>2}{x:0>2}...\n", .{ pubkey[0], pubkey[1] }); @@ -352,8 +354,9 @@ pub const SwimProtocol = struct { const now_ns = nowNs(); // 2. Expire suspected peers - const expired = self.membership.expireSuspected(); - for (expired) |pubkey| { + var expired_buf: [256][32]u8 = undefined; + const expired_n = self.membership.expireSuspected(&expired_buf); + for (expired_buf[0..expired_n]) |pubkey| { std.debug.print(" peer expired (suspect timeout): {x:0>2}{x:0>2}...\n", .{ pubkey[0], pubkey[1] }); if (self.handler) |h| { h.onPeerDead(h.ctx, pubkey); @@ -539,8 +542,26 @@ pub const SwimProtocol = struct { std.debug.print(" πŸ“¨ APP msg relayed ({d}B) β†’ {s}\n", .{ data.len, ep.format(&ep_buf) }); } + /// Verify an org control message before acting on it. + /// + /// SECURITY (C2): org_cert_revoke/alias/vouch carry a 64-byte Ed25519 + /// signature that the receiver previously decoded but NEVER verified, so a + /// single spoofed UDP datagram could revoke/evict any node or hijack an + /// alias. We now require BOTH that the issuing org is trusted AND that the + /// signature over the canonical payload verifies against the org pubkey. + fn orgSignatureValid(self: *const SwimProtocol, org_pubkey: [32]u8, payload: []const u8, signature: [64]u8) bool { + if (!self.isOrgAuthorizedPeer(org_pubkey)) return false; + const pk = std.crypto.sign.Ed25519.PublicKey.fromBytes(org_pubkey) catch return false; + return keys.verify(payload, signature, pk); + } + /// Handle an OrgAliasAnnounce: register or update org alias, with Lamport conflict resolution. fn handleOrgAlias(self: *SwimProtocol, ann: messages.OrgAliasAnnounce) void { + const signed = codec.orgAliasSignedBytes(ann); + if (!self.orgSignatureValid(ann.org_pubkey, &signed, ann.signature)) { + log.warn("org alias rejected: invalid or untrusted org signature", .{}); + return; + } const alias_name = std.mem.trimEnd(u8, &ann.alias, "\x00"); // Check for existing alias with same name from a different org (conflict) @@ -589,6 +610,11 @@ pub const SwimProtocol = struct { /// Handle an OrgCertRevoke: add the revoked node pubkey to the set. fn handleOrgRevoke(self: *SwimProtocol, rev: messages.OrgCertRevoke) void { + const signed = codec.orgRevokeSignedBytes(rev); + if (!self.orgSignatureValid(rev.org_pubkey, &signed, rev.signature)) { + log.warn("org revoke rejected: invalid or untrusted org signature", .{}); + return; + } // Check if already revoked for (self.revoked_nodes[0..self.revoked_count]) |revoked| { if (std.mem.eql(u8, &revoked, &rev.node_pubkey)) return; @@ -615,8 +641,12 @@ pub const SwimProtocol = struct { /// Handle an OrgTrustVouch: org admin vouches for an external standalone node. /// All nodes trusting this org will auto-accept the vouched node. fn handleOrgVouch(self: *SwimProtocol, vouch: messages.OrgTrustVouch) void { - // Only process vouches from trusted orgs - if (!self.isOrgAuthorizedPeer(vouch.org_pubkey)) return; + // Only process vouches from trusted orgs with a valid signature. + const signed = codec.orgVouchSignedBytes(vouch); + if (!self.orgSignatureValid(vouch.org_pubkey, &signed, vouch.signature)) { + log.warn("org vouch rejected: invalid or untrusted org signature", .{}); + return; + } // Check if already vouched (same org + same node) for (0..self.vouched_count) |i| { @@ -1023,6 +1053,21 @@ pub const SwimProtocol = struct { } } + /// React to an unauthenticated gossip claim that a third party is failing. + /// Marks the subject suspected locally and actively probes it, so OUR failure + /// detector β€” not a remote attacker β€” decides whether to evict. Only peers we + /// currently consider alive are probed, which bounds any reflected traffic to + /// endpoints already in our (capped) membership table. + fn suspectAndProbe(self: *SwimProtocol, subject: [32]u8) void { + const ep = blk: { + const peer = self.membership.peers.getPtr(subject) orelse return; + if (peer.state != .alive) return; // already suspected/dead, or unknown + break :blk peer.gossip_endpoint; + }; + self.membership.suspect(subject); + if (ep) |e| self.sendPing(e, subject); + } + fn applyGossip(self: *SwimProtocol, entry: messages.GossipEntry) void { // Self-suspicion refutation: if someone suspects/kills us, broadcast alive if (std.mem.eql(u8, &entry.subject_pubkey, &self.our_pubkey)) { @@ -1043,22 +1088,22 @@ pub const SwimProtocol = struct { switch (entry.event) { .join, .alive => { - // Only use gossip endpoint for NEW peer registration. - // For existing peers, the real sender_addr from direct - // pings (set in handlePing/handleAck) is more reliable. + // The gossip endpoint is only used to LEARN about brand-new peers. + // For peers we already track, liveness is decided by our own + // failure detector (direct ping/ack β†’ markAlive), never by + // unauthenticated gossip. if (entry.endpoint) |ep| { if (self.membership.peers.getPtr(entry.subject_pubkey) == null) { self.registerOrUpdatePeerEndpoint(entry.subject_pubkey, ep); - } else { - // Peer exists β€” just refresh alive state without changing endpoint - if (self.membership.peers.getPtr(entry.subject_pubkey)) |peer| { - peer.state = .alive; - peer.suspected_at_ns = null; - } } } - // Update peer NAT info from gossip + // SECURITY (M6): do NOT let gossip clear local suspicion or + // resurrect a peer our detector has marked down. A replayed/forged + // `alive` used to reset suspected_at_ns, letting an attacker + // indefinitely postpone failure detection (or revive a dead peer). + // Only direct observation clears suspicion. We still absorb the + // additive NAT/WG discovery metadata below. if (self.membership.peers.getPtr(entry.subject_pubkey)) |peer| { // Update public endpoint if present if (entry.public_endpoint) |pub_ep| { @@ -1082,20 +1127,15 @@ pub const SwimProtocol = struct { } } }, - .suspect => { - self.membership.suspect(entry.subject_pubkey); - }, - .dead => { - self.membership.markDead(entry.subject_pubkey); - if (self.handler) |h| { - h.onPeerDead(h.ctx, entry.subject_pubkey); - } - }, - .leave => { - self.membership.markDead(entry.subject_pubkey); - if (self.handler) |h| { - h.onPeerDead(h.ctx, entry.subject_pubkey); - } + // SECURITY (H2): gossip entries are unauthenticated and the sender gate + // is open by default, so a single packet could previously markDead any + // third party and tear down its WireGuard tunnel. We now only DOWNGRADE + // the claim to local suspicion and let our OWN failure detector confirm + // it before any eviction. A live peer answers our probe and stays alive; + // a genuinely dead/departed peer fails our probes and is reaped by + // expireSuspected. + .suspect, .dead, .leave => { + self.suspectAndProbe(entry.subject_pubkey); }, } } @@ -1142,3 +1182,86 @@ test "swim creates sequential pings" { try std.testing.expectEqual(@as(usize, 2), swim.pending_count); try std.testing.expectEqual(@as(u64, 2), swim.pending[1].seq); } + +fn aliveTestPeer(pk: [32]u8) Membership.Peer { + return .{ + .pubkey = pk, + .name = "", + .state = .alive, + .gossip_endpoint = messages.Endpoint.initV4(.{ 127, 0, 0, 1 }, 40000), + .wg_pubkey = null, + .mesh_ip = .{ 10, 99, 0, 1 }, + .wg_port = 51830, + .lamport = 1, + .last_seen_ns = 0, + .suspected_at_ns = null, + .last_rtt_ns = null, + .handshake_complete = false, + }; +} + +test "org revoke requires a valid trusted-org signature (C2 regression)" { + const allocator = std.testing.allocator; + var membership = Membership.MembershipTable.init(allocator, 5000); + defer membership.deinit(); + var socket = Udp.UdpSocket.bind(0) catch return; // skip if no socket + defer socket.close(); + + var swim = SwimProtocol.init(&membership, socket, .{}, [_]u8{1} ** 32, [_]u8{2} ** 32, .{ 127, 0, 0, 1 }, 51821, null); + + const org_kp = keys.generate(); + const org_pubkey = org_kp.public_key.toBytes(); + swim.addTrustedOrg(org_pubkey); + + const victim = [_]u8{0x77} ** 32; + try membership.upsert(aliveTestPeer(victim)); + + // (1) Forged: trusted org pubkey but an all-zero signature β†’ rejected. + const forged = messages.OrgCertRevoke{ .org_pubkey = org_pubkey, .node_pubkey = victim, .reason = 2, .lamport = 5, .signature = [_]u8{0} ** 64 }; + swim.handleOrgRevoke(forged); + try std.testing.expect(membership.peers.get(victim).?.state == .alive); + try std.testing.expectEqual(@as(usize, 0), swim.revoked_count); + + // (2) Valid: correctly signed by the trusted org β†’ revoked + marked dead. + var valid = messages.OrgCertRevoke{ .org_pubkey = org_pubkey, .node_pubkey = victim, .reason = 2, .lamport = 6, .signature = undefined }; + const signed = codec.orgRevokeSignedBytes(valid); + valid.signature = keys.sign(&signed, org_kp.secret_key) catch unreachable; + swim.handleOrgRevoke(valid); + try std.testing.expect(membership.peers.get(victim).?.state == .dead); + try std.testing.expectEqual(@as(usize, 1), swim.revoked_count); + + // (3) Untrusted org with a valid self-signature β†’ rejected. + const evil_kp = keys.generate(); + const evil_pub = evil_kp.public_key.toBytes(); + const victim2 = [_]u8{0x88} ** 32; + try membership.upsert(aliveTestPeer(victim2)); + var evil = messages.OrgCertRevoke{ .org_pubkey = evil_pub, .node_pubkey = victim2, .reason = 2, .lamport = 7, .signature = undefined }; + const evil_signed = codec.orgRevokeSignedBytes(evil); + evil.signature = keys.sign(&evil_signed, evil_kp.secret_key) catch unreachable; + swim.handleOrgRevoke(evil); + try std.testing.expect(membership.peers.get(victim2).?.state == .alive); +} + +test "gossiped dead does not instantly evict a third party (H2 regression)" { + const allocator = std.testing.allocator; + var membership = Membership.MembershipTable.init(allocator, 5000); + defer membership.deinit(); + var socket = Udp.UdpSocket.bind(0) catch return; + defer socket.close(); + + var swim = SwimProtocol.init(&membership, socket, .{}, [_]u8{1} ** 32, [_]u8{2} ** 32, .{ 127, 0, 0, 1 }, 51821, null); + + const victim = [_]u8{0x55} ** 32; + try membership.upsert(aliveTestPeer(victim)); + + // An unauthenticated gossip "dead" about a third party must only DOWNGRADE + // to local suspicion (to be confirmed by our own probes), never instant-kill. + swim.applyGossip(.{ .subject_pubkey = victim, .event = .dead, .lamport = 99, .endpoint = null }); + const p = membership.peers.get(victim).?; + try std.testing.expect(p.state != .dead); + try std.testing.expect(p.state == .suspected); + + // A replayed "alive" from gossip must NOT clear our local suspicion (M6). + swim.applyGossip(.{ .subject_pubkey = victim, .event = .alive, .lamport = 100, .endpoint = messages.Endpoint.initV4(.{ 127, 0, 0, 1 }, 40000) }); + try std.testing.expect(membership.peers.get(victim).?.state == .suspected); +} diff --git a/src/main.zig b/src/main.zig index ce45436..cabddbc 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2247,6 +2247,10 @@ fn dispatchBatch( batch_idx: u16, ) void { const Pipeline = lib.net.Pipeline; + // SECURITY (H6): shared lock excludes removePeer from zeroing this tunnel + // mid-use. Outermost on the data path (taken before tx_ring.push_lock). + wg_dev.lock.lockSharedUncancelable(zio()); + defer wg_dev.lock.unlockShared(zio()); var peer = &(wg_dev.peers[peer_slot] orelse return); var batch = &pool.batches[batch_idx]; batch.peer_slot = peer_slot; @@ -2294,6 +2298,11 @@ fn cryptoWorkerPipeline( while (running.load(.acquire)) { const batch_idx = crypto_q.pop() orelse break; // null = closed + // SECURITY (H6): shared lock for this batch's tunnel access (encrypt + + // opportunistic flush) so removePeer cannot zero the keys mid-encrypt. + // Outermost lock; send_lock below is acquired via tryLock (never blocks). + wg_dev.lock.lockSharedUncancelable(zio()); + defer wg_dev.lock.unlockShared(zio()); var batch = &pool.batches[batch_idx]; var peer = &(wg_dev.peers[batch.peer_slot] orelse continue); var tun = &(peer.active_tunnel orelse continue); @@ -2643,6 +2652,15 @@ fn writeCoalescedToTun( } } +/// Read a peer's org pubkey from the membership table under a shared lock. +/// SECURITY (H7): data-plane worker threads read membership while the SWIM thread +/// may be rehashing `peers` (peers.put); the read lock prevents a freed-bucket UAF. +fn orgPubkeyLocked(membership: *lib.discovery.Membership.MembershipTable, identity_key: [32]u8) ?[32]u8 { + membership.lock.lockSharedUncancelable(zio()); + defer membership.lock.unlockShared(zio()); + return if (membership.peers.getPtr(identity_key)) |mp| mp.org_pubkey else null; +} + /// Parallel decrypt worker: pulls encrypted transport packets from the DecryptQueue, /// decrypts them using wg_dev.decryptTransport (thread-safe via replay_lock), and writes /// plaintext to TUN. This parallelizes the download path across N cores. @@ -2667,11 +2685,10 @@ fn decryptRxWorker( const pkt = decrypt_q.getPacket(result.idx); if (wg_dev.decryptTransport(pkt, &out_buf)) |dec| { - // Service filter: check port access before TUN write - if (lib.services.Policy.parseTransportHeader(out_buf[0..dec.len])) |ti| { - const peer = wg_dev.peers[dec.slot] orelse continue; - const org_pk = if (membership.peers.getPtr(peer.identity_key)) |mp| mp.org_pubkey else null; - if (!service_filter.check(peer.identity_key, org_pk, ti.proto, ti.dst_port)) continue; + // Service filter: check access before TUN write (IPv4 + IPv6, M5). + if (wg_dev.peers[dec.slot]) |peer| { + const org_pk = orgPubkeyLocked(membership, peer.identity_key); + if (!service_filter.allowPacket(peer.identity_key, org_pk, out_buf[0..dec.len])) continue; } // Write decrypted packet to TUN @@ -2712,13 +2729,10 @@ fn processIncomingPacket( if (pkt_type == .wg_transport) { if (n_decrypted.* < 64) { if (wg_dev.decryptTransport(pkt, &decrypt_storage[n_decrypted.*])) |result| { - // Check service filter before buffering - const PolicyMod = lib.services.Policy; - if (PolicyMod.parseTransportHeader(decrypt_storage[n_decrypted.*][0..result.len])) |ti| { - if (wg_dev.peers[result.slot]) |peer| { - const org_pk = if (swim.membership.peers.getPtr(peer.identity_key)) |mp| mp.org_pubkey else null; - if (!service_filter.check(peer.identity_key, org_pk, ti.proto, ti.dst_port)) return; - } + // Check service filter before buffering (IPv4 + IPv6, M5). + if (wg_dev.peers[result.slot]) |peer| { + const org_pk = orgPubkeyLocked(swim.membership, peer.identity_key); + if (!service_filter.allowPacket(peer.identity_key, org_pk, decrypt_storage[n_decrypted.*][0..result.len])) return; } decrypt_lens[n_decrypted.*] = result.len; decrypt_slots[n_decrypted.*] = result.slot; @@ -2729,7 +2743,7 @@ fn processIncomingPacket( .wg_handshake_init => { if (pkt.len >= @sizeOf(lib.wireguard.Noise.HandshakeInitiation)) { const msg: *const lib.wireguard.Noise.HandshakeInitiation = @ptrCast(@alignCast(pkt.ptr)); - if (wg_dev.handleInitiation(msg)) |hs_result| { + if (wg_dev.handleInitiation(msg, sender_addr)) |hs_result| { const resp_bytes = std.mem.asBytes(&hs_result.response); _ = udp_sock.sendTo(resp_bytes, sender_addr, sender_port) catch 0; writeFormatted(stdout, " WG handshake: responded to initiation\n", .{}) catch {}; @@ -2796,13 +2810,10 @@ fn windowsEventLoop( if (pkt_type == .wg_transport) { // Decrypt WG transport β†’ write plaintext to Wintun if (wg_dev.decryptTransport(pkt, &decrypt_buf)) |result| { - // Apply service filter before writing to TUN - const PolicyMod = lib.services.Policy; - if (PolicyMod.parseTransportHeader(decrypt_buf[0..result.len])) |ti| { - if (wg_dev.peers[result.slot]) |peer| { - const org_pk = if (swim.membership.peers.getPtr(peer.identity_key)) |mp| mp.org_pubkey else null; - if (!service_filter.check(peer.identity_key, org_pk, ti.proto, ti.dst_port)) continue; - } + // Apply service filter before writing to TUN (IPv4 + IPv6, M5). + if (wg_dev.peers[result.slot]) |peer| { + const org_pk = orgPubkeyLocked(swim.membership, peer.identity_key); + if (!service_filter.allowPacket(peer.identity_key, org_pk, decrypt_buf[0..result.len])) continue; } tun_dev.write(decrypt_buf[0..result.len]) catch {}; } else |_| {} @@ -2810,7 +2821,7 @@ fn windowsEventLoop( .wg_handshake_init => { if (pkt.len >= @sizeOf(Noise.HandshakeInitiation)) { const msg: *const Noise.HandshakeInitiation = @ptrCast(@alignCast(pkt.ptr)); - if (wg_dev.handleInitiation(msg)) |hs_result| { + if (wg_dev.handleInitiation(msg, recv.sender_addr)) |hs_result| { const resp_bytes = std.mem.asBytes(&hs_result.response); _ = udp_sock.sendTo(resp_bytes, recv.sender_addr, recv.sender_port) catch 0; writeFormatted(stdout, " WG handshake: responded to initiation\n", .{}) catch {}; @@ -2948,13 +2959,10 @@ fn macosEventLoop( if (pkt_type == .wg_transport) { // Decrypt WG transport β†’ write plaintext to utun if (wg_dev.decryptTransport(pkt, &decrypt_buf)) |result| { - // Apply service filter before writing to TUN - const PolicyMod = lib.services.Policy; - if (PolicyMod.parseTransportHeader(decrypt_buf[0..result.len])) |ti| { - if (wg_dev.peers[result.slot]) |peer| { - const org_pk = if (swim.membership.peers.getPtr(peer.identity_key)) |mp| mp.org_pubkey else null; - if (!service_filter.check(peer.identity_key, org_pk, ti.proto, ti.dst_port)) continue; - } + // Apply service filter before writing to TUN (IPv4 + IPv6, M5). + if (wg_dev.peers[result.slot]) |peer| { + const org_pk = orgPubkeyLocked(swim.membership, peer.identity_key); + if (!service_filter.allowPacket(peer.identity_key, org_pk, decrypt_buf[0..result.len])) continue; } tun_dev.write(decrypt_buf[0..result.len]) catch {}; } else |_| {} @@ -2962,7 +2970,7 @@ fn macosEventLoop( .wg_handshake_init => { if (pkt.len >= @sizeOf(Noise.HandshakeInitiation)) { const msg: *const Noise.HandshakeInitiation = @ptrCast(@alignCast(pkt.ptr)); - if (wg_dev.handleInitiation(msg)) |hs_result| { + if (wg_dev.handleInitiation(msg, recv.sender_addr)) |hs_result| { const resp_bytes = std.mem.asBytes(&hs_result.response); _ = udp_sock.sendTo(resp_bytes, recv.sender_addr, recv.sender_port) catch 0; writeFormatted(stdout, " WG handshake: responded to initiation\n", .{}) catch {}; @@ -3423,7 +3431,9 @@ fn findPeerByMeshIp( mesh_ip: [4]u8, ) ?usize { // Walk membership table to find identity_key for this mesh IP, - // then map to WgDevice slot + // then map to WgDevice slot. SECURITY (H7): read lock vs SWIM-thread rehash. + swim.membership.lock.lockSharedUncancelable(zio()); + defer swim.membership.lock.unlockShared(zio()); var iter = swim.membership.peers.iterator(); while (iter.next()) |entry| { const peer = entry.value_ptr; diff --git a/src/meshguard_ffi.zig b/src/meshguard_ffi.zig index 449601f..e303b84 100644 --- a/src/meshguard_ffi.zig +++ b/src/meshguard_ffi.zig @@ -528,7 +528,12 @@ export fn meshguard_send( var target_key: [32]u8 = undefined; @memcpy(&target_key, peer_pubkey[0..32]); - const peer_entry = c.membership.peers.get(target_key); + // SECURITY (H7): copy the peer out under a read lock vs SWIM-thread rehash. + const peer_entry = blk: { + c.membership.lock.lockSharedUncancelable(zio()); + defer c.membership.lock.unlockShared(zio()); + break :blk c.membership.peers.get(target_key); + }; if (peer_entry == null) { // Peer unknown β€” notify host app so it can queue for offline delivery if (c.on_undeliverable_cb) |cb| cb(peer_pubkey, data, len, -4); @@ -666,6 +671,9 @@ export fn meshguard_set_on_undeliverable( /// Get the number of alive peers. export fn meshguard_peer_count(ctx: ?*MeshguardContext) u32 { const c = ctx orelse return 0; + // SECURITY (H7): read lock vs the SWIM thread mutating `peers`. + c.membership.lock.lockSharedUncancelable(zio()); + defer c.membership.lock.unlockShared(zio()); var count: u32 = 0; var iter = c.membership.peers.iterator(); while (iter.next()) |entry| { @@ -754,6 +762,9 @@ export fn meshguard_debug_info(ctx: ?*MeshguardContext, out: [*]u8) void { @memset(out[0..22], 0); return; }; + // SECURITY (H7): read lock over the membership reads below vs SWIM rehash. + c.membership.lock.lockSharedUncancelable(zio()); + defer c.membership.lock.unlockShared(zio()); // Bound port const bp = if (c.socket) |s| s.port else @as(u16, 0); @@ -828,6 +839,9 @@ export fn meshguard_get_peers( max_peers: u32, ) u32 { const c = ctx orelse return 0; + // SECURITY (H7): read lock vs the SWIM thread mutating `peers`. + c.membership.lock.lockSharedUncancelable(zio()); + defer c.membership.lock.unlockShared(zio()); var count: u32 = 0; var iter = c.membership.peers.iterator(); while (iter.next()) |entry| { @@ -866,7 +880,12 @@ export fn meshguard_get_peer_info( var key: [32]u8 = undefined; @memcpy(&key, peer_pubkey[0..32]); - const peer = c.membership.peers.get(key) orelse return -1; + // SECURITY (H7): copy the peer out under a read lock vs SWIM-thread rehash. + const peer = blk: { + c.membership.lock.lockSharedUncancelable(zio()); + defer c.membership.lock.unlockShared(zio()); + break :blk c.membership.peers.get(key); + } orelse return -1; // Gossip endpoint IP + port if (peer.gossip_endpoint) |ep| { @@ -1114,7 +1133,7 @@ fn handleWgPacket(ctx: *MeshguardContext, data: []const u8, sender_addr: [4]u8, // Finding #7 (v2): Attempt handleInitiation first. // Only auto-register if it fails with UnknownPeer. // This avoids the double X25519 multiplication on every packet. - const result = dev.handleInitiation(msg) catch |err| blk: { + const result = dev.handleInitiation(msg, sender_addr) catch |err| blk: { if (err != error.UnknownPeer) return; // Auto-register responder peer from membership table @@ -1147,7 +1166,7 @@ fn handleWgPacket(ctx: *MeshguardContext, data: []const u8, sender_addr: [4]u8, } // Retry after auto-registration - break :blk dev.handleInitiation(msg) catch return; + break :blk dev.handleInitiation(msg, sender_addr) catch return; }; // Send handshake response back to the initiator @@ -1268,8 +1287,14 @@ export fn meshguard_tunnel_open( var target_key: [32]u8 = undefined; @memcpy(&target_key, peer_pubkey[0..32]); - // Look up peer in membership table for endpoint and WG pubkey - const peer = c.membership.peers.get(target_key) orelse return -3; + // Look up peer in membership table for endpoint and WG pubkey. + // SECURITY (H7): copy under a read lock vs SWIM-thread rehash (released + // before wg_lock below, preserving membershipβ†’wg_lock ordering). + const peer = blk: { + c.membership.lock.lockSharedUncancelable(zio()); + defer c.membership.lock.unlockShared(zio()); + break :blk c.membership.peers.get(target_key); + } orelse return -3; const peer_x25519 = peer.wg_pubkey orelse return -4; const ep = peer.gossip_endpoint orelse return -5; diff --git a/src/nat/coordinated_punch.zig b/src/nat/coordinated_punch.zig index cbbddbd..2b96900 100644 --- a/src/nat/coordinated_punch.zig +++ b/src/nat/coordinated_punch.zig @@ -321,8 +321,15 @@ fn base64UrlDecode(input: []const u8, output: []u8) !usize { buf[len] = '='; } - try std.base64.standard.Decoder.decode(output, buf[0..len]); - return std.base64.standard.Decoder.calcSizeForSlice(buf[0..len]) catch return error.InvalidBase64; + // SECURITY: std.base64 Decoder.decode does NOT bound-check the destination + // (its tail loop writes dest[dest_idx] with no dest_idx < dest.len guard β€” + // std/base64.zig:275). With safety checks off in ReleaseFast an over-long + // token would silently smash the caller's stack buffer. Validate the decoded + // size against `output` BEFORE decoding. + const decoded_len = std.base64.standard.Decoder.calcSizeForSlice(buf[0..len]) catch return error.InvalidBase64; + if (decoded_len > output.len) return error.InputTooLong; + try std.base64.standard.Decoder.decode(output[0..decoded_len], buf[0..len]); + return decoded_len; } // ─── NTP client ─── @@ -687,6 +694,22 @@ test "token signature verification" { try std.testing.expect(!verifyToken(&tampered)); } +test "decodeTokenUri rejects over-long token without OOB write (C1 regression)" { + // An attacker-supplied mg:// token whose base64url body decodes to more than + // TOKEN_BINARY_SIZE bytes must be rejected with an error, never overflow the + // 106-byte stack buffer. Pre-fix this silently smashed the stack in ReleaseFast. + var buf: [300]u8 = undefined; + @memcpy(buf[0..5], "mg://"); + // 251 'A' chars -> ~188 decoded bytes, far past the 106-byte target. + @memset(buf[5..256], 'A'); + const uri = buf[0..256]; + try std.testing.expectError(error.InvalidToken, decodeTokenUri(uri)); + + // base64UrlDecode itself must refuse to write past a small output buffer. + var small: [16]u8 = undefined; + try std.testing.expectError(error.InputTooLong, base64UrlDecode("AAAAAAAAAAAAAAAAAAAAAAAA", &small)); +} + test "probe building and detection" { const nonce = [_]u8{0xAB} ** 8; const pubkey = [_]u8{0xCD} ** 32; diff --git a/src/net/dns.zig b/src/net/dns.zig index 146eea5..1396d27 100644 --- a/src/net/dns.zig +++ b/src/net/dns.zig @@ -135,10 +135,17 @@ pub fn resolveA(hostname: []const u8) ?[4]u8 { /// Query a specific nameserver for an A record. fn queryA(nameserver: [4]u8, hostname: []const u8) ?[4]u8 { var query_buf: [512]u8 = undefined; - const query_len = buildQuery(&query_buf, hostname, TYPE_A, 0xABCD) catch return null; + // SECURITY (H5): randomize the transaction ID per query (was a constant + // 0xABCD, trivially spoofable off-path). + const id = randomTxid(); + const query_len = buildQuery(&query_buf, hostname, TYPE_A, id) catch return null; const response = sendQuery(nameserver, DNS_PORT, query_buf[0..query_len]) orelse return null; + // SECURITY (H5): reject responses that don't match our query (TXID, QR bit, + // and the echoed question). The connected socket already filters by source. + if (!responseMatchesQuery(response.data[0..response.len], id, hostname, TYPE_A)) return null; + // Parse A record from response return parseAResponse(&response.data, response.len); } @@ -153,9 +160,12 @@ pub fn queryTXT(allocator: std.mem.Allocator, domain: []const u8) ![][]const u8 for (ns_buf[0..ns_count]) |ns| { var query_buf: [512]u8 = undefined; - const query_len = buildQuery(&query_buf, domain, TYPE_TXT, 0xCDEF) catch continue; + const id = randomTxid(); // SECURITY (H5): random TXID (was constant 0xCDEF) + const query_len = buildQuery(&query_buf, domain, TYPE_TXT, id) catch continue; const response = sendQuery(ns, DNS_PORT, query_buf[0..query_len]) orelse continue; + // SECURITY (H5): only accept a response that matches our query. + if (!responseMatchesQuery(response.data[0..response.len], id, domain, TYPE_TXT)) continue; return parseTXTResponse(allocator, &response.data, response.len); } return &.{}; @@ -447,6 +457,33 @@ fn recvFromSocket(fd: posix.socket_t, buf: []u8) ?usize { } } +/// Random 16-bit DNS transaction ID. SECURITY (H5): unpredictable per query so +/// off-path attackers cannot forge a matching response. +fn randomTxid() u16 { + var b: [2]u8 = undefined; + zio().random(&b); + return (@as(u16, b[0]) << 8) | @as(u16, b[1]); +} + +/// SECURITY (H5): validate that a unicast DNS response actually answers OUR +/// query β€” matching transaction ID, the QR (response) bit set, exactly one +/// echoed question, and that question's name (case-insensitive) and type equal +/// what we asked. Combined with the connected socket (source filtering) this +/// blocks off-path cache-poisoning of the bootstrap path. Not used for mDNS, +/// whose responses legitimately carry id 0 and no echoed question. +fn responseMatchesQuery(data: []const u8, expected_id: u16, domain: []const u8, qtype: u16) bool { + if (data.len < HEADER_LEN) return false; + if (readU16(data, 0) != expected_id) return false; // transaction ID + if ((readU16(data, 2) & 0x8000) == 0) return false; // QR bit must be set + if (readU16(data, 4) != 1) return false; // exactly one question echoed + var q_buf: [512]u8 = undefined; + const q_end = encodeDomainName(&q_buf, 0, domain) catch return false; + if (HEADER_LEN + q_end + 4 > data.len) return false; + if (!std.ascii.eqlIgnoreCase(data[HEADER_LEN .. HEADER_LEN + q_end], q_buf[0..q_end])) return false; + if (readU16(data, HEADER_LEN + q_end) != qtype) return false; // QTYPE echoed + return true; +} + /// Send a DNS query to a nameserver and wait for a response (2s timeout). fn sendQuery(nameserver: [4]u8, port: u16, query: []const u8) ?QueryResponse { const sock_fd = createDgramSocket() orelse return null; @@ -459,11 +496,21 @@ fn sendQuery(nameserver: [4]u8, port: u16, query: []const u8) ?QueryResponse { .zero = .{ 0, 0, 0, 0, 0, 0, 0, 0 }, }; + // SECURITY (H5): connect() the UDP socket so the kernel only delivers + // datagrams from THIS nameserver, dropping off-path spoofed responses. After + // connect we send with a null destination (the connected peer). if (comptime is_linux) { - const rc = linux.sendto(sock_fd, query.ptr, query.len, 0, @ptrCast(&addr), @sizeOf(@TypeOf(addr))); + const crc = linux.connect(sock_fd, @ptrCast(&addr), @sizeOf(@TypeOf(addr))); + if (posix.errno(crc) != .SUCCESS) return null; + } else { + if (std.c.connect(sock_fd, @ptrCast(&addr), @sizeOf(@TypeOf(addr))) != 0) return null; + } + + if (comptime is_linux) { + const rc = linux.sendto(sock_fd, query.ptr, query.len, 0, null, 0); if (posix.errno(rc) != .SUCCESS) return null; } else { - if (std.c.sendto(sock_fd, query.ptr, query.len, 0, @ptrCast(&addr), @sizeOf(@TypeOf(addr))) < 0) return null; + if (std.c.sendto(sock_fd, query.ptr, query.len, 0, null, 0) < 0) return null; } // Wait for response with 2s timeout (cross-platform) @@ -543,6 +590,49 @@ test "build query" { try std.testing.expectEqual(readU16(&buf, 4), 1); } +test "responseMatchesQuery rejects spoofed/mismatched responses (H5 regression)" { + const domain = "seed.example.com"; + const id: u16 = 0x1234; + + // Craft a well-formed response: header (QR=1, QDCOUNT=1) + echoed question. + var buf: [512]u8 = undefined; + buf[0] = 0x12; + buf[1] = 0x34; // id + buf[2] = 0x81; + buf[3] = 0x80; // flags: QR=1, RD, RA + buf[4] = 0x00; + buf[5] = 0x01; // QDCOUNT = 1 + buf[6] = 0x00; + buf[7] = 0x01; // ANCOUNT = 1 + buf[8] = 0; + buf[9] = 0; + buf[10] = 0; + buf[11] = 0; + var pos = try encodeDomainName(&buf, HEADER_LEN, domain); + buf[pos] = TYPE_A >> 8; + buf[pos + 1] = TYPE_A & 0xFF; + pos += 2; + buf[pos] = CLASS_IN >> 8; + buf[pos + 1] = CLASS_IN & 0xFF; + pos += 2; + const total = pos; + + // Legitimate matching response is accepted. + try std.testing.expect(responseMatchesQuery(buf[0..total], id, domain, TYPE_A)); + + // Spoofed responses are all rejected: + try std.testing.expect(!responseMatchesQuery(buf[0..total], 0x9999, domain, TYPE_A)); // wrong TXID + try std.testing.expect(!responseMatchesQuery(buf[0..total], id, "evil.example.com", TYPE_A)); // wrong name + try std.testing.expect(!responseMatchesQuery(buf[0..total], id, domain, TYPE_TXT)); // wrong qtype + + var as_query = buf; + as_query[2] = 0x01; // clear QR bit β†’ looks like a query, not a response + try std.testing.expect(!responseMatchesQuery(as_query[0..total], id, domain, TYPE_A)); + + // Truncated response rejected (no OOB). + try std.testing.expect(!responseMatchesQuery(buf[0..5], id, domain, TYPE_A)); +} + test "parse meshguard TXT" { const result = parseMeshguardTXT("meshguard=1.2.3.4:51821").?; try std.testing.expectEqual(result.addr, [4]u8{ 1, 2, 3, 4 }); diff --git a/src/protocol/codec.zig b/src/protocol/codec.zig index c64503d..433a688 100644 --- a/src/protocol/codec.zig +++ b/src/protocol/codec.zig @@ -217,6 +217,38 @@ pub fn encodeOrgTrustVouch(buf: []u8, msg: messages.OrgTrustVouch) !usize { return pos; } +// ─── Canonical signed-payload builders for org control messages ─── +// +// These define exactly which bytes the org's Ed25519 signature covers, so that +// both a future signer and the receive-side verifier agree. Layouts match the +// `signature:` comments in messages.zig; the lamport is little-endian to match +// the wire encoding above. + +/// OrgAliasAnnounce signed payload: alias(32) β€– lamport(8 LE). +pub fn orgAliasSignedBytes(msg: messages.OrgAliasAnnounce) [40]u8 { + var out: [40]u8 = undefined; + @memcpy(out[0..32], &msg.alias); + std.mem.writeInt(u64, out[32..40], msg.lamport, .little); + return out; +} + +/// OrgCertRevoke signed payload: node_pubkey(32) β€– reason(1) β€– lamport(8 LE). +pub fn orgRevokeSignedBytes(msg: messages.OrgCertRevoke) [41]u8 { + var out: [41]u8 = undefined; + @memcpy(out[0..32], &msg.node_pubkey); + out[32] = msg.reason; + std.mem.writeInt(u64, out[33..41], msg.lamport, .little); + return out; +} + +/// OrgTrustVouch signed payload: vouched_pubkey(32) β€– lamport(8 LE). +pub fn orgVouchSignedBytes(msg: messages.OrgTrustVouch) [40]u8 { + var out: [40]u8 = undefined; + @memcpy(out[0..32], &msg.vouched_pubkey); + std.mem.writeInt(u64, out[32..40], msg.lamport, .little); + return out; +} + fn encodeGossipEntry(buf: []u8, entry: messages.GossipEntry) !usize { var pos: usize = 0; diff --git a/src/services/policy.zig b/src/services/policy.zig index 054d7cd..5a9a45d 100644 --- a/src/services/policy.zig +++ b/src/services/policy.zig @@ -12,7 +12,6 @@ const std = @import("std"); - /// Read all available bytes from an Io.File into buf. Returns bytes read. fn readFileBytes(f: std.Io.File, buf: []u8) !usize { var reader = f.reader(zio(), &.{}); @@ -34,7 +33,6 @@ fn openDirAbsolute(path: []const u8) !std.Io.Dir { return std.Io.Dir.openDirAbsolute(zio(), path, .{ .iterate = true }); } - // ─── Rule types ─── pub const Action = enum(u1) { allow = 0, deny = 1 }; @@ -255,6 +253,27 @@ pub const ServiceFilter = struct { return self.default_action == .allow; } + /// Decide whether a decrypted inner IP packet is allowed, parsing both IPv4 + /// and IPv6. TCP/UDP are matched against policy; ICMP/ICMPv6 pass as control + /// traffic; anything we cannot classify falls back to the default action. + /// + /// SECURITY (M5): IPv6 packets previously returned null from + /// parseTransportHeader and were written to TUN with NO filtering, leaking + /// all IPv6 under a default-deny policy. This path classifies IPv6 and FAILS + /// CLOSED on anything unclassifiable when the default action is deny. + pub fn allowPacket( + self: *const ServiceFilter, + peer_pubkey: [32]u8, + peer_org_pubkey: ?[32]u8, + ip_packet: []const u8, + ) bool { + return switch (classifyPacket(ip_packet)) { + .transport => |ti| self.check(peer_pubkey, peer_org_pubkey, ti.proto, ti.dst_port), + .pass => true, + .unclassified => self.default_action == .allow, + }; + } + /// Load all service policies from the config directory. /// Expects: config_dir/services/{default, global.policy, peer/*.policy, org/*.policy} /// If the services/ directory doesn't exist, returns a default allow-all filter. @@ -496,6 +515,52 @@ pub fn parseTransportHeader(ip_packet: []const u8) ?TransportInfo { return .{ .proto = proto, .dst_port = dst_port }; } +/// Classification of a decrypted inner IP packet for the service filter. +pub const FilterClass = union(enum) { + /// TCP/UDP with a destination port to match against policy. + transport: TransportInfo, + /// ICMP / ICMPv6 control traffic β€” always passed. + pass, + /// Could not determine proto/port (IPv6 extension headers, truncated, + /// non-IP). Callers apply the default action β€” i.e. fail closed under deny. + unclassified, +}; + +fn classifyL4(protocol: u8, ip_packet: []const u8, l4_off: usize) FilterClass { + switch (protocol) { + 6, 17 => { + if (ip_packet.len < l4_off + 4) return .unclassified; + const dp = std.mem.readInt(u16, ip_packet[l4_off + 2 ..][0..2], .big); + return .{ .transport = .{ .proto = if (protocol == 6) .tcp else .udp, .dst_port = dp } }; + }, + 1, 58 => return .pass, // ICMP / ICMPv6 + else => return .unclassified, + } +} + +/// Parse an inner IP packet (IPv4 or IPv6) for the service filter. Unlike +/// parseTransportHeader (IPv4-only, used by its tests), this classifies IPv6 too +/// and distinguishes "pass" (ICMP) from "unclassified" so callers can fail closed. +pub fn classifyPacket(ip_packet: []const u8) FilterClass { + if (ip_packet.len < 1) return .unclassified; + switch (ip_packet[0] >> 4) { + 4 => { + if (ip_packet.len < 20) return .unclassified; + const ihl = @as(usize, ip_packet[0] & 0x0F) * 4; + if (ihl < 20) return .unclassified; + return classifyL4(ip_packet[9], ip_packet, ihl); + }, + 6 => { + if (ip_packet.len < 40) return .unclassified; + // Note: a chain of IPv6 extension headers makes the L4 offset != 40; + // classifyL4 will treat such next-header values as `.unclassified`, + // which fails closed under default-deny. + return classifyL4(ip_packet[6], ip_packet, 40); + }, + else => return .unclassified, + } +} + // ─── Small file reader (no allocator) ─── const SmallFile = struct { @@ -675,3 +740,38 @@ test "parseTransportHeader: ICMP returns null" { try std.testing.expectEqual(parseTransportHeader(&pkt), null); } + +test "classifyPacket handles IPv6 and allowPacket fails closed (M5 regression)" { + // IPv6 TCP to port 80 β†’ classified as transport (was bypassing the filter). + var v6 = std.mem.zeroes([44]u8); + v6[0] = 0x60; // IPv6 version + v6[6] = 6; // next header = TCP + v6[43] = 80; // dst port (offset 40 + 2) + const c = classifyPacket(&v6); + try std.testing.expect(c == .transport); + try std.testing.expectEqual(@as(u16, 80), c.transport.dst_port); + try std.testing.expectEqual(Proto.tcp, c.transport.proto); + + // IPv6 with an extension/unknown next header β†’ unclassified (fail-closed input). + var v6ext = std.mem.zeroes([44]u8); + v6ext[0] = 0x60; + v6ext[6] = 0; // hop-by-hop extension header + try std.testing.expect(classifyPacket(&v6ext) == .unclassified); + + // ICMPv6 β†’ pass (control traffic). + var icmp6 = std.mem.zeroes([44]u8); + icmp6[0] = 0x60; + icmp6[6] = 58; + try std.testing.expect(classifyPacket(&icmp6) == .pass); + + // Truncated IPv6 β†’ unclassified (no OOB). + try std.testing.expect(classifyPacket(&[_]u8{ 0x60, 0, 0 }) == .unclassified); + + // allowPacket: under default-deny, unclassified IPv6 is dropped, ICMPv6 passes. + const deny = ServiceFilter{ .default_action = .deny, .peer_count = 0, .org_count = 0 }; + try std.testing.expect(!deny.allowPacket(.{0} ** 32, null, &v6ext)); + try std.testing.expect(deny.allowPacket(.{0} ** 32, null, &icmp6)); + // Under default-allow (unconfigured), behavior is unchanged (passes). + const allow = ServiceFilter{ .default_action = .allow, .peer_count = 0, .org_count = 0 }; + try std.testing.expect(allow.allowPacket(.{0} ** 32, null, &v6ext)); +} diff --git a/src/wireguard/device.zig b/src/wireguard/device.zig index 762bb51..1c02b9a 100644 --- a/src/wireguard/device.zig +++ b/src/wireguard/device.zig @@ -227,6 +227,64 @@ fn meshIpHostId(ip: [4]u8) u16 { return (@as(u16, ip[2]) << 8) | @as(u16, ip[3]); } +/// SECURITY (H4): token-bucket rate limiter for inbound WireGuard handshake +/// initiations. Each accepted init forces an X25519 on the (single) control +/// thread; the MAC1 key is public, so without this a forged-init flood starves +/// SWIM + the data plane. A small per-source table provides fairness; a global +/// bucket bounds total work even under source-address spoofing (the spoofing +/// case is fully addressed only by the planned cookie/MAC2 follow-up). +const HandshakeLimiter = struct { + const SLOTS: usize = 512; // distinct source IPs tracked (hashed, evict-on-collision) + const PER_SRC_BURST: u32 = 8; + const PER_SRC_REFILL_NS: i128 = 250 * std.time.ns_per_ms; // 1 token / 250ms β‰ˆ 4/s + const GLOBAL_BURST: u32 = 256; + const GLOBAL_REFILL_NS: i128 = 10 * std.time.ns_per_ms; // 1 token / 10ms = 100/s + + const Bucket = struct { + ip: [4]u8 = .{ 0, 0, 0, 0 }, + tokens: u32 = PER_SRC_BURST, + last_ns: i128 = 0, + }; + + mutex: std.Io.Mutex = .init, + buckets: [SLOTS]Bucket = .{Bucket{}} ** SLOTS, + global_tokens: u32 = GLOBAL_BURST, + global_last_ns: i128 = 0, + + fn refill(tokens: u32, max: u32, last_ns: *i128, now_ns: i128, interval_ns: i128) u32 { + if (now_ns <= last_ns.*) return tokens; + const gained: u64 = @intCast(@divTrunc(now_ns - last_ns.*, interval_ns)); + if (gained == 0) return tokens; + last_ns.* += @as(i128, @intCast(gained)) * interval_ns; + const t: u64 = @as(u64, tokens) + gained; + return if (t > max) max else @intCast(t); + } + + /// Returns true if a handshake from `ip` may proceed (consuming a token). + fn allow(self: *HandshakeLimiter, ip: [4]u8, now_ns: i128) bool { + self.mutex.lockUncancelable(zio()); + defer self.mutex.unlock(zio()); + + // Global backstop first: bounds total X25519/sec even under spoofing. + self.global_tokens = refill(self.global_tokens, GLOBAL_BURST, &self.global_last_ns, now_ns, GLOBAL_REFILL_NS); + if (self.global_tokens == 0) return false; + + // Per-source bucket (hash β†’ slot, reset on collision with a different IP). + const h = (((@as(usize, ip[0]) *% 131 +% ip[1]) *% 131 +% ip[2]) *% 131 +% ip[3]); + const b = &self.buckets[h % SLOTS]; + if (!std.mem.eql(u8, &b.ip, &ip)) { + b.* = .{ .ip = ip, .tokens = PER_SRC_BURST, .last_ns = now_ns }; + } else { + b.tokens = refill(b.tokens, PER_SRC_BURST, &b.last_ns, now_ns, PER_SRC_REFILL_NS); + } + if (b.tokens == 0) return false; + + b.tokens -= 1; + self.global_tokens -= 1; + return true; + } +}; + /// Userspace WireGuard device. pub const WgDevice = struct { /// Our X25519 keypair @@ -253,6 +311,23 @@ pub const WgDevice = struct { /// Next sender index to assign (random-ish via wrapping increment) next_index: u32 = 0, + /// SECURITY (H6): guards the peer table against concurrent access. Data-plane + /// worker threads take the SHARED lock to encrypt/decrypt; the control thread + /// (peer death β†’ removePeer) and handshake handlers take the EXCLUSIVE lock. + /// Without this, removePeer secureZeros key material and nils a slot while a + /// worker is mid-encrypt, yielding torn reads / use-after-zero ciphertext on + /// the wire (silent under ReleaseFast). The lock is the OUTERMOST lock on the + /// data path (acquired before tx_ring push/send locks) so there is no + /// ordering inversion. + lock: std.Io.RwLock = .init, + + /// SECURITY (H4): per-source + global rate limit on inbound handshake + /// initiations, checked before the expensive X25519. The MAC1 anti-DoS key is + /// our public key, broadcast in cleartext gossip, so MAC1 alone cannot gate a + /// forged-init flood. (A full WireGuard cookie/MAC2 reply β€” which also defends + /// against source-spoofed floods β€” is a planned follow-up.) + hs_limiter: HandshakeLimiter = .{}, + pub fn init(static_private: [32]u8, static_public: [32]u8) WgDevice { // Start with a random sender index for unpredictability var seed: [4]u8 = undefined; @@ -287,6 +362,8 @@ pub const WgDevice = struct { /// Add or update a peer with IPv4/IPv6 endpoint and mesh addresses. pub fn addPeerWithEndpoint(self: *WgDevice, identity_key: [32]u8, wg_pubkey: [32]u8, endpoint: messages.Endpoint, mesh_ip: [4]u8, mesh_ip6: [16]u8) !usize { + self.lock.lockUncancelable(zio()); // H6: exclusive β€” mutates the peer table + defer self.lock.unlock(zio()); // Check if peer already exists if (self.static_map.get(wg_pubkey)) |existing| { if (self.peers[existing]) |*peer| { @@ -342,6 +419,8 @@ pub const WgDevice = struct { /// Remove a peer by WG public key. pub fn removePeer(self: *WgDevice, wg_pubkey: [32]u8) void { + self.lock.lockUncancelable(zio()); // H6: exclusive β€” zeroes keys + nils the slot + defer self.lock.unlock(zio()); if (self.static_map.get(wg_pubkey)) |slot| { if (self.peers[slot]) |*peer| { self.index_map.remove(peer.sender_index); @@ -360,6 +439,8 @@ pub const WgDevice = struct { /// Initiate a handshake with a peer. pub fn initiateHandshake(self: *WgDevice, slot: usize) !noise.HandshakeInitiation { + self.lock.lockUncancelable(zio()); // H6: exclusive β€” mutates index_map + peer + defer self.lock.unlock(zio()); const peer = if (self.peers[slot]) |*p| p else return error.PeerNotFound; const now = nowNs(); @@ -381,13 +462,26 @@ pub const WgDevice = struct { /// Handle an incoming handshake initiation (we are responder). /// O(1) lookup via static key table instead of O(N) peer iteration. - pub fn handleInitiation(self: *WgDevice, msg: *const noise.HandshakeInitiation) !struct { response: noise.HandshakeResponse, slot: usize } { - // Step 0: Verify MAC1 BEFORE expensive DH (anti-DoS gate) + pub fn handleInitiation(self: *WgDevice, msg: *const noise.HandshakeInitiation, src_ip: [4]u8) !struct { response: noise.HandshakeResponse, slot: usize } { + // Step 0: Verify MAC1 BEFORE expensive DH (cheap, no lock). // MAC1 is keyed with our public key, so we can check without knowing the sender. if (!noise.verifyMac1(self.static_public, msg)) { return error.InvalidMac; } + // Step 0.5 (H4): rate-limit per source BEFORE the X25519 below. The MAC1 + // key is public, so a flood of MAC1-valid inits would otherwise burn one + // X25519 each on this thread. Checked under the limiter's own lock so a + // flood does not even contend the device lock. + if (!self.hs_limiter.allow(src_ip, nowNs())) { + return error.HandshakeRateLimited; + } + + // The expensive crypto + peer-table mutation below run under the + // exclusive device lock (H6). + self.lock.lockUncancelable(zio()); + defer self.lock.unlock(zio()); + // Step 1: Perform the Noise IK preamble (e, es, decrypt static). // Returns NoisePreamble with intermediate state to avoid double DH. const preamble = try noise.decryptInitiatorStatic( @@ -429,6 +523,8 @@ pub const WgDevice = struct { /// Handle an incoming handshake response (we are initiator). pub fn handleResponse(self: *WgDevice, msg: *const noise.HandshakeResponse) !usize { + self.lock.lockUncancelable(zio()); // H6: exclusive β€” sets active_tunnel + defer self.lock.unlock(zio()); // Look up by receiver_index (which is our sender_index) const recv_idx = std.mem.littleToNative(u32, msg.receiver_index); const peer_slot = self.index_map.get(recv_idx) orelse return error.UnknownIndex; @@ -449,6 +545,8 @@ pub const WgDevice = struct { /// Encrypt an IP packet for a specific peer. pub fn encryptForPeer(self: *WgDevice, slot: usize, plaintext: []const u8, out: []u8) !usize { + self.lock.lockSharedUncancelable(zio()); // H6: shared β€” excludes removePeer + defer self.lock.unlockShared(zio()); const peer = if (self.peers[slot]) |*p| p else return error.PeerNotFound; const tun = if (peer.active_tunnel) |*t| t else return error.NoTunnel; return tun.encrypt(plaintext, out); @@ -459,6 +557,9 @@ pub const WgDevice = struct { pub fn decryptTransport(self: *WgDevice, packet: []const u8, out: []u8) !struct { slot: usize, len: usize } { if (packet.len < 16) return error.PacketTooShort; + self.lock.lockSharedUncancelable(zio()); // H6: shared β€” excludes removePeer + defer self.lock.unlockShared(zio()); + // Look up peer by receiver_index (which is the index they assigned us) const recv_idx = std.mem.readInt(u32, packet[4..8], .little); const peer_slot = self.index_map.get(recv_idx) orelse return error.UnknownIndex; @@ -466,6 +567,16 @@ pub const WgDevice = struct { const tun = if (peer.active_tunnel) |*t| t else return error.NoTunnel; const len = try tun.decrypt(packet, out); + + // SECURITY (H1): RX cryptokey routing. A decrypted transport packet must + // carry an inner SOURCE address that belongs to the peer it arrived from. + // The kernel WireGuard path enforces this via AllowedIPs; the userspace + // plane previously did not, letting any admitted peer spoof any mesh + // source IP (cross-tenant impersonation, ARP/conntrack poisoning, bypass + // of source-based service policy). Drop on mismatch. + if (!innerSourceAllowed(peer.mesh_ip, peer.mesh_ip6, out[0..len])) { + return error.SourceSpoofed; + } return .{ .slot = peer_slot, .len = len }; } @@ -502,6 +613,32 @@ pub const WgDevice = struct { } }; +/// SECURITY (H1): returns true iff a decrypted inner IP packet's SOURCE address +/// belongs to the peer it arrived from (RX cryptokey routing). Empty keepalives +/// pass. A runt or non-IP packet is rejected. A zero peer mesh IP means +/// "unconstrained" (interop/test peers that were added without a derived address); +/// production peers always carry a derived 10.99/16 address, so the check is +/// enforced for them. +fn innerSourceAllowed(peer_mesh_ip: [4]u8, peer_mesh_ip6: [16]u8, plaintext: []const u8) bool { + if (plaintext.len == 0) return true; // WireGuard keepalive + const version = plaintext[0] >> 4; + switch (version) { + 4 => { + if (plaintext.len < 20) return false; // too short to hold an IPv4 header + const unset = (peer_mesh_ip[0] | peer_mesh_ip[1] | peer_mesh_ip[2] | peer_mesh_ip[3]) == 0; + if (unset) return true; + return std.mem.eql(u8, plaintext[12..16], &peer_mesh_ip); + }, + 6 => { + if (plaintext.len < 40) return false; // too short to hold an IPv6 header + const zero6 = [_]u8{0} ** 16; + if (std.mem.eql(u8, &peer_mesh_ip6, &zero6)) return true; + return std.mem.eql(u8, plaintext[8..24], &peer_mesh_ip6); + }, + else => return false, // not IPv4/IPv6 β€” drop + } +} + // ─── Tests ─── test "WgDevice add/remove peer" { @@ -528,6 +665,67 @@ test "WgDevice add/remove peer" { try std.testing.expectEqual(dev.findByWgPubkey(peer_pub), null); } +test "handshake rate limiter throttles a single-source flood (H4 regression)" { + var lim = HandshakeLimiter{}; + const ip = [4]u8{ 203, 0, 113, 7 }; + const t0: i128 = 1_000_000_000; + + // A burst from one source is capped at PER_SRC_BURST; extras are denied. + var allowed: u32 = 0; + var i: usize = 0; + while (i < HandshakeLimiter.PER_SRC_BURST + 5) : (i += 1) { + if (lim.allow(ip, t0)) allowed += 1; + } + try std.testing.expectEqual(HandshakeLimiter.PER_SRC_BURST, allowed); + + // Tokens refill over time β€” a later attempt succeeds again. + try std.testing.expect(lim.allow(ip, t0 + HandshakeLimiter.PER_SRC_REFILL_NS + 1)); + + // A different source has its own independent budget. + try std.testing.expect(lim.allow(.{ 198, 51, 100, 9 }, t0)); +} + +test "innerSourceAllowed enforces RX cryptokey routing (H1 regression)" { + const peer_ip = [4]u8{ 10, 99, 1, 2 }; + const zero6 = [_]u8{0} ** 16; + + // Valid IPv4 packet whose source is the peer's own mesh IP β†’ allowed. + var pkt: [20]u8 = .{0} ** 20; + pkt[0] = 0x45; // IPv4, IHL=5 + @memcpy(pkt[12..16], &peer_ip); + try std.testing.expect(innerSourceAllowed(peer_ip, zero6, &pkt)); + + // Spoofed inner source β†’ rejected. + var spoof: [20]u8 = .{0} ** 20; + spoof[0] = 0x45; + spoof[12] = 10; + spoof[13] = 99; + spoof[14] = 9; + spoof[15] = 9; + try std.testing.expect(!innerSourceAllowed(peer_ip, zero6, &spoof)); + + // Empty keepalive β†’ allowed. + try std.testing.expect(innerSourceAllowed(peer_ip, zero6, &[_]u8{})); + + // Runt packet claiming IPv4 but too short β†’ rejected (and no OOB read). + try std.testing.expect(!innerSourceAllowed(peer_ip, zero6, &[_]u8{ 0x45, 0, 0 })); + + // Non-IP first nibble β†’ rejected. + try std.testing.expect(!innerSourceAllowed(peer_ip, zero6, &[_]u8{0x00} ** 20)); + + // Unconstrained peer (zero mesh IP, e.g. interop) β†’ not enforced. + try std.testing.expect(innerSourceAllowed(.{ 0, 0, 0, 0 }, zero6, &spoof)); + + // IPv6: matching source allowed, tampered source rejected. + const peer6 = [16]u8{ 0xfd, 0x99, 0x6d, 0x67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + var p6: [40]u8 = .{0} ** 40; + p6[0] = 0x60; // IPv6 + @memcpy(p6[8..24], &peer6); + try std.testing.expect(innerSourceAllowed(.{ 0, 0, 0, 0 }, peer6, &p6)); + p6[23] = 0x02; // tamper the source address + try std.testing.expect(!innerSourceAllowed(.{ 0, 0, 0, 0 }, peer6, &p6)); +} + test "IndexTable put/get/remove" { var table: IndexTable = .{}; table.put(42, 5); From 89fb50851b75f1ffd079bc04031afb7db85d0c64 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:41:54 -0300 Subject: [PATCH 2/3] fix(ci): pass src_ip to handleInitiation in Linux-only wg-interop binary The H4 rate-limit change added a src_ip parameter to WgDevice.handleInitiation; wg_interop.zig (built only on Linux) still used the old 1-arg call, breaking the ubuntu CI build. macOS doesn't compile this binary, so local builds missed it. --- src/wg_interop.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wg_interop.zig b/src/wg_interop.zig index 975b8d3..b007b1b 100644 --- a/src/wg_interop.zig +++ b/src/wg_interop.zig @@ -172,7 +172,7 @@ pub fn main(init: std.process.Init) !void { .wg_handshake_init => { if (recv.data.len >= @sizeOf(noise.HandshakeInitiation)) { const msg: *const noise.HandshakeInitiation = @ptrCast(@alignCast(recv.data.ptr)); - if (wg_dev.handleInitiation(msg)) |hs| { + if (wg_dev.handleInitiation(msg, recv.sender_addr)) |hs| { const resp = std.mem.asBytes(&hs.response); _ = udp.sendTo(resp, recv.sender_addr, recv.sender_port) catch 0; try stdout.writeStreamingAll(zio, " << responded to initiation\n"); From ab85b27106309ab2d1409ddcb7c22484f2a0e193 Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:54:12 -0300 Subject: [PATCH 3/3] Address PR review: fix framed-tunnel drop, post-decrypt race, and nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code-review follow-ups on the hardening PR: - P1 (regression): the H1 RX source-IP check rejected non-IP plaintext, which dropped every FFI framed tunnel message (framed with a leading-zero length header β†’ version nibble 0). innerSourceAllowed now only constrains IPv4/IPv6 packets (the IP-routed ones); non-IP framed payloads pass to the inbox. - H6 race: the daemon re-read wg_dev.peers[slot] after decryptTransport had released the device lock β€” a data race vs removePeer that could also skip the service filter. decryptTransport now returns the peer identity_key (captured under the lock); all four daemon filter blocks and the FFI use it, so peers[] is never re-read post-decrypt. - P2: the FFI auto-register path charged the handshake rate limiter twice for one legitimate handshake (initial UnknownPeer probe + retry). Added handleInitiationAdmitted for the retry, which skips re-admission. - DNS: responseMatchesQuery now also validates the echoed QCLASS. - membership.upsert: free the previous name allocation on the update path (consistent with remove/deinit) to avoid a leak. - device.zig: make the handshake-limiter hash operands explicitly usize; give the initiation handlers a shared named return type (InitiationResult). Verified: zig build test, Debug, ReleaseFast, ReleaseSafe, and x86_64-linux cross-compile (Debug + ReleaseFast). --- src/discovery/membership.zig | 6 +++++ src/main.zig | 26 +++++++++--------- src/meshguard_ffi.zig | 24 ++++++----------- src/net/dns.zig | 1 + src/wireguard/device.zig | 51 ++++++++++++++++++++++++++---------- 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/src/discovery/membership.zig b/src/discovery/membership.zig index ff14252..3e15b29 100644 --- a/src/discovery/membership.zig +++ b/src/discovery/membership.zig @@ -119,6 +119,12 @@ pub const MembershipTable = struct { if (existing) |e| { // Only update if the incoming info is newer (higher Lamport timestamp) if (peer.lamport <= e.lamport) return; + // Free the previous name allocation we're about to overwrite, unless + // the caller is reusing the same buffer (avoids a leak and a later + // double-free; names are owned by self.allocator like remove/deinit). + if (e.name.len > 0 and e.name.ptr != peer.name.ptr) { + self.allocator.free(e.name); + } try self.peers.put(peer.pubkey, peer); return; } diff --git a/src/main.zig b/src/main.zig index cabddbc..24d4f1a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2686,9 +2686,11 @@ fn decryptRxWorker( const pkt = decrypt_q.getPacket(result.idx); if (wg_dev.decryptTransport(pkt, &out_buf)) |dec| { // Service filter: check access before TUN write (IPv4 + IPv6, M5). - if (wg_dev.peers[dec.slot]) |peer| { - const org_pk = orgPubkeyLocked(membership, peer.identity_key); - if (!service_filter.allowPacket(peer.identity_key, org_pk, out_buf[0..dec.len])) continue; + { + // identity_key comes from decryptTransport (captured under the + // device lock) β€” no unlocked peers[] re-read (H6 race). + const org_pk = orgPubkeyLocked(membership, dec.identity_key); + if (!service_filter.allowPacket(dec.identity_key, org_pk, out_buf[0..dec.len])) continue; } // Write decrypted packet to TUN @@ -2730,9 +2732,9 @@ fn processIncomingPacket( if (n_decrypted.* < 64) { if (wg_dev.decryptTransport(pkt, &decrypt_storage[n_decrypted.*])) |result| { // Check service filter before buffering (IPv4 + IPv6, M5). - if (wg_dev.peers[result.slot]) |peer| { - const org_pk = orgPubkeyLocked(swim.membership, peer.identity_key); - if (!service_filter.allowPacket(peer.identity_key, org_pk, decrypt_storage[n_decrypted.*][0..result.len])) return; + { + const org_pk = orgPubkeyLocked(swim.membership, result.identity_key); + if (!service_filter.allowPacket(result.identity_key, org_pk, decrypt_storage[n_decrypted.*][0..result.len])) return; } decrypt_lens[n_decrypted.*] = result.len; decrypt_slots[n_decrypted.*] = result.slot; @@ -2811,9 +2813,9 @@ fn windowsEventLoop( // Decrypt WG transport β†’ write plaintext to Wintun if (wg_dev.decryptTransport(pkt, &decrypt_buf)) |result| { // Apply service filter before writing to TUN (IPv4 + IPv6, M5). - if (wg_dev.peers[result.slot]) |peer| { - const org_pk = orgPubkeyLocked(swim.membership, peer.identity_key); - if (!service_filter.allowPacket(peer.identity_key, org_pk, decrypt_buf[0..result.len])) continue; + { + const org_pk = orgPubkeyLocked(swim.membership, result.identity_key); + if (!service_filter.allowPacket(result.identity_key, org_pk, decrypt_buf[0..result.len])) continue; } tun_dev.write(decrypt_buf[0..result.len]) catch {}; } else |_| {} @@ -2960,9 +2962,9 @@ fn macosEventLoop( // Decrypt WG transport β†’ write plaintext to utun if (wg_dev.decryptTransport(pkt, &decrypt_buf)) |result| { // Apply service filter before writing to TUN (IPv4 + IPv6, M5). - if (wg_dev.peers[result.slot]) |peer| { - const org_pk = orgPubkeyLocked(swim.membership, peer.identity_key); - if (!service_filter.allowPacket(peer.identity_key, org_pk, decrypt_buf[0..result.len])) continue; + { + const org_pk = orgPubkeyLocked(swim.membership, result.identity_key); + if (!service_filter.allowPacket(result.identity_key, org_pk, decrypt_buf[0..result.len])) continue; } tun_dev.write(decrypt_buf[0..result.len]) catch {}; } else |_| {} diff --git a/src/meshguard_ffi.zig b/src/meshguard_ffi.zig index e303b84..f557a4d 100644 --- a/src/meshguard_ffi.zig +++ b/src/meshguard_ffi.zig @@ -1165,8 +1165,11 @@ fn handleWgPacket(ctx: *MeshguardContext, data: []const u8, sender_addr: [4]u8, } else |_| {} } - // Retry after auto-registration - break :blk dev.handleInitiation(msg, sender_addr) catch return; + // Retry after auto-registration. Use the "admitted" path so the + // rate limiter is not charged a second time for this one + // legitimate handshake (the first attempt already consumed a + // token before failing with UnknownPeer). + break :blk dev.handleInitiationAdmitted(msg) catch return; }; // Send handshake response back to the initiator @@ -1212,20 +1215,9 @@ fn handleWgPacket(ctx: *MeshguardContext, data: []const u8, sender_addr: [4]u8, var plaintext: [1500]u8 = undefined; const result = dev.decryptTransport(data, &plaintext) catch return; - // Look up the peer's identity key for the sender field - var sender_identity: [32]u8 = .{0} ** 32; - if (dev.peers[result.slot]) |*peer| { - sender_identity = peer.identity_key; - // Finding #5 (v2): Only update endpoint if changed. - // Under shared lock, only safe for same-value writes. - if (!std.mem.eql(u8, &peer.endpoint_addr, &sender_addr) or - peer.endpoint_port != sender_port) - { - // Endpoint changed β€” defer update to avoid torn read. - // The next handshake (exclusive lock) will pick up the new address - // from the UDP source. For now, just use sender_identity. - } - } + // identity_key is captured inside decryptTransport under the device + // lock, so we never re-read peers[slot] here (avoids the removePeer race). + const sender_identity: [32]u8 = result.identity_key; // Strip the 4-byte length framing header. // Wire format: [2B reserved=0] [2B LE length] [payload] diff --git a/src/net/dns.zig b/src/net/dns.zig index 1396d27..9a247d3 100644 --- a/src/net/dns.zig +++ b/src/net/dns.zig @@ -481,6 +481,7 @@ fn responseMatchesQuery(data: []const u8, expected_id: u16, domain: []const u8, if (HEADER_LEN + q_end + 4 > data.len) return false; if (!std.ascii.eqlIgnoreCase(data[HEADER_LEN .. HEADER_LEN + q_end], q_buf[0..q_end])) return false; if (readU16(data, HEADER_LEN + q_end) != qtype) return false; // QTYPE echoed + if (readU16(data, HEADER_LEN + q_end + 2) != CLASS_IN) return false; // QCLASS echoed return true; } diff --git a/src/wireguard/device.zig b/src/wireguard/device.zig index 1c02b9a..ca5d140 100644 --- a/src/wireguard/device.zig +++ b/src/wireguard/device.zig @@ -270,7 +270,7 @@ const HandshakeLimiter = struct { if (self.global_tokens == 0) return false; // Per-source bucket (hash β†’ slot, reset on collision with a different IP). - const h = (((@as(usize, ip[0]) *% 131 +% ip[1]) *% 131 +% ip[2]) *% 131 +% ip[3]); + const h = (((@as(usize, ip[0]) *% 131 +% @as(usize, ip[1])) *% 131 +% @as(usize, ip[2])) *% 131 +% @as(usize, ip[3])); const b = &self.buckets[h % SLOTS]; if (!std.mem.eql(u8, &b.ip, &ip)) { b.* = .{ .ip = ip, .tokens = PER_SRC_BURST, .last_ns = now_ns }; @@ -285,6 +285,12 @@ const HandshakeLimiter = struct { } }; +/// Result of processing an inbound handshake initiation (we are responder). +pub const InitiationResult = struct { + response: noise.HandshakeResponse, + slot: usize, +}; + /// Userspace WireGuard device. pub const WgDevice = struct { /// Our X25519 keypair @@ -462,7 +468,7 @@ pub const WgDevice = struct { /// Handle an incoming handshake initiation (we are responder). /// O(1) lookup via static key table instead of O(N) peer iteration. - pub fn handleInitiation(self: *WgDevice, msg: *const noise.HandshakeInitiation, src_ip: [4]u8) !struct { response: noise.HandshakeResponse, slot: usize } { + pub fn handleInitiation(self: *WgDevice, msg: *const noise.HandshakeInitiation, src_ip: [4]u8) !InitiationResult { // Step 0: Verify MAC1 BEFORE expensive DH (cheap, no lock). // MAC1 is keyed with our public key, so we can check without knowing the sender. if (!noise.verifyMac1(self.static_public, msg)) { @@ -477,6 +483,15 @@ pub const WgDevice = struct { return error.HandshakeRateLimited; } + return self.handleInitiationAdmitted(msg); + } + + /// Process a handshake initiation that has ALREADY passed admission control + /// (MAC1 + rate limit). Used by the FFI auto-register path to retry after + /// registering the peer, WITHOUT charging the rate limiter a second time for + /// the same legitimate handshake (the first attempt already consumed a token + /// before failing with UnknownPeer). + pub fn handleInitiationAdmitted(self: *WgDevice, msg: *const noise.HandshakeInitiation) !InitiationResult { // The expensive crypto + peer-table mutation below run under the // exclusive device lock (H6). self.lock.lockUncancelable(zio()); @@ -553,8 +568,10 @@ pub const WgDevice = struct { } /// Decrypt an incoming transport packet. - /// Returns: (peer_slot, decrypted_length) - pub fn decryptTransport(self: *WgDevice, packet: []const u8, out: []u8) !struct { slot: usize, len: usize } { + /// Returns: (peer_slot, decrypted_length, peer identity_key). The identity_key + /// is captured under the lock so callers never have to re-read peers[slot] + /// afterwards (which would race removePeer β€” see H6). + pub fn decryptTransport(self: *WgDevice, packet: []const u8, out: []u8) !struct { slot: usize, len: usize, identity_key: [32]u8 } { if (packet.len < 16) return error.PacketTooShort; self.lock.lockSharedUncancelable(zio()); // H6: shared β€” excludes removePeer @@ -565,6 +582,7 @@ pub const WgDevice = struct { const peer_slot = self.index_map.get(recv_idx) orelse return error.UnknownIndex; const peer = if (self.peers[peer_slot]) |*p| p else return error.PeerNotFound; const tun = if (peer.active_tunnel) |*t| t else return error.NoTunnel; + const identity_key = peer.identity_key; const len = try tun.decrypt(packet, out); @@ -577,7 +595,7 @@ pub const WgDevice = struct { if (!innerSourceAllowed(peer.mesh_ip, peer.mesh_ip6, out[0..len])) { return error.SourceSpoofed; } - return .{ .slot = peer_slot, .len = len }; + return .{ .slot = peer_slot, .len = len, .identity_key = identity_key }; } /// Find a peer slot by identity key. @@ -613,12 +631,16 @@ pub const WgDevice = struct { } }; -/// SECURITY (H1): returns true iff a decrypted inner IP packet's SOURCE address -/// belongs to the peer it arrived from (RX cryptokey routing). Empty keepalives -/// pass. A runt or non-IP packet is rejected. A zero peer mesh IP means -/// "unconstrained" (interop/test peers that were added without a derived address); -/// production peers always carry a derived 10.99/16 address, so the check is -/// enforced for them. +/// SECURITY (H1): for a decrypted inner IPv4/IPv6 packet, returns true iff its +/// SOURCE address belongs to the peer it arrived from (RX cryptokey routing). +/// +/// Only IPv4 (version nibble 4) and IPv6 (6) packets are constrained β€” those are +/// the ones routed onto the host IP stack, where a spoofed source matters. Empty +/// keepalives and NON-IP plaintext pass through: the FFI tunnel API frames app +/// payloads (leading-zero length header β†’ version nibble 0) and delivers them to +/// an inbox, not the IP stack, so the source-IP rule does not apply. A zero peer +/// mesh IP means "unconstrained" (interop/test peers added without a derived +/// address); production peers always carry a derived 10.99/16 address. fn innerSourceAllowed(peer_mesh_ip: [4]u8, peer_mesh_ip6: [16]u8, plaintext: []const u8) bool { if (plaintext.len == 0) return true; // WireGuard keepalive const version = plaintext[0] >> 4; @@ -635,7 +657,7 @@ fn innerSourceAllowed(peer_mesh_ip: [4]u8, peer_mesh_ip6: [16]u8, plaintext: []c if (std.mem.eql(u8, &peer_mesh_ip6, &zero6)) return true; return std.mem.eql(u8, plaintext[8..24], &peer_mesh_ip6); }, - else => return false, // not IPv4/IPv6 β€” drop + else => return true, // non-IP framed payload (FFI tunnel inbox) β€” not IP-routed } } @@ -710,8 +732,9 @@ test "innerSourceAllowed enforces RX cryptokey routing (H1 regression)" { // Runt packet claiming IPv4 but too short β†’ rejected (and no OOB read). try std.testing.expect(!innerSourceAllowed(peer_ip, zero6, &[_]u8{ 0x45, 0, 0 })); - // Non-IP first nibble β†’ rejected. - try std.testing.expect(!innerSourceAllowed(peer_ip, zero6, &[_]u8{0x00} ** 20)); + // Non-IP framed payload (e.g. FFI tunnel inbox, leading-zero header) β†’ allowed: + // the source-IP rule only applies to IP-routed packets. + try std.testing.expect(innerSourceAllowed(peer_ip, zero6, &[_]u8{0x00} ** 20)); // Unconstrained peer (zero mesh IP, e.g. interop) β†’ not enforced. try std.testing.expect(innerSourceAllowed(.{ 0, 0, 0, 0 }, zero6, &spoof));