From d4e8a5d2c229940a8322f8308acf1a8e267f3584 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Mon, 4 May 2026 10:12:11 +0000 Subject: [PATCH 01/16] Block AF_ALG (family 38) in searcher container seccomp profile Defense in depth against the AF_ALG/algif_aead syscall surface that copy.fail (CVE-2026-31431) abuses. The existing socket() rule already blocks AF_VSOCK (family 40); extend the same rule to also block AF_ALG (family 38). Multiple args in a single seccomp rule are AND-ed per the OCI spec, so the rule now allows socket() only when arg[0] is neither 40 nor 38. --- .../flashbox/common/mkosi.extra/etc/containers/seccomp.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json index c4d91109..70affcb5 100644 --- a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json +++ b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json @@ -433,6 +433,11 @@ "index": 0, "value": 40, "op": "SCMP_CMP_NE" + }, + { + "index": 0, + "value": 38, + "op": "SCMP_CMP_NE" } ] }, From 400cc6b93dec57d820d6282cf7fad37a9988dbad Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Mon, 4 May 2026 10:13:08 +0000 Subject: [PATCH 02/16] Drop CONFIG_CRYPTO_USER_API_* (no consumer on this image) The AF_ALG userspace crypto API (algif_hash / algif_skcipher / algif_rng / algif_aead) was enabled with a '# For tdx-init' annotation, but tdx-init itself uses Go's stdlib crypto/hmac + crypto/sha256 (pure userspace) and shells out to cryptsetup, which on Debian uses libgcrypt + libargon2 for PBKDF and dm-crypt for actual block encryption -- dm-crypt talks to the in-kernel skcipher API directly, not via the AF_ALG userspace surface. Lighthouse and the rest of the image userspace use ring / aes-gcm. Removing the surface eliminates the entry point for CVE-2026-31431 (copy.fail) at the kernel level and shrinks the surface for any future algif_* CVE. 
Pre-merge: boot the rebuilt image and confirm 'journalctl -b' has no AF_ALG/algif_* warnings, and that 'tdx-init set-passphrase' / lighthouse restart / searcher container init paths all work end-to-end. --- modules/flashbox/common/kernel/config.d/10-bob | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/modules/flashbox/common/kernel/config.d/10-bob b/modules/flashbox/common/kernel/config.d/10-bob index 11eb55e0..7c64ae17 100644 --- a/modules/flashbox/common/kernel/config.d/10-bob +++ b/modules/flashbox/common/kernel/config.d/10-bob @@ -36,8 +36,14 @@ CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_RAW=y CONFIG_NET_SCHED=y -# For tdx-init -CONFIG_CRYPTO_USER_API_HASH=y -CONFIG_CRYPTO_USER_API_SKCIPHER=y -CONFIG_CRYPTO_USER_API_RNG=y -CONFIG_CRYPTO_USER_API_AEAD=y +# AF_ALG userspace crypto API. +# Not used by anything on this image: tdx-init uses Go-stdlib HMAC and +# shells out to cryptsetup (libgcrypt + libargon2 in userspace; dm-crypt +# uses the in-kernel skcipher API directly, not AF_ALG). lighthouse and +# the rest of the userspace use ring/aes-gcm. Disabled to remove the +# attack surface for CVE-2026-31431 (copy.fail) and similar. +# CONFIG_CRYPTO_USER_API is not set +# CONFIG_CRYPTO_USER_API_HASH is not set +# CONFIG_CRYPTO_USER_API_SKCIPHER is not set +# CONFIG_CRYPTO_USER_API_RNG is not set +# CONFIG_CRYPTO_USER_API_AEAD is not set From f610562b39cee64e4c583fab6b9ebbe904f69892 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Mon, 4 May 2026 10:15:30 +0000 Subject: [PATCH 03/16] Pin CONFIG_CRYPTO_AUTHENCESN off (no in-tree consumer) authencesn is an AEAD template whose only intended in-tree consumer is the IPsec/XFRM stack when an SA has the Extended Sequence Number flag set. IPsec is disabled on this image (CONFIG_INET_AH/ESP/INET6_AH/INET6_ESP all 'not set' in 01-sane-defaults), so authencesn has no in-tree user here. 
Pinning it off explicitly removes the algorithm even if Debian's cloud config inherits it as =y, and removes the specific code path that the copy.fail bug rearranges -- belt-and-suspenders alongside the AF_ALG removal in the previous commit. --- modules/flashbox/common/kernel/config.d/10-bob | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/flashbox/common/kernel/config.d/10-bob b/modules/flashbox/common/kernel/config.d/10-bob index 7c64ae17..9f480eda 100644 --- a/modules/flashbox/common/kernel/config.d/10-bob +++ b/modules/flashbox/common/kernel/config.d/10-bob @@ -47,3 +47,11 @@ CONFIG_NET_SCHED=y # CONFIG_CRYPTO_USER_API_SKCIPHER is not set # CONFIG_CRYPTO_USER_API_RNG is not set # CONFIG_CRYPTO_USER_API_AEAD is not set + +# Authenticated encryption template combining cipher + HMAC + Extended +# Sequence Number handling. Only in-tree consumer is IPsec/XFRM with the +# ESN flag set, but IPsec is disabled on this image (see 01-sane-defaults: +# CONFIG_INET_AH/ESP/INET6_AH/INET6_ESP not set). authencesn was the +# specific algorithm at the heart of CVE-2026-31431 (copy.fail); pinning +# it off removes the algorithm from the kernel as defense in depth. +# CONFIG_CRYPTO_AUTHENCESN is not set From 25693de3b2b3003c98ad68e43b3b413ef5e31c8f Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 8 May 2026 09:10:05 +0000 Subject: [PATCH 04/16] Block AF_RXRPC (family 45) and AF_KEY (family 15) in searcher container seccomp profile Defense in depth against the RxRPC and PF_KEY/XFRM kernel codepaths. The existing socket() rule already blocks AF_VSOCK (40) and AF_ALG (38); extend the same rule to also block AF_RXRPC (45) and AF_KEY (15). Numeric values verified against include/linux/socket.h (PF_RXRPC = 45, PF_KEY = 15) -- same lesson learned from copy.fail, where the rule intended to block AF_ALG was blocking AF_VSOCK because the constant was off by two. 
Multiple args in a single seccomp rule are AND-ed per the OCI spec, so the rule now allows socket() only when arg[0] is none of {15, 38, 40, 45}. The host kernel does not currently compile any of these families in (MODULES=n + CONFIG_AF_RXRPC=m / CONFIG_NET_KEY=m in the Debian base both resolve to 'not set' after olddefconfig), so socket() with these families already returns EAFNOSUPPORT. This change makes the rejection explicit at the seccomp layer, which keeps the path closed even if a future kernel-config edit re-enables one of these families. No legitimate searcher workload uses AF_RXRPC (kernel AFS client) or AF_KEY (legacy IPsec keying interface). The container's egress firewall in init-container.sh already blocks the relevant network paths. --- .../common/mkosi.extra/etc/containers/seccomp.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json index 70affcb5..3463e563 100644 --- a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json +++ b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json @@ -438,6 +438,16 @@ "index": 0, "value": 38, "op": "SCMP_CMP_NE" + }, + { + "index": 0, + "value": 45, + "op": "SCMP_CMP_NE" + }, + { + "index": 0, + "value": 15, + "op": "SCMP_CMP_NE" } ] }, From d6eaedd586a59480f80b7d08881b117f2a4709ca Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 8 May 2026 09:22:19 +0000 Subject: [PATCH 05/16] Pin AF_RXRPC, RXKAD, XFRM_USER off (no consumer on any image) Three more kernel codepaths with no in-tree user on any flashbots image, joining the existing # CONFIG_INET_AH/ESP/INET6_AH/INET6_ESP/NET_KEY disables in this file: - AF_RXRPC + RXKAD: kernel RxRPC session sockets and Kerberos security, used only by the in-kernel AFS filesystem client. No image runs an AFS client, no userspace opens AF_RXRPC sockets. 
- XFRM_USER: netlink control interface for XFRM transforms (`ip xfrm`, strongSwan, libreswan). The image firewall is iptables; no IPsec daemon runs anywhere. With INET_AH/ESP/INET6_AH/INET6_ESP/NET_KEY already off, XFRM has no transforms to configure -- the netlink control interface is dead surface. Debian's cloud-amd64 base config has CONFIG_AF_RXRPC=m, CONFIG_RXKAD=y, CONFIG_XFRM_USER=m. CONFIG_MODULES is unset on this image (00-no-modules), so olddefconfig already resolves AF_RXRPC and XFRM_USER to 'not set', and RXKAD follows because it sits inside `if AF_RXRPC` in net/rxrpc/Kconfig. RXKAD is the one to watch -- a straight `=y` in Debian, not auto-disabled by MODULES=n alone, so an explicit pin is the only thing that keeps it off if the surrounding config drifts. Pinning the three explicitly removes the inference step and keeps the kernel attack surface small if a future Debian config or kconfig snippet edit changes a default. Mirrors the same belt-and-suspenders pattern used for AUTHENCESN and the AF_ALG family elsewhere in this branch. 
--- shared/kernel/config.d/01-sane-defaults | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shared/kernel/config.d/01-sane-defaults b/shared/kernel/config.d/01-sane-defaults index 9e2d8b94..0ce073c8 100644 --- a/shared/kernel/config.d/01-sane-defaults +++ b/shared/kernel/config.d/01-sane-defaults @@ -176,6 +176,7 @@ CONFIG_VIRTIO_MEM=y CONFIG_BALLOON_COMPACTION=y CONFIG_MEMORY_BALLOON=y +# CONFIG_AF_RXRPC is not set # CONFIG_BCACHEFS_FS is not set # CONFIG_BLK_DEV_RBD is not set # CONFIG_BONDING is not set @@ -221,6 +222,7 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_NFS_FS is not set # CONFIG_NTFS3_FS is not set # CONFIG_RDS is not set +# CONFIG_RXKAD is not set # CONFIG_SMB_SERVER is not set # CONFIG_SURFACE_PLATFORMS is not set # CONFIG_SUSPEND is not set @@ -246,6 +248,7 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_X86_SPEEDSTEP_CENTRINO is not set # CONFIG_XDP_SOCKETS is not set # CONFIG_XEN is not set +# CONFIG_XFRM_USER is not set # CONFIG_ZONEFS_FS is not set CONFIG_X86_DISABLED_FEATURE_XENPV=y CONFIG_X86_REQUIRED_FEATURE_PGE=y From 0ae7c5fb70abeb0a9358283db77fda62f6e5699a Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Fri, 8 May 2026 09:55:24 +0000 Subject: [PATCH 06/16] Pin XFRM core/algo/espintcp off (close inference gap) Followup to the previous commit that pinned AF_RXRPC/RXKAD/XFRM_USER off. XFRM_USER is the netlink config interface; this commit pins the rest of the XFRM machinery so no XFRM code is compiled into the kernel at all. In net/xfrm/Kconfig: - CONFIG_XFRM (bool, no default) is selected only by transforms (INET_ESP/AH/IPCOMP, INET6_ESP/AH/IPCOMP, NET_KEY, XFRM_USER, XFRM_INTERFACE). All are 'not set' on this image (NET_KEY, INET[6]_AH/ESP/IPCOMP earlier in this file; XFRM_USER in the previous commit; XFRM_INTERFACE depends on IPV6 which is off). - CONFIG_XFRM_ALGO (tristate, no default) is selected by the same transform protocols, all off. - CONFIG_XFRM_ESPINTCP (bool) is the ESP-in-TCP encap glue, only meaningful with ESP, which is off. 
So all three resolve to 'not set' via olddefconfig already; the explicit pin removes the inference step and stays correct if a future kconfig snippet edit selects something that pulls XFRM back in. Functional impact: none. Verified that NET_IP_TUNNEL/NET_UDP_TUNNEL, TLS, KVM, HYPERV, VIRTIO, container runtime, dropbear, and the flashbox firewall do not depend on XFRM. NETFILTER_XT_MATCH_POLICY depends on XFRM and is the only iptables match that does -- flashbox firewall scripts do not use `-m policy` (grepped 0 hits in init-firewall.sh, toggle, and the per-image firewall-config files), so its absence is invisible. Removes the kernel-side primitive used by the ESP-in-UDP MSG_SPLICE_PAGES no-COW page-cache writes (Copy_Fail2 / Dirty Frag's ESP path) at the strongest layer: the ESP code is not even compiled in. --- shared/kernel/config.d/01-sane-defaults | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shared/kernel/config.d/01-sane-defaults b/shared/kernel/config.d/01-sane-defaults index 0ce073c8..57fa6188 100644 --- a/shared/kernel/config.d/01-sane-defaults +++ b/shared/kernel/config.d/01-sane-defaults @@ -248,6 +248,9 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_X86_SPEEDSTEP_CENTRINO is not set # CONFIG_XDP_SOCKETS is not set # CONFIG_XEN is not set +# CONFIG_XFRM is not set +# CONFIG_XFRM_ALGO is not set +# CONFIG_XFRM_ESPINTCP is not set # CONFIG_XFRM_USER is not set # CONFIG_ZONEFS_FS is not set CONFIG_X86_DISABLED_FEATURE_XENPV=y From ce2dbb4703fc267d355197fd27dccb68acd71ff5 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Mon, 11 May 2026 10:01:46 +0000 Subject: [PATCH 07/16] Disable default-on zero-copy receive (IO_URING + IO_URING_ZCRX + NET_DEVMEM) Three kernel features that auto-enable on this image despite having no consumer: init/Kconfig: config IO_URING bool "..."
if EXPERT, default y io_uring/Kconfig: config IO_URING_ZCRX def_bool y, depends on IO_URING + PAGE_POOL + INET + NET_RX_BUSY_POLL net/Kconfig: config NET_DEVMEM def_bool y, depends on DMA_SHARED_BUFFER + PAGE_POOL All three are kernel zero-copy-IO paths -- io_uring is the broader async I/O subsystem; IO_URING_ZCRX (6.15+) is its receive-into-registered-memory variant; NET_DEVMEM (6.11+) is the socket-level receive-into-device-memory variant ("devmem TCP"). They share the underlying net_iov / page-pool memory-provider machinery. Grepping the image confirms no consumer: - lighthouse + rbuilder use tokio with the default mio/epoll reactor (no tokio-uring), confirmed by greps for liburing / io_uring / tokio_uring across the in-tree Rust sources. - tdx-init is Go; Go runtime poller uses epoll on Linux. - systemd / podman / runc / dropbear / chrony / iptables / conntrack do not use io_uring or devmem. - No GPU / DRM / media drivers in the kernel snippets, so DMA_SHARED_BUFFER's selectors are not present either. What disabling these closes: 1. An OOB heap write in io_uring/zcrx.c:io_zcrx_return_niov_freelist() (freelist[] free_count not bounds-checked; 4-byte OOB into adjacent slab). Disclosed 2026-05-06; hardening commit 770594e is in mainline 2026-04-21 but not in linux-source-6.19_6.19.13-1~bpo13+1 which we ship. 2. io_uring as a whole, which has been a recurring CVE factory (CVE-2023-21400, CVE-2023-2598, CVE-2024-0582, CVE-2024-50266, ...). KSPP and several distros (ChromeOS, parts of AWS Bottlerocket) disable it by default on production servers. 3. NET_DEVMEM, which currently resolves to n via olddefconfig because DMA_SHARED_BUFFER has no selector on this image, but pinning it explicitly keeps that property stable if a future driver pull-in selects DMA_SHARED_BUFFER -- same inference gap that prompted pinning XFRM core/algo/espintcp in the previous commit.
EXPERT is already y in the Debian base config, so the `if EXPERT` prompt gate on IO_URING is non-binding -- olddefconfig respects the explicit "is not set" line. IO_URING_ZCRX would follow automatically (`depends on IO_URING`), but pinning it explicit makes the disable visible at the source. Container-side note: the searcher container's seccomp profile (defaultAction: SCMP_ACT_ERRNO, allow-listed syscalls only) does not include any io_uring_* in its allow list, so io_uring was already blocked there by default-deny. This commit removes the code from the kernel binary entirely; an explicit io_uring_* deny rule is added in a separate commit for belt-and-suspenders. --- shared/kernel/config.d/01-sane-defaults | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shared/kernel/config.d/01-sane-defaults b/shared/kernel/config.d/01-sane-defaults index 57fa6188..4afe4daf 100644 --- a/shared/kernel/config.d/01-sane-defaults +++ b/shared/kernel/config.d/01-sane-defaults @@ -198,6 +198,8 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INFINIBAND is not set +# CONFIG_IO_URING is not set +# CONFIG_IO_URING_ZCRX is not set # CONFIG_IPV6_ILA is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_IPV6_MROUTE is not set @@ -213,6 +215,7 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_ISO9660_FS is not set # CONFIG_KVM_XEN is not set # CONFIG_L2TP is not set +# CONFIG_NET_DEVMEM is not set # CONFIG_NET_FOU is not set # CONFIG_NET_IPGRE_DEMUX is not set # CONFIG_NET_IPIP is not set From 98afb27b29de1896cbca8c9ac242004ede96d899 Mon Sep 17 00:00:00 2001 From: MoeMahhouk Date: Mon, 11 May 2026 20:44:23 +0000 Subject: [PATCH 08/16] Re-enable CRYPTO_USER_API_{HASH,SKCIPHER} -- required by cryptsetup Debian's cryptsetup 2.8.1 is built with KERNEL_CAPI (visible in `cryptsetup --version` flags), and libcryptsetup in this binary has no openssl/gcrypt userspace backend compiled in. 
It hard-fails at startup with "Cannot initialize crypto backend" if AF_ALG is unavailable. tdx-init shells out to cryptsetup for LUKS2 format/open/resize/token operations, so without AF_ALG the persistent disk can never be initialized and the image cannot boot far enough to mount /persistent. The prior disable of all CRYPTO_USER_API_* was based on a code-path audit that under-counted what libcryptsetup actually uses at runtime. Verified on a dev image with strace + cryptsetup --debug on a loopback: # Running pbkdf2(sha256) benchmark. <- algif_hash # Running argon2id() benchmark. <- userspace libargon2 # Updating keyslot area [0x8000]. <- algif_skcipher Re-enable the minimum needed for that flow: the AF_ALG umbrella, HASH (PBKDF2 + MAC), and SKCIPHER (AES-XTS keyslot encryption). Keep _AEAD and _RNG explicitly off as kernel attack-surface hardening: - _AEAD: not used by the LUKS2 default flow (aes-xts-plain64). It is the most exposed AF_ALG subfamily; keeping it off removes that interface at the syscall layer. - _RNG: cryptsetup reads /dev/urandom directly for random data; it does not open algif_rng. Searcher container exposure is unchanged: the seccomp profile in modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json blocks socket() for AF_ALG (family 38), so re-enabling _HASH and _SKCIPHER on the host kernel does not widen the container's syscall surface. --- .../flashbox/common/kernel/config.d/10-bob | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/modules/flashbox/common/kernel/config.d/10-bob b/modules/flashbox/common/kernel/config.d/10-bob index 9f480eda..685a9256 100644 --- a/modules/flashbox/common/kernel/config.d/10-bob +++ b/modules/flashbox/common/kernel/config.d/10-bob @@ -37,14 +37,22 @@ CONFIG_IP_NF_RAW=y CONFIG_NET_SCHED=y # AF_ALG userspace crypto API. 
-# Not used by anything on this image: tdx-init uses Go-stdlib HMAC and -# shells out to cryptsetup (libgcrypt + libargon2 in userspace; dm-crypt -# uses the in-kernel skcipher API directly, not AF_ALG). lighthouse and -# the rest of the userspace use ring/aes-gcm. Disabled to remove the -# attack surface for CVE-2026-31431 (copy.fail) and similar. -# CONFIG_CRYPTO_USER_API is not set -# CONFIG_CRYPTO_USER_API_HASH is not set -# CONFIG_CRYPTO_USER_API_SKCIPHER is not set +# Debian's cryptsetup 2.8.1 is built with KERNEL_CAPI (see `cryptsetup +# --version` flags) and libcryptsetup hard-fails at startup with +# "Cannot initialize crypto backend" if AF_ALG is unavailable -- the +# openssl/gcrypt userspace backends are not compiled into this binary. +# tdx-init shells out to cryptsetup for LUKS2 format/open/resize, so +# AF_ALG is needed to mount the persistent disk. Verified on a dev +# image with strace + cryptsetup --debug: cryptsetup uses SKCIPHER +# (AES-XTS keyslot encryption) and HASH (PBKDF fallback / MAC) during +# the LUKS2 default flow. +# +# Re-enable only what cryptsetup actually needs; keep _AEAD and _RNG +# off as kernel attack-surface hardening (_RNG is unused by cryptsetup, +# which reads /dev/urandom directly). +CONFIG_CRYPTO_USER_API=y +CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y # CONFIG_CRYPTO_USER_API_RNG is not set # CONFIG_CRYPTO_USER_API_AEAD is not set From 2226b8dac2bcbafc8a1cb068a37d6c51c2056816 Mon Sep 17 00:00:00 2001 From: shashial <10578726+shashial@users.noreply.github.com> Date: Tue, 12 May 2026 21:37:48 +0100 Subject: [PATCH 09/16] seccomp: sync default profile to moby/profiles + layer family deny rules Replaces the previous seccomp extension attempt (commits 8f3342c and b459f19) with two changes: 1. 
Sync the bundled profile to moby/profiles main (currently tagged seccomp/v0.2.1): https://github.com/moby/profiles/blob/main/seccomp/default.json The relevant upstream change is dec315c (2026-04-30, "seccomp: Block AF_ALG in default socket policy"), which restructures the socket rule from a single `SCMP_CMP_NE 40` into three range-based rules: ALLOW arg0 < 38 ALLOW arg0 == 39 ALLOW arg0 > 40 Net effect: AF_ALG (38) and AF_VSOCK (40) both block at the seccomp layer. AF_VSOCK preservation comes along for free; AF_ALG blocking is the headline copy.fail (CVE-2026-31431) mitigation. This subsumes 8f3342c and the AF_VSOCK preservation half of b459f19. 2. Append three SCMP_ACT_ERRNO rules with errnoRet=97 (EAFNOSUPPORT) for AF_KEY (15), AF_RXRPC (33), and AF_MCTP (45). These families are also pinned off at the kernel layer (CONFIG_NET_KEY=n, CONFIG_AF_RXRPC=n, CONFIG_MCTP=n -- the last one in the next commit). The seccomp rules are tripwires in case the kernel config ever drifts; errnoRet=97 keeps the seccomp block visually indistinguishable from the kernel's own "family not registered" response. Why not just keep b459f19's socket rule? It packed four SCMP_CMP_NE conditions on arg0 into one seccomp_rule_add call, which libseccomp documents as supporting only one comparison per arg per rule (upstream issue #118, manpage clarification PR #225). On libseccomp 2.6.0 it silently produces a BPF tree where AF_KEY/AF_ALG/AF_MCTP fall through to ALLOW and the preserved AF_VSOCK block regresses. The LT/EQ/GT upstream pattern sidesteps that case because its three rules occupy different libseccomp op-priority levels; the layered EQ rules added here sit in priority-3 alongside upstream's `EQ 39` with disjoint datums and emit reachable BPF. Verified end-to-end on podman 5.8.2 + crun: in a fresh container, AF_KEY/AF_RXRPC/AF_ALG/AF_VSOCK/AF_MCTP all block, and every other family produces identical output to `seccomp=unconfined`. 
--- .../mkosi.extra/etc/containers/seccomp.json | 1766 +++++++++-------- 1 file changed, 919 insertions(+), 847 deletions(-) diff --git a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json index 3463e563..a7560ced 100644 --- a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json +++ b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json @@ -1,848 +1,920 @@ { - "defaultAction": "SCMP_ACT_ERRNO", - "defaultErrnoRet": 1, - "archMap": [ - { - "architecture": "SCMP_ARCH_X86_64", - "subArchitectures": [ - "SCMP_ARCH_X86", - "SCMP_ARCH_X32" - ] - }, - { - "architecture": "SCMP_ARCH_AARCH64", - "subArchitectures": [ - "SCMP_ARCH_ARM" - ] - }, - { - "architecture": "SCMP_ARCH_MIPS64", - "subArchitectures": [ - "SCMP_ARCH_MIPS", - "SCMP_ARCH_MIPS64N32" - ] - }, - { - "architecture": "SCMP_ARCH_MIPS64N32", - "subArchitectures": [ - "SCMP_ARCH_MIPS", - "SCMP_ARCH_MIPS64" - ] - }, - { - "architecture": "SCMP_ARCH_MIPSEL64", - "subArchitectures": [ - "SCMP_ARCH_MIPSEL", - "SCMP_ARCH_MIPSEL64N32" - ] - }, - { - "architecture": "SCMP_ARCH_MIPSEL64N32", - "subArchitectures": [ - "SCMP_ARCH_MIPSEL", - "SCMP_ARCH_MIPSEL64" - ] - }, - { - "architecture": "SCMP_ARCH_S390X", - "subArchitectures": [ - "SCMP_ARCH_S390" - ] - }, - { - "architecture": "SCMP_ARCH_RISCV64", - "subArchitectures": null - } - ], - "syscalls": [ - { - "names": [ - "accept", - "accept4", - "access", - "adjtimex", - "alarm", - "bind", - "brk", - "cachestat", - "capget", - "capset", - "chdir", - "chmod", - "chown", - "chown32", - "clock_adjtime", - "clock_adjtime64", - "clock_getres", - "clock_getres_time64", - "clock_gettime", - "clock_gettime64", - "clock_nanosleep", - "clock_nanosleep_time64", - "close", - "close_range", - "connect", - "copy_file_range", - "creat", - "dup", - "dup2", - "dup3", - "epoll_create", - "epoll_create1", - "epoll_ctl", - "epoll_ctl_old", - "epoll_pwait", - "epoll_pwait2", - "epoll_wait", 
- "epoll_wait_old", - "eventfd", - "eventfd2", - "execve", - "execveat", - "exit", - "exit_group", - "faccessat", - "faccessat2", - "fadvise64", - "fadvise64_64", - "fallocate", - "fanotify_mark", - "fchdir", - "fchmod", - "fchmodat", - "fchmodat2", - "fchown", - "fchown32", - "fchownat", - "fcntl", - "fcntl64", - "fdatasync", - "fgetxattr", - "flistxattr", - "flock", - "fork", - "fremovexattr", - "fsetxattr", - "fstat", - "fstat64", - "fstatat64", - "fstatfs", - "fstatfs64", - "fsync", - "ftruncate", - "ftruncate64", - "futex", - "futex_requeue", - "futex_time64", - "futex_wait", - "futex_waitv", - "futex_wake", - "futimesat", - "getcpu", - "getcwd", - "getdents", - "getdents64", - "getegid", - "getegid32", - "geteuid", - "geteuid32", - "getgid", - "getgid32", - "getgroups", - "getgroups32", - "getitimer", - "getpeername", - "getpgid", - "getpgrp", - "getpid", - "getppid", - "getpriority", - "getrandom", - "getresgid", - "getresgid32", - "getresuid", - "getresuid32", - "getrlimit", - "get_robust_list", - "getrusage", - "getsid", - "getsockname", - "getsockopt", - "get_thread_area", - "gettid", - "gettimeofday", - "getuid", - "getuid32", - "getxattr", - "inotify_add_watch", - "inotify_init", - "inotify_init1", - "inotify_rm_watch", - "io_cancel", - "ioctl", - "io_destroy", - "io_getevents", - "io_pgetevents", - "io_pgetevents_time64", - "ioprio_get", - "ioprio_set", - "io_setup", - "io_submit", - "ipc", - "kill", - "landlock_add_rule", - "landlock_create_ruleset", - "landlock_restrict_self", - "lchown", - "lchown32", - "lgetxattr", - "link", - "linkat", - "listen", - "listxattr", - "llistxattr", - "_llseek", - "lremovexattr", - "lseek", - "lsetxattr", - "lstat", - "lstat64", - "madvise", - "map_shadow_stack", - "membarrier", - "memfd_create", - "memfd_secret", - "mincore", - "mkdir", - "mkdirat", - "mknod", - "mknodat", - "mlock", - "mlock2", - "mlockall", - "mmap", - "mmap2", - "mprotect", - "mq_getsetattr", - "mq_notify", - "mq_open", - "mq_timedreceive", - 
"mq_timedreceive_time64", - "mq_timedsend", - "mq_timedsend_time64", - "mq_unlink", - "mremap", - "msgctl", - "msgget", - "msgrcv", - "msgsnd", - "msync", - "munlock", - "munlockall", - "munmap", - "name_to_handle_at", - "nanosleep", - "newfstatat", - "_newselect", - "open", - "openat", - "openat2", - "pause", - "pidfd_open", - "pidfd_send_signal", - "pipe", - "pipe2", - "pkey_alloc", - "pkey_free", - "pkey_mprotect", - "poll", - "ppoll", - "ppoll_time64", - "prctl", - "pread64", - "preadv", - "preadv2", - "prlimit64", - "process_mrelease", - "pselect6", - "pselect6_time64", - "pwrite64", - "pwritev", - "pwritev2", - "read", - "readahead", - "readlink", - "readlinkat", - "readv", - "recv", - "recvfrom", - "recvmmsg", - "recvmmsg_time64", - "recvmsg", - "remap_file_pages", - "removexattr", - "rename", - "renameat", - "renameat2", - "restart_syscall", - "rmdir", - "rseq", - "rt_sigaction", - "rt_sigpending", - "rt_sigprocmask", - "rt_sigqueueinfo", - "rt_sigreturn", - "rt_sigsuspend", - "rt_sigtimedwait", - "rt_sigtimedwait_time64", - "rt_tgsigqueueinfo", - "sched_getaffinity", - "sched_getattr", - "sched_getparam", - "sched_get_priority_max", - "sched_get_priority_min", - "sched_getscheduler", - "sched_rr_get_interval", - "sched_rr_get_interval_time64", - "sched_setaffinity", - "sched_setattr", - "sched_setparam", - "sched_setscheduler", - "sched_yield", - "seccomp", - "select", - "semctl", - "semget", - "semop", - "semtimedop", - "semtimedop_time64", - "send", - "sendfile", - "sendfile64", - "sendmmsg", - "sendmsg", - "sendto", - "setfsgid", - "setfsgid32", - "setfsuid", - "setfsuid32", - "setgid", - "setgid32", - "setgroups", - "setgroups32", - "setitimer", - "setpgid", - "setpriority", - "setregid", - "setregid32", - "setresgid", - "setresgid32", - "setresuid", - "setresuid32", - "setreuid", - "setreuid32", - "setrlimit", - "set_robust_list", - "setsid", - "setsockopt", - "set_thread_area", - "set_tid_address", - "setuid", - "setuid32", - "setxattr", - "shmat", - 
"shmctl", - "shmdt", - "shmget", - "shutdown", - "sigaltstack", - "signalfd", - "signalfd4", - "sigprocmask", - "sigreturn", - "socketcall", - "socketpair", - "splice", - "stat", - "stat64", - "statfs", - "statfs64", - "statx", - "symlink", - "symlinkat", - "sync", - "sync_file_range", - "syncfs", - "sysinfo", - "tee", - "tgkill", - "time", - "timer_create", - "timer_delete", - "timer_getoverrun", - "timer_gettime", - "timer_gettime64", - "timer_settime", - "timer_settime64", - "timerfd_create", - "timerfd_gettime", - "timerfd_gettime64", - "timerfd_settime", - "timerfd_settime64", - "times", - "tkill", - "truncate", - "truncate64", - "ugetrlimit", - "umask", - "uname", - "unlink", - "unlinkat", - "utime", - "utimensat", - "utimensat_time64", - "utimes", - "vfork", - "vmsplice", - "wait4", - "waitid", - "waitpid", - "write", - "writev" - ], - "action": "SCMP_ACT_ALLOW" - }, - { - "names": [ - "process_vm_readv", - "process_vm_writev", - "ptrace" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "minKernel": "4.8" - } - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 40, - "op": "SCMP_CMP_NE" - }, - { - "index": 0, - "value": 38, - "op": "SCMP_CMP_NE" - }, - { - "index": 0, - "value": 45, - "op": "SCMP_CMP_NE" - }, - { - "index": 0, - "value": 15, - "op": "SCMP_CMP_NE" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 0, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 8, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 131072, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 131080, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], 
- "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 4294967295, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "sync_file_range2", - "swapcontext" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "ppc64le" - ] - } - }, - { - "names": [ - "arm_fadvise64_64", - "arm_sync_file_range", - "sync_file_range2", - "breakpoint", - "cacheflush", - "set_tls" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "arm", - "arm64" - ] - } - }, - { - "names": [ - "arch_prctl" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "amd64", - "x32" - ] - } - }, - { - "names": [ - "modify_ldt" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "amd64", - "x32", - "x86" - ] - } - }, - { - "names": [ - "s390_pci_mmio_read", - "s390_pci_mmio_write", - "s390_runtime_instr" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "s390", - "s390x" - ] - } - }, - { - "names": [ - "riscv_flush_icache" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "riscv64" - ] - } - }, - { - "names": [ - "open_by_handle_at" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_DAC_READ_SEARCH" - ] - } - }, - { - "names": [ - "bpf", - "clone", - "clone3", - "fanotify_init", - "fsconfig", - "fsmount", - "fsopen", - "fspick", - "lookup_dcookie", - "mount", - "mount_setattr", - "move_mount", - "open_tree", - "perf_event_open", - "quotactl", - "quotactl_fd", - "setdomainname", - "sethostname", - "setns", - "syslog", - "umount", - "umount2", - "unshare" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_ADMIN" - ] - } - }, - { - "names": [ - "clone" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 2114060288, - "op": "SCMP_CMP_MASKED_EQ" - } - ], - "excludes": { - "caps": [ - "CAP_SYS_ADMIN" - ], - "arches": [ - "s390", - "s390x" - ] - } - }, - { - "names": [ - "clone" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 1, - 
"value": 2114060288, - "op": "SCMP_CMP_MASKED_EQ" - } - ], - "comment": "s390 parameter ordering for clone is different", - "includes": { - "arches": [ - "s390", - "s390x" - ] - }, - "excludes": { - "caps": [ - "CAP_SYS_ADMIN" - ] - } - }, - { - "names": [ - "clone3" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 38, - "excludes": { - "caps": [ - "CAP_SYS_ADMIN" - ] - } - }, - { - "names": [ - "reboot" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_BOOT" - ] - } - }, - { - "names": [ - "chroot" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_CHROOT" - ] - } - }, - { - "names": [ - "delete_module", - "init_module", - "finit_module" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_MODULE" - ] - } - }, - { - "names": [ - "acct" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_PACCT" - ] - } - }, - { - "names": [ - "kcmp", - "pidfd_getfd", - "process_madvise", - "process_vm_readv", - "process_vm_writev", - "ptrace" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_PTRACE" - ] - } - }, - { - "names": [ - "iopl", - "ioperm" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_RAWIO" - ] - } - }, - { - "names": [ - "settimeofday", - "stime", - "clock_settime", - "clock_settime64" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_TIME" - ] - } - }, - { - "names": [ - "vhangup" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_TTY_CONFIG" - ] - } - }, - { - "names": [ - "get_mempolicy", - "mbind", - "set_mempolicy", - "set_mempolicy_home_node" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_NICE" - ] - } - }, - { - "names": [ - "syslog" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYSLOG" - ] - } - }, - { - "names": [ - "bpf" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_BPF" - ] - } - }, - { - "names": [ 
- "perf_event_open" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_PERFMON" - ] - } - } - ] -} \ No newline at end of file + "defaultAction": "SCMP_ACT_ERRNO", + "defaultErrnoRet": 1, + "archMap": [ + { + "architecture": "SCMP_ARCH_X86_64", + "subArchitectures": [ + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ] + }, + { + "architecture": "SCMP_ARCH_AARCH64", + "subArchitectures": [ + "SCMP_ARCH_ARM" + ] + }, + { + "architecture": "SCMP_ARCH_MIPS64", + "subArchitectures": [ + "SCMP_ARCH_MIPS", + "SCMP_ARCH_MIPS64N32" + ] + }, + { + "architecture": "SCMP_ARCH_MIPS64N32", + "subArchitectures": [ + "SCMP_ARCH_MIPS", + "SCMP_ARCH_MIPS64" + ] + }, + { + "architecture": "SCMP_ARCH_MIPSEL64", + "subArchitectures": [ + "SCMP_ARCH_MIPSEL", + "SCMP_ARCH_MIPSEL64N32" + ] + }, + { + "architecture": "SCMP_ARCH_MIPSEL64N32", + "subArchitectures": [ + "SCMP_ARCH_MIPSEL", + "SCMP_ARCH_MIPSEL64" + ] + }, + { + "architecture": "SCMP_ARCH_S390X", + "subArchitectures": [ + "SCMP_ARCH_S390" + ] + }, + { + "architecture": "SCMP_ARCH_RISCV64", + "subArchitectures": null + }, + { + "architecture": "SCMP_ARCH_LOONGARCH64", + "subArchitectures": null + } + ], + "syscalls": [ + { + "names": [ + "accept", + "accept4", + "access", + "adjtimex", + "alarm", + "bind", + "brk", + "cachestat", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "clock_adjtime", + "clock_adjtime64", + "clock_getres", + "clock_getres_time64", + "clock_gettime", + "clock_gettime64", + "clock_nanosleep", + "clock_nanosleep_time64", + "close", + "close_range", + "connect", + "copy_file_range", + "creat", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + "epoll_pwait2", + "epoll_wait", + "epoll_wait_old", + "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "faccessat2", + "fadvise64", + "fadvise64_64", + "fallocate", + "fanotify_mark", + "fchdir", + "fchmod", + "fchmodat", + 
"fchmodat2", + "fchown", + "fchown32", + "fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsetxattr", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futex_requeue", + "futex_time64", + "futex_wait", + "futex_waitv", + "futex_wake", + "futimesat", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + "getgroups32", + "getitimer", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "get_robust_list", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "get_thread_area", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "getxattrat", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "ioctl", + "io_destroy", + "io_getevents", + "io_pgetevents", + "io_pgetevents_time64", + "ioprio_get", + "ioprio_set", + "io_setup", + "io_submit", + "ipc", + "kill", + "landlock_add_rule", + "landlock_create_ruleset", + "landlock_restrict_self", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listmount", + "listxattr", + "listxattrat", + "llistxattr", + "_llseek", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "map_shadow_stack", + "membarrier", + "memfd_create", + "memfd_secret", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "mmap", + "mmap2", + "mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedreceive_time64", + "mq_timedsend", + "mq_timedsend_time64", + "mq_unlink", + "mremap", + "mseal", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", 
+ "munmap", + "name_to_handle_at", + "nanosleep", + "newfstatat", + "_newselect", + "open", + "openat", + "openat2", + "pause", + "pidfd_open", + "pidfd_send_signal", + "pipe", + "pipe2", + "pkey_alloc", + "pkey_free", + "pkey_mprotect", + "poll", + "ppoll", + "ppoll_time64", + "prctl", + "pread64", + "preadv", + "preadv2", + "prlimit64", + "process_mrelease", + "pselect6", + "pselect6_time64", + "pwrite64", + "pwritev", + "pwritev2", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "recv", + "recvfrom", + "recvmmsg", + "recvmmsg_time64", + "recvmsg", + "remap_file_pages", + "removexattr", + "removexattrat", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "riscv_hwprobe", + "rmdir", + "rseq", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + "rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_sigtimedwait_time64", + "rt_tgsigqueueinfo", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_rr_get_interval_time64", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "semtimedop_time64", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", + "sendto", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + "set_robust_list", + "setsid", + "setsockopt", + "set_thread_area", + "set_tid_address", + "setuid", + "setuid32", + "setxattr", + "setxattrat", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signalfd", + "signalfd4", + "sigprocmask", + "sigreturn", + 
"socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + "statmount", + "statx", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "syncfs", + "sysinfo", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timer_getoverrun", + "timer_gettime", + "timer_gettime64", + "timer_settime", + "timer_settime64", + "timerfd_create", + "timerfd_gettime", + "timerfd_gettime64", + "timerfd_settime", + "timerfd_settime64", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + "umask", + "uname", + "unlink", + "unlinkat", + "uretprobe", + "utime", + "utimensat", + "utimensat_time64", + "utimes", + "vfork", + "vmsplice", + "wait4", + "waitid", + "waitpid", + "write", + "writev" + ], + "action": "SCMP_ACT_ALLOW" + }, + { + "names": [ + "process_vm_readv", + "process_vm_writev", + "ptrace" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "minKernel": "4.8" + } + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 38, + "op": "SCMP_CMP_LT" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 39, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 40, + "op": "SCMP_CMP_GT" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 0, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 8, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131072, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131080, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + 
"personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 4294967295, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "sync_file_range2", + "swapcontext" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "ppc64le" + ] + } + }, + { + "names": [ + "arm_fadvise64_64", + "arm_sync_file_range", + "sync_file_range2", + "breakpoint", + "cacheflush", + "set_tls" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "arm", + "arm64" + ] + } + }, + { + "names": [ + "arch_prctl" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "amd64", + "x32" + ] + } + }, + { + "names": [ + "modify_ldt" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "amd64", + "x32", + "x86" + ] + } + }, + { + "names": [ + "s390_pci_mmio_read", + "s390_pci_mmio_write", + "s390_runtime_instr" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "s390", + "s390x" + ] + } + }, + { + "names": [ + "riscv_flush_icache" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "riscv64" + ] + } + }, + { + "names": [ + "open_by_handle_at" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_DAC_READ_SEARCH" + ] + } + }, + { + "names": [ + "bpf", + "clone", + "clone3", + "fanotify_init", + "fsconfig", + "fsmount", + "fsopen", + "fspick", + "lookup_dcookie", + "lsm_get_self_attr", + "lsm_list_modules", + "lsm_set_self_attr", + "mount", + "mount_setattr", + "move_mount", + "open_tree", + "perf_event_open", + "quotactl", + "quotactl_fd", + "setdomainname", + "sethostname", + "setns", + "syslog", + "umount", + "umount2", + "unshare" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_ADMIN" + ] + } + }, + { + "names": [ + "clone" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 2114060288, + "op": "SCMP_CMP_MASKED_EQ" + } + ], + "excludes": { + "caps": [ + "CAP_SYS_ADMIN" + ], + "arches": [ + "s390", + "s390x" + ] + } + }, + { + 
"names": [ + "clone" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 1, + "value": 2114060288, + "op": "SCMP_CMP_MASKED_EQ" + } + ], + "comment": "s390 parameter ordering for clone is different", + "includes": { + "arches": [ + "s390", + "s390x" + ] + }, + "excludes": { + "caps": [ + "CAP_SYS_ADMIN" + ] + } + }, + { + "names": [ + "clone3" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38, + "excludes": { + "caps": [ + "CAP_SYS_ADMIN" + ] + } + }, + { + "names": [ + "reboot" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_BOOT" + ] + } + }, + { + "names": [ + "chroot" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_CHROOT" + ] + } + }, + { + "names": [ + "delete_module", + "init_module", + "finit_module" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_MODULE" + ] + } + }, + { + "names": [ + "acct" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_PACCT" + ] + } + }, + { + "names": [ + "kcmp", + "pidfd_getfd", + "process_madvise", + "process_vm_readv", + "process_vm_writev", + "ptrace" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_PTRACE" + ] + } + }, + { + "names": [ + "iopl", + "ioperm" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_RAWIO" + ] + } + }, + { + "names": [ + "settimeofday", + "stime", + "clock_settime", + "clock_settime64" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_TIME" + ] + } + }, + { + "names": [ + "vhangup" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_TTY_CONFIG" + ] + } + }, + { + "names": [ + "get_mempolicy", + "mbind", + "set_mempolicy", + "set_mempolicy_home_node" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_NICE" + ] + } + }, + { + "names": [ + "syslog" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYSLOG" + ] + } + }, + { + "names": [ + "bpf" + ], + "action": 
"SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_BPF" + ] + } + }, + { + "names": [ + "perf_event_open" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_PERFMON" + ] + } + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Defense in depth on top of CONFIG_NET_KEY=n.", + "args": [ + { + "index": 0, + "value": 15, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Defense in depth on top of CONFIG_AF_RXRPC=n.", + "args": [ + { + "index": 0, + "value": 33, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Defense in depth on top of CONFIG_MCTP=n", + "args": [ + { + "index": 0, + "value": 45, + "op": "SCMP_CMP_EQ" + } + ] + } + ] +} From f90002fb8e3e0d9b69b8ae7d0cbb086d36389505 Mon Sep 17 00:00:00 2001 From: shashial <10578726+shashial@users.noreply.github.com> Date: Tue, 12 May 2026 21:37:51 +0100 Subject: [PATCH 10/16] kernel: pin CONFIG_MCTP=n AF_MCTP (Management Component Transport Protocol) has no consumer on this image. Same rationale as the AF_RXRPC and RXKAD pins in e5c78b3: reduce kernel attack surface by not registering the family at all. Pairs with the SCMP_CMP_EQ 45 rule in the previous commit, which acts as a seccomp-layer tripwire if this kernel pin ever drifts back to =y. 
--- shared/kernel/config.d/01-sane-defaults | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/kernel/config.d/01-sane-defaults b/shared/kernel/config.d/01-sane-defaults index 4afe4daf..03381481 100644 --- a/shared/kernel/config.d/01-sane-defaults +++ b/shared/kernel/config.d/01-sane-defaults @@ -215,6 +215,7 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_ISO9660_FS is not set # CONFIG_KVM_XEN is not set # CONFIG_L2TP is not set +# CONFIG_MCTP is not set # CONFIG_NET_DEVMEM is not set # CONFIG_NET_FOU is not set # CONFIG_NET_IPGRE_DEMUX is not set From b250bc83bddf9075d7630653847b4743ecaa11ff Mon Sep 17 00:00:00 2001 From: shashial <10578726+shashial@users.noreply.github.com> Date: Mon, 18 May 2026 17:33:29 +0100 Subject: [PATCH 11/16] add more syscalls and kernel config options --- .../mkosi.extra/etc/containers/seccomp.json | 96 +++++++++++++++++++ shared/kernel/config.d/01-sane-defaults | 9 ++ 2 files changed, 105 insertions(+) diff --git a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json index a7560ced..4cffb445 100644 --- a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json +++ b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json @@ -915,6 +915,102 @@ "op": "SCMP_CMP_EQ" } ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Block AF_KCM (41). sibling of the MSG_SPLICE_PAGES family that contains Fragnesia (XFRM ESP-in-TCP).", + "args": [ + { + "index": 0, + "value": 41, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Block AF_QIPCRTR (42). 
Qualcomm IPC router", + "args": [ + { + "index": 0, + "value": 42, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Block AF_SMC (43)", + "args": [ + { + "index": 0, + "value": 43, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Defense in depth on top of CONFIG_XDP_SOCKETS=n: block AF_XDP (44).", + "args": [ + { + "index": 0, + "value": 44, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "pidfd_getfd" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block 25 cap-gates pidfd_getfd on CAP_SYS_PTRACE" + }, + { + "names": [ + "io_uring_setup", + "io_uring_enter", + "io_uring_register" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38, + "comment": "Defense in depth on top of CONFIG_IO_URING=n" + }, + { + "names": [ + "userfaultfd" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "userfaultfd is a recurring race-window primitive in kernel LPEs (it lets userspace pause kernel-side page faults)" + }, + { + "names": [ + "add_key", + "request_key", + "keyctl" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38, + "comment": "Kernel keyring API" } ] } diff --git a/shared/kernel/config.d/01-sane-defaults b/shared/kernel/config.d/01-sane-defaults index 03381481..a001ebc1 100644 --- a/shared/kernel/config.d/01-sane-defaults +++ b/shared/kernel/config.d/01-sane-defaults @@ -176,6 +176,7 @@ CONFIG_VIRTIO_MEM=y CONFIG_BALLOON_COMPACTION=y CONFIG_MEMORY_BALLOON=y +# CONFIG_AF_KCM is not set # CONFIG_AF_RXRPC is not set # CONFIG_BCACHEFS_FS is not set # CONFIG_BLK_DEV_RBD is not set @@ -244,8 +245,16 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_TCP_CONG_VENO is not set # CONFIG_TCP_CONG_WESTWOOD is not set # CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_MD5SIG is not set # CONFIG_TIPC is not set # CONFIG_UDF_FS is not set +# CONFIG_USERFAULTFD is not set +# CONFIG_VDPA is not set +# 
CONFIG_VHOST is not set +# CONFIG_VHOST_NET is not set +# CONFIG_VHOST_SCSI is not set +# CONFIG_VHOST_VDPA is not set +# CONFIG_VHOST_VSOCK is not set # CONFIG_X86_ACPI_CPUFREQ_CPB is not set # CONFIG_X86_P4_CLOCKMOD is not set # CONFIG_X86_POWERNOW_K8 is not set From 240c33921d5d788ba837c654695d7c323064af0e Mon Sep 17 00:00:00 2001 From: shashial <10578726+shashial@users.noreply.github.com> Date: Mon, 18 May 2026 18:44:03 +0100 Subject: [PATCH 12/16] shrink rules --- .../mkosi.extra/etc/containers/seccomp.json | 78 +------------------ 1 file changed, 2 insertions(+), 76 deletions(-) diff --git a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json index 4cffb445..b96cf405 100644 --- a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json +++ b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json @@ -467,10 +467,11 @@ "socket" ], "action": "SCMP_ACT_ALLOW", + "comment": "Blocks AF_ALG (38), AF_VSOCK (40), AF_KCM (41), AF_QIPCRTR (42), AF_SMC (43), AF_XDP (44), AF_MCTP (45) via the default ERRNO.", "args": [ { "index": 0, - "value": 40, + "value": 45, "op": "SCMP_CMP_GT" } ] @@ -901,81 +902,6 @@ } ] }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 97, - "comment": "Defense in depth on top of CONFIG_MCTP=n", - "args": [ - { - "index": 0, - "value": 45, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 97, - "comment": "Block AF_KCM (41). sibling of the MSG_SPLICE_PAGES family that contains Fragnesia (XFRM ESP-in-TCP).", - "args": [ - { - "index": 0, - "value": 41, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 97, - "comment": "Block AF_QIPCRTR (42). 
Qualcomm IPC router", - "args": [ - { - "index": 0, - "value": 42, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 97, - "comment": "Block AF_SMC (43)", - "args": [ - { - "index": 0, - "value": 43, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 97, - "comment": "Defense in depth on top of CONFIG_XDP_SOCKETS=n: block AF_XDP (44).", - "args": [ - { - "index": 0, - "value": 44, - "op": "SCMP_CMP_EQ" - } - ] - }, { "names": [ "pidfd_getfd"
From c6540416141fd31d8928e560b814f16b506be5f6 Mon Sep 17 00:00:00 2001
From: shashial <10578726+shashial@users.noreply.github.com>
Date: Tue, 19 May 2026 17:22:15 +0100
Subject: [PATCH 13/16] add regression-check python script

---
 scripts/verify-hardening.py | 354 ++++++++++++++++++++++++++++++++++++
 1 file changed, 354 insertions(+)
 create mode 100755 scripts/verify-hardening.py

diff --git a/scripts/verify-hardening.py b/scripts/verify-hardening.py
new file mode 100755
index 00000000..38ceb4e4
--- /dev/null
+++ b/scripts/verify-hardening.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+"""
+verify-hardening.py — post-build regression for flashbox hardening.
+
+Run inside the searcher container of a booted image. Exit 0 if all checks
+pass, 1 on regression.
+
+This is a hand-maintained checklist, not a source-driven test. When you
+add a seccomp rule or a kernel pin, ADD A CORRESPONDING CHECK BELOW.
+The script is the regression spec — keeping it current is part of the
+rule-addition workflow.
+
+Usage:
+    python3 verify-hardening.py [--json]
+    make verify-hardening TARGET=root@host:port # scp + ssh + run
+"""
+
+import argparse
+import ctypes
+import ctypes.util
+import errno
+import json
+import os
+import socket
+import sys
+
+
+libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
+
+
+# Syscall numbers for x86_64 (production) and aarch64 (podman-on-mac testbed).
+_SYSNRS = {
+    "x86_64": {"swapon": 167, "userfaultfd": 323, "io_uring_setup": 425,
+               "io_uring_enter": 426, "io_uring_register": 427,
+               "pidfd_open": 434, "pidfd_getfd": 438, "add_key": 248,
+               "request_key": 249, "keyctl": 250, "unshare": 272,
+               "clone3": 435, "mount": 165, "bpf": 321},
+    "aarch64": {"swapon": 224, "userfaultfd": 282, "io_uring_setup": 425,
+                "io_uring_enter": 426, "io_uring_register": 427,
+                "pidfd_open": 434, "pidfd_getfd": 438, "add_key": 217,
+                "request_key": 218, "keyctl": 219, "unshare": 97,
+                "clone3": 435, "mount": 40, "bpf": 280},
+}
+
+# uname(2) machine strings mapped onto the keys of _SYSNRS.
+_MACHINE_ALIASES = {
+    "x86_64": "x86_64",
+    "amd64": "x86_64",
+    "aarch64": "aarch64",
+    "arm64": "aarch64",
+}
+
+
+def _detect_arch():
+    """Return the _SYSNRS key for the architecture this process runs on.
+
+    Uses the machine field of uname(2) rather than searching /proc/version:
+    that banner holds the release, builder and compiler strings, so an
+    architecture name shows up in it only incidentally.  An architecture
+    without a syscall table aborts the run, because probing with another
+    architecture's syscall numbers would invoke unrelated syscalls.
+    """
+    machine = os.uname().machine
+    try:
+        return _MACHINE_ALIASES[machine.lower()]
+    except KeyError:
+        sys.exit(f"verify-hardening: unsupported architecture {machine!r} "
+                 f"(supported: {', '.join(sorted(_SYSNRS))})")
+
+
+_ARCH = _detect_arch()
+SYS = _SYSNRS[_ARCH]
+
+# Statuses are stored as plain text so results and JSON stay colour-free;
+# ANSI colour is added only when a line is printed to a terminal.
+PASS = "PASS"
+FAIL = "FAIL"
+INFO = "INFO"
+_STATUS_COLOR = {PASS: "32", FAIL: "31", INFO: "33"}
+_USE_COLOR = sys.stdout.isatty() and os.environ.get("NO_COLOR") is None
+
+
+def _paint(status):
+    """Return status wrapped in its ANSI colour when colour output is on."""
+    if not _USE_COLOR:
+        return status
+    return f"\033[{_STATUS_COLOR[status]}m{status}\033[0m"
+
+
+def _ename(n):
+    """Return the symbolic name of errno n, or str(n) when it has none."""
+    return errno.errorcode.get(n, str(n))
+
+
+def _syscall(nr, *args):
+    """Invoke syscall number nr through libc; return (result, errno)."""
+    ctypes.set_errno(0)  # so a stale errno is not reported for this call
+    r = libc.syscall(nr, *args)
+    return r, ctypes.get_errno()
+
+
+class Recorder:
+    """Runs checks, prints each outcome and keeps the list of results."""
+
+    def __init__(self):
+        self.results = []  # (status, label, msg)
+
+    def record(self, status, label, msg):
+        """Store one result and print it as a single aligned line."""
+        self.results.append((status, label, msg))
+        print(f" {_paint(status)} {label:46s} {msg}")
+
+    def expect_deny(self, label, expected, indeterminate, fn):
+        """Check that fn() is denied with an acceptable errno.
+
+        fn returns (rc, errno) or raises OSError.  rc >= 0 is a FAIL.  An
+        errno in expected is a PASS; otherwise an errno in indeterminate is
+        an INFO (denied, but the denying layer cannot be identified); any
+        other errno is a FAIL.  expected is tested first, so an errno listed
+        in both tuples is reported as PASS.
+        """
+        try:
+            rc, e = fn()
+        except OSError as ex:
+            rc, e = -1, ex.errno
+        if rc is not None and rc >= 0:
+            self.record(FAIL, label, f"syscall succeeded (rc={rc}) — expected denial")
+        elif e in expected:
+            self.record(PASS, label, f"denied with {_ename(e)}")
+        elif e in indeterminate:
+            self.record(INFO, label, f"got {_ename(e)} — covered, layer indistinguishable")
+        else:
+            exp = ", ".join(_ename(x) for x in expected)
+            self.record(FAIL, label, f"got {_ename(e)}; expected {exp}")
+
+    def expect_ok(self, label, fn):
+        """Check that fn() completes without raising OSError."""
+        try:
+            fn()
+            self.record(PASS, label, "ok")
+        except OSError as ex:
+            self.record(FAIL, label, f"{_ename(ex.errno)}: {ex.strerror}")
+
+    def check_kallsyms_absent(self, kallsyms, label, sym):
+        """Check that symbol sym is not listed in the kallsyms text.
+
+        The name is preceded by whitespace and followed by a newline for
+        built-in code, or by a tab and "[module]" for a symbol that belongs
+        to a loaded module; either form counts as present.
+        """
+        present = any(f"{lead}{sym}{trail}" in kallsyms
+                      for lead in (" ", "\t") for trail in ("\n", "\t"))
+        if present:
+            self.record(FAIL, label, f"symbol {sym} PRESENT (pin not in effect)")
+        else:
+            self.record(PASS, label, f"symbol {sym} absent")
+
+    def info(self, label, msg):
+        """Record an informational note that never fails the run."""
+        self.record(INFO, label, msg)
+
+
+def _section(t):
+    """Print a section heading."""
+    print(f"\n=== {t} ===")
+
+
+def _socket_probe(family):
+    """Return a callable that tries socket(family, SOCK_RAW, 0).
+
+    The callable returns (0, 0) on success and (-1, errno) on OSError, the
+    (rc, errno) shape that Recorder.expect_deny consumes.
+    """
+    def probe():
+        try:
+            socket.socket(family, socket.SOCK_RAW, 0).close()
+            return 0, 0
+        except OSError as ex:
+            return -1, ex.errno
+    return probe
+
+
+def main():
+    """Run every check and return the exit code: 0 if nothing failed, else 1."""
+    ap = argparse.ArgumentParser(description="post-build hardening regression")
+    ap.add_argument("--json", action="store_true", help="machine-readable output")
+    args = ap.parse_args()
+
+    r = Recorder()
+    print(f"Arch detected from uname: {_ARCH}")
+
+    _section("Canary: seccomp filter active")
+    r.expect_deny("swapon (default-ERRNO)", (1,), (),
+                  lambda: _syscall(SYS["swapon"], ctypes.c_char_p(b"/nonexistent"), 0))
+
+    _section("Sanity: legitimate syscalls still pass")
+    r.expect_ok("AF_INET socket open",
+                lambda: socket.socket(socket.AF_INET, socket.SOCK_STREAM).close())
+    r.expect_ok("AF_UNIX socket open",
+                lambda: socket.socket(socket.AF_UNIX, socket.SOCK_STREAM).close())
+    r.expect_ok("AF_NETLINK socket open",
+                lambda: socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, 0).close())
+    r.expect_ok("pidfd_open(self,0)",
+                lambda: os.close(os.pidfd_open(os.getpid(), 0)))
+
+    # Cap-gated escape primitives: container's CapEff (0x800405fb) excludes
+    # CAP_SYS_ADMIN; these must all deny.
+    _section("Sandbox boundaries (cap-gated denies)")
+    CLONE_NEWUSER, CLONE_NEWNET = 0x10000000, 0x40000000
+    r.expect_deny("unshare(NEWUSER|NEWNET)", (1,), (),
+                  lambda: _syscall(SYS["unshare"], CLONE_NEWUSER | CLONE_NEWNET))
+    r.expect_deny("clone3(empty args)", (1, 22, 38), (38,),
+                  lambda: _syscall(SYS["clone3"], 0, 0))
+    r.expect_deny("mount(NULL, /, NULL, 0, NULL)", (1,), (),
+                  lambda: _syscall(SYS["mount"], 0, ctypes.c_char_p(b"/"), 0, 0, 0))
+    r.expect_deny("bpf(BPF_PROG_LOAD, ...)", (1,), (),
+                  lambda: _syscall(SYS["bpf"], 5, 0, 0))
+
+    # EQ rules inside the arg0<38 ALLOW range. EAFNOSUPPORT is indeterminate
+    # because the kernel also has these families pinned off; the libseccomp
+    # regression section below pries the layers apart.
+    # NOTE(review): 97 is in both tuples and expect_deny tests expected first,
+    # so these report PASS, never INFO — confirm that is intended.
+    _section("Explicit EQ socket family denies")
+    r.expect_deny("AF_KEY (15)", (97,), (97,), _socket_probe(15))
+    r.expect_deny("AF_RXRPC (33)", (97,), (97,), _socket_probe(33))
+
+    # Simplified block 4 is arg0>45; these fall to defaultErrnoRet=1.
+    _section("Gap denies (default-ERRNO)")
+    for fam, label in [(38, "AF_ALG (38)"), (40, "AF_VSOCK (40)"),
+                       (41, "AF_KCM (41)"), (42, "AF_QIPCRTR (42)"),
+                       (43, "AF_SMC (43)"), (44, "AF_XDP (44)"),
+                       (45, "AF_MCTP (45)")]:
+        r.expect_deny(label, (1,), (97,), _socket_probe(fam))
+
+    # libseccomp NE/EQ chain-ordering bug regression. On a kernel with the
+    # family compiled in, EAFNOSUPPORT proves seccomp fired (not kernel).
+    _section("libseccomp chain-ordering regression")
+    try:
+        with open("/proc/kallsyms") as fh:
+            kallsyms = fh.read()
+    except (FileNotFoundError, PermissionError):
+        kallsyms = ""
+    for fam, label, sym in [(15, "AF_KEY vs CONFIG_NET_KEY", "pfkey_create"),
+                            (33, "AF_RXRPC vs CONFIG_AF_RXRPC", "rxrpc_create")]:
+        if sym in kallsyms:
+            r.expect_deny(f"{label} [kernel-present]", (97,), (), _socket_probe(fam))
+        else:
+            try:
+                socket.socket(fam, socket.SOCK_RAW, 0).close()
+                r.record(FAIL, label, "syscall succeeded — regression")
+            except OSError as ex:
+                r.info(label, f"kernel-absent ({sym} not in kallsyms); got {_ename(ex.errno)}")
+
+    _section("Syscall denies")
+
+    def t_pidfd_getfd():
+        pfd = os.pidfd_open(os.getpid(), 0)
+        try:
+            return _syscall(SYS["pidfd_getfd"], ctypes.c_int(pfd),
+                            ctypes.c_int(0), ctypes.c_uint(0))
+        finally:
+            os.close(pfd)
+    r.expect_deny("pidfd_getfd", (1,), (), t_pidfd_getfd)
+    r.expect_deny("io_uring_setup", (38,), (38,),
+                  lambda: _syscall(SYS["io_uring_setup"], ctypes.c_uint(32), ctypes.c_void_p(0)))
+    r.expect_deny("io_uring_enter", (38,), (38,),
+                  lambda: _syscall(SYS["io_uring_enter"], ctypes.c_int(-1), 0, 0, 0, 0, 0))
+    r.expect_deny("io_uring_register", (38,), (38,),
+                  lambda: _syscall(SYS["io_uring_register"], ctypes.c_int(-1), 0, 0, 0))
+    r.expect_deny("userfaultfd", (1,), (),
+                  lambda: _syscall(SYS["userfaultfd"], 0))
+    r.expect_deny("keyctl", (38,), (38,),
+                  lambda: _syscall(SYS["keyctl"], 0, 0, 0, 0, 0))
+    r.expect_deny("add_key", (38,), (38,),
+                  lambda: _syscall(SYS["add_key"], ctypes.c_char_p(b"user"),
+                                   ctypes.c_char_p(b"flashbox-test"),
+                                   ctypes.c_char_p(b"x"), ctypes.c_size_t(1),
+                                   ctypes.c_int(0)))
+    r.expect_deny("request_key", (38,), (38,),
+                  lambda: _syscall(SYS["request_key"], ctypes.c_char_p(b"user"),
+                                   ctypes.c_char_p(b"flashbox-test"), 0, 0))
+
+    # Kernel-config pins, verified by absence of representative kallsyms symbol.
+    # NF_TABLES is overlaid back on by modules/flashbox/common/kernel/config.d/10-bob
+    # for iptables-nft; expected PRESENT on flashbox images.
+    _section("Deployed kernel-config pins (via /proc/kallsyms)")
+    if not kallsyms:
+        r.info("kallsyms unreadable", "skipping kernel-config audit")
+    else:
+        is_flashbox = "mkosi-cloud" in os.uname().release
+        nft_present = "nft_chain_validate" in kallsyms
+        if is_flashbox:
+            label = "CONFIG_NF_TABLES (flashbox: 10-bob =y)"
+            if nft_present:
+                r.info(label, "present as expected for iptables-nft")
+            else:
+                r.record(FAIL, label, "absent — would break iptables-nft")
+        else:
+            r.check_kallsyms_absent(kallsyms, "CONFIG_NF_TABLES=n", "nft_chain_validate")
+
+        # Pins added on this branch + pre-existing pins worth regressing.
+        for cfg, sym in [
+            ("CONFIG_AF_KCM=n", "kcm_create_basic"),
+            ("CONFIG_USERFAULTFD=n", "new_userfaultfd"),
+            ("CONFIG_TCP_MD5SIG=n", "tcp_md5_do_add"),
+            ("CONFIG_VHOST=n", "vhost_dev_init"),
+            ("CONFIG_VHOST_NET=n", "vhost_net_open"),
+            ("CONFIG_VHOST_VSOCK=n", "vhost_vsock_dev_open"),
+            ("CONFIG_VDPA=n", "vdpa_register_device"),
+            ("CONFIG_AF_RXRPC=n", "rxrpc_create"),
+            ("CONFIG_NET_KEY=n", "pfkey_create"),
+            ("CONFIG_MCTP=n", "mctp_init"),
+            ("CONFIG_XFRM=n", "xfrm_state_alloc"),
+            ("CONFIG_XFRM_ESPINTCP=n", "espintcp_init_sk"),
+            ("CONFIG_IO_URING=n", "__do_sys_io_uring_setup"),
+        ]:
+            r.check_kallsyms_absent(kallsyms, cfg, sym)
+
+        # Tripwire: presence indicates the upstream ptrace fix landed and
+        # the in-tree backport (when we add it) can be dropped.
+        if "task_still_dumpable" in kallsyms:
+            r.info("__ptrace_may_access fix landed",
+                   "task_still_dumpable PRESENT — drop in-tree backport")
+        else:
+            r.info("__ptrace_may_access fix not backported",
+                   "seccomp pidfd_getfd deny is the only layer")
+
+    _section("Summary")
+    n_pass = sum(1 for x in r.results if x[0] == PASS)
+    n_info = sum(1 for x in r.results if x[0] == INFO)
+    n_fail = sum(1 for x in r.results if x[0] == FAIL)
+    print(f" {n_pass} pass, {n_info} info, {n_fail} fail")
+    if n_fail:
+        print("\nFAIL details:")
+        for status, label, msg in r.results:
+            if status == FAIL:
+                print(f" • {label}: {msg}")
+
+    if args.json:
+        out = {
+            "arch": _ARCH,
+            "summary": {"pass": n_pass, "info": n_info, "fail": n_fail},
+            "results": [{"status": s, "label": l, "msg": m}
+                        for s, l, m in r.results],
+        }
+        print("\n--- JSON ---")
+        print(json.dumps(out, indent=2))
+
+    return 0 if n_fail == 0 else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
From d2380103b1ce239db0339069aa5f6f18d73e8934 Mon Sep 17 00:00:00 2001 From: 0x416e746f6e Date: Tue, 26 May 2026 13:24:51 +0200 Subject: [PATCH 14/16] fix: use `systemd-repart` from nix (cherry picked from commit 526af72bf7199aff9fe3e104bab3f2de458d3ef2) --- mkosi.profiles/gcp/mkosi.postoutput | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mkosi.profiles/gcp/mkosi.postoutput b/mkosi.profiles/gcp/mkosi.postoutput index ac95f434..c59f38e6 100755 --- a/mkosi.profiles/gcp/mkosi.postoutput +++ b/mkosi.profiles/gcp/mkosi.postoutput @@ -21,7 +21,14 @@ cp mkosi.profiles/gcp/repart.d/00-uki.conf "${REPART_TMPDIR}/00-uki.conf" echo "SizeMinBytes=${ESP_BYTES}" >> "${REPART_TMPDIR}/00-uki.conf" echo "SizeMaxBytes=${ESP_BYTES}" >> "${REPART_TMPDIR}/00-uki.conf" -systemd-repart --empty=create \ +# +# - default systemd-repart (in /usr/bin) is "systemd 252 (252.39-1~deb12u2)" +# - the
one from nix (/nix/store/*-systemd-*/bin) is newer ("systemd 258 (258.3)" a.t.m.) +# - to use "--copy-source" CLI option we need a newer one +# - this hack ensures systemd-repart resolves to the nix one +# +PATH="${PATH#/usr/bin:/usr/sbin:}" systemd-repart \ + --empty=create \ --size=${DISK_GIB}G \ --definitions="${REPART_TMPDIR}" \ --copy-source=${OUTPUTDIR} \ From d04fde7d16a02a03e7e6cac800e0913d456aca35 Mon Sep 17 00:00:00 2001 From: 0x416e746f6e Date: Wed, 27 May 2026 12:57:15 +0200 Subject: [PATCH 15/16] chore: convert indentation back to tabs --- .../mkosi.extra/etc/containers/seccomp.json | 1880 ++++++++--------- 1 file changed, 940 insertions(+), 940 deletions(-) diff --git a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json index b96cf405..f36b2937 100644 --- a/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json +++ b/modules/flashbox/common/mkosi.extra/etc/containers/seccomp.json @@ -1,942 +1,942 @@ { - "defaultAction": "SCMP_ACT_ERRNO", - "defaultErrnoRet": 1, - "archMap": [ - { - "architecture": "SCMP_ARCH_X86_64", - "subArchitectures": [ - "SCMP_ARCH_X86", - "SCMP_ARCH_X32" - ] - }, - { - "architecture": "SCMP_ARCH_AARCH64", - "subArchitectures": [ - "SCMP_ARCH_ARM" - ] - }, - { - "architecture": "SCMP_ARCH_MIPS64", - "subArchitectures": [ - "SCMP_ARCH_MIPS", - "SCMP_ARCH_MIPS64N32" - ] - }, - { - "architecture": "SCMP_ARCH_MIPS64N32", - "subArchitectures": [ - "SCMP_ARCH_MIPS", - "SCMP_ARCH_MIPS64" - ] - }, - { - "architecture": "SCMP_ARCH_MIPSEL64", - "subArchitectures": [ - "SCMP_ARCH_MIPSEL", - "SCMP_ARCH_MIPSEL64N32" - ] - }, - { - "architecture": "SCMP_ARCH_MIPSEL64N32", - "subArchitectures": [ - "SCMP_ARCH_MIPSEL", - "SCMP_ARCH_MIPSEL64" - ] - }, - { - "architecture": "SCMP_ARCH_S390X", - "subArchitectures": [ - "SCMP_ARCH_S390" - ] - }, - { - "architecture": "SCMP_ARCH_RISCV64", - "subArchitectures": null - }, - { - "architecture": 
"SCMP_ARCH_LOONGARCH64", - "subArchitectures": null - } - ], - "syscalls": [ - { - "names": [ - "accept", - "accept4", - "access", - "adjtimex", - "alarm", - "bind", - "brk", - "cachestat", - "capget", - "capset", - "chdir", - "chmod", - "chown", - "chown32", - "clock_adjtime", - "clock_adjtime64", - "clock_getres", - "clock_getres_time64", - "clock_gettime", - "clock_gettime64", - "clock_nanosleep", - "clock_nanosleep_time64", - "close", - "close_range", - "connect", - "copy_file_range", - "creat", - "dup", - "dup2", - "dup3", - "epoll_create", - "epoll_create1", - "epoll_ctl", - "epoll_ctl_old", - "epoll_pwait", - "epoll_pwait2", - "epoll_wait", - "epoll_wait_old", - "eventfd", - "eventfd2", - "execve", - "execveat", - "exit", - "exit_group", - "faccessat", - "faccessat2", - "fadvise64", - "fadvise64_64", - "fallocate", - "fanotify_mark", - "fchdir", - "fchmod", - "fchmodat", - "fchmodat2", - "fchown", - "fchown32", - "fchownat", - "fcntl", - "fcntl64", - "fdatasync", - "fgetxattr", - "flistxattr", - "flock", - "fork", - "fremovexattr", - "fsetxattr", - "fstat", - "fstat64", - "fstatat64", - "fstatfs", - "fstatfs64", - "fsync", - "ftruncate", - "ftruncate64", - "futex", - "futex_requeue", - "futex_time64", - "futex_wait", - "futex_waitv", - "futex_wake", - "futimesat", - "getcpu", - "getcwd", - "getdents", - "getdents64", - "getegid", - "getegid32", - "geteuid", - "geteuid32", - "getgid", - "getgid32", - "getgroups", - "getgroups32", - "getitimer", - "getpeername", - "getpgid", - "getpgrp", - "getpid", - "getppid", - "getpriority", - "getrandom", - "getresgid", - "getresgid32", - "getresuid", - "getresuid32", - "getrlimit", - "get_robust_list", - "getrusage", - "getsid", - "getsockname", - "getsockopt", - "get_thread_area", - "gettid", - "gettimeofday", - "getuid", - "getuid32", - "getxattr", - "getxattrat", - "inotify_add_watch", - "inotify_init", - "inotify_init1", - "inotify_rm_watch", - "io_cancel", - "ioctl", - "io_destroy", - "io_getevents", - 
"io_pgetevents", - "io_pgetevents_time64", - "ioprio_get", - "ioprio_set", - "io_setup", - "io_submit", - "ipc", - "kill", - "landlock_add_rule", - "landlock_create_ruleset", - "landlock_restrict_self", - "lchown", - "lchown32", - "lgetxattr", - "link", - "linkat", - "listen", - "listmount", - "listxattr", - "listxattrat", - "llistxattr", - "_llseek", - "lremovexattr", - "lseek", - "lsetxattr", - "lstat", - "lstat64", - "madvise", - "map_shadow_stack", - "membarrier", - "memfd_create", - "memfd_secret", - "mincore", - "mkdir", - "mkdirat", - "mknod", - "mknodat", - "mlock", - "mlock2", - "mlockall", - "mmap", - "mmap2", - "mprotect", - "mq_getsetattr", - "mq_notify", - "mq_open", - "mq_timedreceive", - "mq_timedreceive_time64", - "mq_timedsend", - "mq_timedsend_time64", - "mq_unlink", - "mremap", - "mseal", - "msgctl", - "msgget", - "msgrcv", - "msgsnd", - "msync", - "munlock", - "munlockall", - "munmap", - "name_to_handle_at", - "nanosleep", - "newfstatat", - "_newselect", - "open", - "openat", - "openat2", - "pause", - "pidfd_open", - "pidfd_send_signal", - "pipe", - "pipe2", - "pkey_alloc", - "pkey_free", - "pkey_mprotect", - "poll", - "ppoll", - "ppoll_time64", - "prctl", - "pread64", - "preadv", - "preadv2", - "prlimit64", - "process_mrelease", - "pselect6", - "pselect6_time64", - "pwrite64", - "pwritev", - "pwritev2", - "read", - "readahead", - "readlink", - "readlinkat", - "readv", - "recv", - "recvfrom", - "recvmmsg", - "recvmmsg_time64", - "recvmsg", - "remap_file_pages", - "removexattr", - "removexattrat", - "rename", - "renameat", - "renameat2", - "restart_syscall", - "riscv_hwprobe", - "rmdir", - "rseq", - "rt_sigaction", - "rt_sigpending", - "rt_sigprocmask", - "rt_sigqueueinfo", - "rt_sigreturn", - "rt_sigsuspend", - "rt_sigtimedwait", - "rt_sigtimedwait_time64", - "rt_tgsigqueueinfo", - "sched_getaffinity", - "sched_getattr", - "sched_getparam", - "sched_get_priority_max", - "sched_get_priority_min", - "sched_getscheduler", - "sched_rr_get_interval", 
- "sched_rr_get_interval_time64", - "sched_setaffinity", - "sched_setattr", - "sched_setparam", - "sched_setscheduler", - "sched_yield", - "seccomp", - "select", - "semctl", - "semget", - "semop", - "semtimedop", - "semtimedop_time64", - "send", - "sendfile", - "sendfile64", - "sendmmsg", - "sendmsg", - "sendto", - "setfsgid", - "setfsgid32", - "setfsuid", - "setfsuid32", - "setgid", - "setgid32", - "setgroups", - "setgroups32", - "setitimer", - "setpgid", - "setpriority", - "setregid", - "setregid32", - "setresgid", - "setresgid32", - "setresuid", - "setresuid32", - "setreuid", - "setreuid32", - "setrlimit", - "set_robust_list", - "setsid", - "setsockopt", - "set_thread_area", - "set_tid_address", - "setuid", - "setuid32", - "setxattr", - "setxattrat", - "shmat", - "shmctl", - "shmdt", - "shmget", - "shutdown", - "sigaltstack", - "signalfd", - "signalfd4", - "sigprocmask", - "sigreturn", - "socketcall", - "socketpair", - "splice", - "stat", - "stat64", - "statfs", - "statfs64", - "statmount", - "statx", - "symlink", - "symlinkat", - "sync", - "sync_file_range", - "syncfs", - "sysinfo", - "tee", - "tgkill", - "time", - "timer_create", - "timer_delete", - "timer_getoverrun", - "timer_gettime", - "timer_gettime64", - "timer_settime", - "timer_settime64", - "timerfd_create", - "timerfd_gettime", - "timerfd_gettime64", - "timerfd_settime", - "timerfd_settime64", - "times", - "tkill", - "truncate", - "truncate64", - "ugetrlimit", - "umask", - "uname", - "unlink", - "unlinkat", - "uretprobe", - "utime", - "utimensat", - "utimensat_time64", - "utimes", - "vfork", - "vmsplice", - "wait4", - "waitid", - "waitpid", - "write", - "writev" - ], - "action": "SCMP_ACT_ALLOW" - }, - { - "names": [ - "process_vm_readv", - "process_vm_writev", - "ptrace" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "minKernel": "4.8" - } - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 38, - "op": "SCMP_CMP_LT" - } - ] - }, - { - 
"names": [ - "socket" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 39, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ALLOW", - "comment": "Blocks AF_ALG (38), AF_VSOCK (40), AF_KCM (41), AF_QIPCRTR (42), AF_SMC (43), AF_XDP (44), AF_MCTP (45) via the default ERRNO.", - "args": [ - { - "index": 0, - "value": 45, - "op": "SCMP_CMP_GT" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 0, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 8, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 131072, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 131080, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "personality" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 4294967295, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "sync_file_range2", - "swapcontext" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "ppc64le" - ] - } - }, - { - "names": [ - "arm_fadvise64_64", - "arm_sync_file_range", - "sync_file_range2", - "breakpoint", - "cacheflush", - "set_tls" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "arm", - "arm64" - ] - } - }, - { - "names": [ - "arch_prctl" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "amd64", - "x32" - ] - } - }, - { - "names": [ - "modify_ldt" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "amd64", - "x32", - "x86" - ] - } - }, - { - "names": [ - "s390_pci_mmio_read", - "s390_pci_mmio_write", - "s390_runtime_instr" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "s390", - "s390x" - ] - } - 
}, - { - "names": [ - "riscv_flush_icache" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "arches": [ - "riscv64" - ] - } - }, - { - "names": [ - "open_by_handle_at" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_DAC_READ_SEARCH" - ] - } - }, - { - "names": [ - "bpf", - "clone", - "clone3", - "fanotify_init", - "fsconfig", - "fsmount", - "fsopen", - "fspick", - "lookup_dcookie", - "lsm_get_self_attr", - "lsm_list_modules", - "lsm_set_self_attr", - "mount", - "mount_setattr", - "move_mount", - "open_tree", - "perf_event_open", - "quotactl", - "quotactl_fd", - "setdomainname", - "sethostname", - "setns", - "syslog", - "umount", - "umount2", - "unshare" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_ADMIN" - ] - } - }, - { - "names": [ - "clone" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 0, - "value": 2114060288, - "op": "SCMP_CMP_MASKED_EQ" - } - ], - "excludes": { - "caps": [ - "CAP_SYS_ADMIN" - ], - "arches": [ - "s390", - "s390x" - ] - } - }, - { - "names": [ - "clone" - ], - "action": "SCMP_ACT_ALLOW", - "args": [ - { - "index": 1, - "value": 2114060288, - "op": "SCMP_CMP_MASKED_EQ" - } - ], - "comment": "s390 parameter ordering for clone is different", - "includes": { - "arches": [ - "s390", - "s390x" - ] - }, - "excludes": { - "caps": [ - "CAP_SYS_ADMIN" - ] - } - }, - { - "names": [ - "clone3" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 38, - "excludes": { - "caps": [ - "CAP_SYS_ADMIN" - ] - } - }, - { - "names": [ - "reboot" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_BOOT" - ] - } - }, - { - "names": [ - "chroot" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_CHROOT" - ] - } - }, - { - "names": [ - "delete_module", - "init_module", - "finit_module" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_MODULE" - ] - } - }, - { - "names": [ - "acct" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - 
"caps": [ - "CAP_SYS_PACCT" - ] - } - }, - { - "names": [ - "kcmp", - "pidfd_getfd", - "process_madvise", - "process_vm_readv", - "process_vm_writev", - "ptrace" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_PTRACE" - ] - } - }, - { - "names": [ - "iopl", - "ioperm" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_RAWIO" - ] - } - }, - { - "names": [ - "settimeofday", - "stime", - "clock_settime", - "clock_settime64" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_TIME" - ] - } - }, - { - "names": [ - "vhangup" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_TTY_CONFIG" - ] - } - }, - { - "names": [ - "get_mempolicy", - "mbind", - "set_mempolicy", - "set_mempolicy_home_node" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYS_NICE" - ] - } - }, - { - "names": [ - "syslog" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_SYSLOG" - ] - } - }, - { - "names": [ - "bpf" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_BPF" - ] - } - }, - { - "names": [ - "perf_event_open" - ], - "action": "SCMP_ACT_ALLOW", - "includes": { - "caps": [ - "CAP_PERFMON" - ] - } - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 97, - "comment": "Defense in depth on top of CONFIG_NET_KEY=n.", - "args": [ - { - "index": 0, - "value": 15, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "socket" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 97, - "comment": "Defense in depth on top of CONFIG_AF_RXRPC=n.", - "args": [ - { - "index": 0, - "value": 33, - "op": "SCMP_CMP_EQ" - } - ] - }, - { - "names": [ - "pidfd_getfd" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 1, - "comment": "Block 25 cap-gates pidfd_getfd on CAP_SYS_PTRACE" - }, - { - "names": [ - "io_uring_setup", - "io_uring_enter", - "io_uring_register" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 38, - "comment": "Defense in 
depth on top of CONFIG_IO_URING=n" - }, - { - "names": [ - "userfaultfd" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 1, - "comment": "userfaultfd is a recurring race-window primitive in kernel LPEs (it lets userspace pause kernel-side page faults)" - }, - { - "names": [ - "add_key", - "request_key", - "keyctl" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 38, - "comment": "Kernel keyring API" - } - ] + "defaultAction": "SCMP_ACT_ERRNO", + "defaultErrnoRet": 1, + "archMap": [ + { + "architecture": "SCMP_ARCH_X86_64", + "subArchitectures": [ + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ] + }, + { + "architecture": "SCMP_ARCH_AARCH64", + "subArchitectures": [ + "SCMP_ARCH_ARM" + ] + }, + { + "architecture": "SCMP_ARCH_MIPS64", + "subArchitectures": [ + "SCMP_ARCH_MIPS", + "SCMP_ARCH_MIPS64N32" + ] + }, + { + "architecture": "SCMP_ARCH_MIPS64N32", + "subArchitectures": [ + "SCMP_ARCH_MIPS", + "SCMP_ARCH_MIPS64" + ] + }, + { + "architecture": "SCMP_ARCH_MIPSEL64", + "subArchitectures": [ + "SCMP_ARCH_MIPSEL", + "SCMP_ARCH_MIPSEL64N32" + ] + }, + { + "architecture": "SCMP_ARCH_MIPSEL64N32", + "subArchitectures": [ + "SCMP_ARCH_MIPSEL", + "SCMP_ARCH_MIPSEL64" + ] + }, + { + "architecture": "SCMP_ARCH_S390X", + "subArchitectures": [ + "SCMP_ARCH_S390" + ] + }, + { + "architecture": "SCMP_ARCH_RISCV64", + "subArchitectures": null + }, + { + "architecture": "SCMP_ARCH_LOONGARCH64", + "subArchitectures": null + } + ], + "syscalls": [ + { + "names": [ + "accept", + "accept4", + "access", + "adjtimex", + "alarm", + "bind", + "brk", + "cachestat", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "clock_adjtime", + "clock_adjtime64", + "clock_getres", + "clock_getres_time64", + "clock_gettime", + "clock_gettime64", + "clock_nanosleep", + "clock_nanosleep_time64", + "close", + "close_range", + "connect", + "copy_file_range", + "creat", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + 
"epoll_pwait2", + "epoll_wait", + "epoll_wait_old", + "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "faccessat2", + "fadvise64", + "fadvise64_64", + "fallocate", + "fanotify_mark", + "fchdir", + "fchmod", + "fchmodat", + "fchmodat2", + "fchown", + "fchown32", + "fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsetxattr", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futex_requeue", + "futex_time64", + "futex_wait", + "futex_waitv", + "futex_wake", + "futimesat", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + "getgroups32", + "getitimer", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "get_robust_list", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "get_thread_area", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "getxattrat", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "ioctl", + "io_destroy", + "io_getevents", + "io_pgetevents", + "io_pgetevents_time64", + "ioprio_get", + "ioprio_set", + "io_setup", + "io_submit", + "ipc", + "kill", + "landlock_add_rule", + "landlock_create_ruleset", + "landlock_restrict_self", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listmount", + "listxattr", + "listxattrat", + "llistxattr", + "_llseek", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "map_shadow_stack", + "membarrier", + "memfd_create", + "memfd_secret", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "mmap", + "mmap2", + 
"mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedreceive_time64", + "mq_timedsend", + "mq_timedsend_time64", + "mq_unlink", + "mremap", + "mseal", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", + "munmap", + "name_to_handle_at", + "nanosleep", + "newfstatat", + "_newselect", + "open", + "openat", + "openat2", + "pause", + "pidfd_open", + "pidfd_send_signal", + "pipe", + "pipe2", + "pkey_alloc", + "pkey_free", + "pkey_mprotect", + "poll", + "ppoll", + "ppoll_time64", + "prctl", + "pread64", + "preadv", + "preadv2", + "prlimit64", + "process_mrelease", + "pselect6", + "pselect6_time64", + "pwrite64", + "pwritev", + "pwritev2", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "recv", + "recvfrom", + "recvmmsg", + "recvmmsg_time64", + "recvmsg", + "remap_file_pages", + "removexattr", + "removexattrat", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "riscv_hwprobe", + "rmdir", + "rseq", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + "rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_sigtimedwait_time64", + "rt_tgsigqueueinfo", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_rr_get_interval_time64", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "semtimedop_time64", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", + "sendto", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + 
"set_robust_list", + "setsid", + "setsockopt", + "set_thread_area", + "set_tid_address", + "setuid", + "setuid32", + "setxattr", + "setxattrat", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signalfd", + "signalfd4", + "sigprocmask", + "sigreturn", + "socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + "statmount", + "statx", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "syncfs", + "sysinfo", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timer_getoverrun", + "timer_gettime", + "timer_gettime64", + "timer_settime", + "timer_settime64", + "timerfd_create", + "timerfd_gettime", + "timerfd_gettime64", + "timerfd_settime", + "timerfd_settime64", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + "umask", + "uname", + "unlink", + "unlinkat", + "uretprobe", + "utime", + "utimensat", + "utimensat_time64", + "utimes", + "vfork", + "vmsplice", + "wait4", + "waitid", + "waitpid", + "write", + "writev" + ], + "action": "SCMP_ACT_ALLOW" + }, + { + "names": [ + "process_vm_readv", + "process_vm_writev", + "ptrace" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "minKernel": "4.8" + } + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 38, + "op": "SCMP_CMP_LT" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 39, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "comment": "Blocks AF_ALG (38), AF_VSOCK (40), AF_KCM (41), AF_QIPCRTR (42), AF_SMC (43), AF_XDP (44), AF_MCTP (45) via the default ERRNO.", + "args": [ + { + "index": 0, + "value": 45, + "op": "SCMP_CMP_GT" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 0, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": 
"SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 8, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131072, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131080, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 4294967295, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "sync_file_range2", + "swapcontext" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "ppc64le" + ] + } + }, + { + "names": [ + "arm_fadvise64_64", + "arm_sync_file_range", + "sync_file_range2", + "breakpoint", + "cacheflush", + "set_tls" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "arm", + "arm64" + ] + } + }, + { + "names": [ + "arch_prctl" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "amd64", + "x32" + ] + } + }, + { + "names": [ + "modify_ldt" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "amd64", + "x32", + "x86" + ] + } + }, + { + "names": [ + "s390_pci_mmio_read", + "s390_pci_mmio_write", + "s390_runtime_instr" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "s390", + "s390x" + ] + } + }, + { + "names": [ + "riscv_flush_icache" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "arches": [ + "riscv64" + ] + } + }, + { + "names": [ + "open_by_handle_at" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_DAC_READ_SEARCH" + ] + } + }, + { + "names": [ + "bpf", + "clone", + "clone3", + "fanotify_init", + "fsconfig", + "fsmount", + "fsopen", + "fspick", + "lookup_dcookie", + "lsm_get_self_attr", + "lsm_list_modules", + "lsm_set_self_attr", + "mount", + "mount_setattr", + "move_mount", + "open_tree", + "perf_event_open", + "quotactl", + "quotactl_fd", + "setdomainname", + 
"sethostname", + "setns", + "syslog", + "umount", + "umount2", + "unshare" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_ADMIN" + ] + } + }, + { + "names": [ + "clone" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 2114060288, + "op": "SCMP_CMP_MASKED_EQ" + } + ], + "excludes": { + "caps": [ + "CAP_SYS_ADMIN" + ], + "arches": [ + "s390", + "s390x" + ] + } + }, + { + "names": [ + "clone" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 1, + "value": 2114060288, + "op": "SCMP_CMP_MASKED_EQ" + } + ], + "comment": "s390 parameter ordering for clone is different", + "includes": { + "arches": [ + "s390", + "s390x" + ] + }, + "excludes": { + "caps": [ + "CAP_SYS_ADMIN" + ] + } + }, + { + "names": [ + "clone3" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38, + "excludes": { + "caps": [ + "CAP_SYS_ADMIN" + ] + } + }, + { + "names": [ + "reboot" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_BOOT" + ] + } + }, + { + "names": [ + "chroot" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_CHROOT" + ] + } + }, + { + "names": [ + "delete_module", + "init_module", + "finit_module" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_MODULE" + ] + } + }, + { + "names": [ + "acct" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_PACCT" + ] + } + }, + { + "names": [ + "kcmp", + "pidfd_getfd", + "process_madvise", + "process_vm_readv", + "process_vm_writev", + "ptrace" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_PTRACE" + ] + } + }, + { + "names": [ + "iopl", + "ioperm" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_RAWIO" + ] + } + }, + { + "names": [ + "settimeofday", + "stime", + "clock_settime", + "clock_settime64" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_TIME" + ] + } + }, + { + "names": [ + "vhangup" + ], + "action": 
"SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_TTY_CONFIG" + ] + } + }, + { + "names": [ + "get_mempolicy", + "mbind", + "set_mempolicy", + "set_mempolicy_home_node" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYS_NICE" + ] + } + }, + { + "names": [ + "syslog" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_SYSLOG" + ] + } + }, + { + "names": [ + "bpf" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_BPF" + ] + } + }, + { + "names": [ + "perf_event_open" + ], + "action": "SCMP_ACT_ALLOW", + "includes": { + "caps": [ + "CAP_PERFMON" + ] + } + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Defense in depth on top of CONFIG_NET_KEY=n.", + "args": [ + { + "index": 0, + "value": 15, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 97, + "comment": "Defense in depth on top of CONFIG_AF_RXRPC=n.", + "args": [ + { + "index": 0, + "value": 33, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "pidfd_getfd" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block 25 cap-gates pidfd_getfd on CAP_SYS_PTRACE" + }, + { + "names": [ + "io_uring_setup", + "io_uring_enter", + "io_uring_register" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38, + "comment": "Defense in depth on top of CONFIG_IO_URING=n" + }, + { + "names": [ + "userfaultfd" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "userfaultfd is a recurring race-window primitive in kernel LPEs (it lets userspace pause kernel-side page faults)" + }, + { + "names": [ + "add_key", + "request_key", + "keyctl" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38, + "comment": "Kernel keyring API" + } + ] } From 0f90ea8517aacfcca45ff8fe08eab714eaaab937 Mon Sep 17 00:00:00 2001 From: 0x416e746f6e Date: Thu, 28 May 2026 13:39:39 +0200 Subject: [PATCH 16/16] chore: alight op-rbuilder version --- 
.../l2/op-rbuilder/mkosi.extra/etc/flashbots/op-rbuilder.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/l2/op-rbuilder/mkosi.extra/etc/flashbots/op-rbuilder.yaml b/modules/l2/op-rbuilder/mkosi.extra/etc/flashbots/op-rbuilder.yaml index 92eb2589..f3f03ff2 100644 --- a/modules/l2/op-rbuilder/mkosi.extra/etc/flashbots/op-rbuilder.yaml +++ b/modules/l2/op-rbuilder/mkosi.extra/etc/flashbots/op-rbuilder.yaml @@ -1,7 +1,7 @@ node_healthchecker: git_reference: v0.1.11 op_rbuilder: - git_reference: op-rbuilder/v0.4.4 + git_reference: op-rbuilder/v0.4.6 rproxy: git_reference: v0.0.11 rust: