diff --git a/src/runtime/forkipc.c b/src/runtime/forkipc.c
index a2cfe9c..963cb61 100644
--- a/src/runtime/forkipc.c
+++ b/src/runtime/forkipc.c
@@ -360,6 +360,23 @@ int fork_child_main(int ipc_fd,
 #define LINUX_CLONE_CHILD_SETTID 0x01000000
 /* LINUX_SIGCHLD defined in syscall_signal.h (included above) */
 
+/* Namespace flags. elfuse implements no namespace isolation, so both
+ * sys_clone and sys_clone3 reject them rather than silently ignoring them.
+ */
+#define LINUX_CLONE_NEWTIME 0x00000080
+#define LINUX_CLONE_NEWNS 0x00020000
+#define LINUX_CLONE_NEWCGROUP 0x02000000
+#define LINUX_CLONE_NEWUTS 0x04000000
+#define LINUX_CLONE_NEWIPC 0x08000000
+#define LINUX_CLONE_NEWUSER 0x10000000
+#define LINUX_CLONE_NEWPID 0x20000000
+#define LINUX_CLONE_NEWNET 0x40000000
+
+#define LINUX_CLONE3_NS_FLAGS \
+    (LINUX_CLONE_NEWNS | LINUX_CLONE_NEWCGROUP | LINUX_CLONE_NEWUTS | \
+     LINUX_CLONE_NEWIPC | LINUX_CLONE_NEWUSER | LINUX_CLONE_NEWPID | \
+     LINUX_CLONE_NEWNET | LINUX_CLONE_NEWTIME)
+
 /* CLONE_THREAD: create a new guest thread in the same VM. */
 
 /* Arguments passed to the worker pthread. Allocated by sys_clone_thread, freed
@@ -1069,6 +1086,14 @@ int64_t sys_clone(hv_vcpu_t vcpu,
                   uint64_t ctid_gva,
                   bool verbose)
 {
+    /* Namespaces are not implemented. CLONE_NEWTIME (0x80) lives in the CSIGNAL
+     * low byte and, like CLONE_INTO_CGROUP (bit 33) and set_tid, cannot be
+     * conveyed through clone(2) at all, so mask off that byte and reject only
+     * the higher namespace bits.
+     */
+    if ((flags & ~(uint64_t) 0xff) & LINUX_CLONE3_NS_FLAGS)
+        return -LINUX_EINVAL;
+
     /* CLONE_THREAD: create a new thread in the same VM (not a new process) */
     if (flags & LINUX_CLONE_THREAD) {
         return sys_clone_thread(vcpu, g, flags, child_stack, stack_map_start,
@@ -1507,22 +1532,9 @@ struct linux_clone_args {
 
 #define CLONE_ARGS_SIZE_VER0 64 /* v5.3: first 8 fields (flags..tls) */
 
-/* Unsupported clone3 flags: reject early rather than silently ignoring. */
+/* Unsupported clone3-only flags: reject early rather than silently ignoring. */
 #define LINUX_CLONE_PIDFD 0x00001000
 #define LINUX_CLONE_INTO_CGROUP 0x200000000ULL
-#define LINUX_CLONE_NEWNS 0x00020000
-#define LINUX_CLONE_NEWCGROUP 0x02000000
-#define LINUX_CLONE_NEWUTS 0x04000000
-#define LINUX_CLONE_NEWIPC 0x08000000
-#define LINUX_CLONE_NEWUSER 0x10000000
-#define LINUX_CLONE_NEWPID 0x20000000
-#define LINUX_CLONE_NEWNET 0x40000000
-#define LINUX_CLONE_NEWTIME 0x00000080
-
-#define LINUX_CLONE3_NS_FLAGS \
-    (LINUX_CLONE_NEWNS | LINUX_CLONE_NEWCGROUP | LINUX_CLONE_NEWUTS | \
-     LINUX_CLONE_NEWIPC | LINUX_CLONE_NEWUSER | LINUX_CLONE_NEWPID | \
-     LINUX_CLONE_NEWNET | LINUX_CLONE_NEWTIME)
 
 int64_t sys_clone3(hv_vcpu_t vcpu,
                    guest_t *g,
diff --git a/tests/test-clone3.c b/tests/test-clone3.c
index dc679b9..c23c18b 100644
--- a/tests/test-clone3.c
+++ b/tests/test-clone3.c
@@ -658,6 +658,35 @@ static void test_partial_deferred_stack_munmap(void)
     munmap(reuse_stack, stack_size);
 }
 
+/* Test 15: legacy clone(2) rejects CLONE_NEW* namespace flags with EINVAL,
+ * matching clone3 (issue #44). Before the fix these flags fell through to a
+ * plain fork that falsely appeared to succeed. CLONE_NEWTIME is omitted: it
+ * lives in the CSIGNAL low byte and is not reachable through clone(2).
+ */
+static void test_legacy_clone_namespaces(void)
+{
+    static const struct {
+        unsigned long flag;
+        const char *name;
+    } ns_flags[] = {
+        {0x00020000, "CLONE_NEWNS"},   {0x02000000, "CLONE_NEWCGROUP"},
+        {0x04000000, "CLONE_NEWUTS"},  {0x08000000, "CLONE_NEWIPC"},
+        {0x10000000, "CLONE_NEWUSER"}, {0x20000000, "CLONE_NEWPID"},
+        {0x40000000, "CLONE_NEWNET"},
+    };
+    for (size_t i = 0; i < sizeof(ns_flags) / sizeof(ns_flags[0]); i++) {
+        /* SIGCHLD (17) in the low byte makes this a fork-like clone. */
+        long ret = raw_clone(ns_flags[i].flag | 17, NULL, NULL, 0, NULL);
+        CHECK(ret == -22 /* EINVAL */,
+              "clone(%s) returned %ld (expected -EINVAL)", ns_flags[i].name,
+              ret);
+        if (ret == 0) /* defensive: a leaked child must not run the suite */
+            raw_syscall1(__NR_exit, 0);
+        else if (ret > 0)
+            raw_syscall4(__NR_wait4, ret, 0, 0, 0);
+    }
+}
+
 int main(int argc, char **argv)
 {
     if (argc > 1 && !strcmp(argv[1], "--clone3-vfork-child"))
@@ -687,6 +716,7 @@ int main(int argc, char **argv)
     test_vfork_exec_unblocks_parent();
     test_deferred_stack_munmap();
     test_partial_deferred_stack_munmap();
+    test_legacy_clone_namespaces();
 
     SUMMARY("test-clone3");
     return fails > 0 ? 1 : 0;