diff --git a/csrc/binding.cpp b/csrc/binding.cpp index 493d5c1..48dd074 100644 --- a/csrc/binding.cpp +++ b/csrc/binding.cpp @@ -16,12 +16,12 @@ int supervisor_main(int sock_fd); namespace nb = nanobind; -void do_bench(int result_fd, int input_fd, const std::string& kernel_qualname, const nb::object& test_generator, +void do_bench(int result_fd, int input_fd, int supervisor_sock_fd, const std::string& kernel_qualname, const nb::object& test_generator, const nb::dict& test_kwargs, std::uintptr_t stream, bool discard, bool nvtx, bool landlock, bool mseal, - int supervisor_sock_fd) { + bool allow_root) { std::vector signature_bytes(32); auto config = read_benchmark_parameters(input_fd, signature_bytes.data()); - auto mgr = make_benchmark_manager(result_fd, signature_bytes, config.Seed, discard, nvtx, landlock, mseal, supervisor_sock_fd); + auto mgr = make_benchmark_manager(result_fd, signature_bytes, config.Seed, discard, nvtx, landlock, mseal, allow_root, supervisor_sock_fd); cleanse(signature_bytes.data(), 32); { @@ -54,6 +54,7 @@ NB_MODULE(_pygpubench, m) { m.def("do_bench", do_bench, nb::arg("result_fd"), nb::arg("input_fd"), + nb::arg("supervisor_sock_fd"), nb::arg("kernel_qualname"), nb::arg("test_generator"), nb::arg("test_kwargs"), @@ -62,7 +63,7 @@ NB_MODULE(_pygpubench, m) { nb::arg("nvtx") = false, nb::arg("landlock") = true, nb::arg("mseal") = true, - nb::arg("supervisor_sock_fd") = -1 // -1 = seccomp disabled + nb::arg("allow_root") = false ); m.def("run_supervisor", [](int sock_fd) { diff --git a/csrc/landlock.cpp b/csrc/landlock.cpp index 916c82d..cec8ba6 100644 --- a/csrc/landlock.cpp +++ b/csrc/landlock.cpp @@ -213,7 +213,27 @@ void setup_seccomp_filter(scmp_filter_ctx ctx) { "block prctl(SET_PTRACER)"); } -void install_seccomp_filter() { +/// Even with dumpable=0, if we are running as root we can open /proc/self/mem. 
+/// This function tries to open that file; if that succeeds, it throws unless the caller explicitly opted in to running as root +/// (allow_root), in which case it only prints a warning and leaves the file accessible (useful for testing) +static void validate_proc_self_mem(bool allow_root) { + int fd = open("/proc/self/mem", O_RDONLY); + if (fd >= 0) { + close(fd); + if (!allow_root) + throw std::runtime_error("/proc/self/mem is readable: Running as root?"); + else { + fprintf(stderr, "WARNING: /proc/self/mem is readable: Running as root?\n"); + fflush(stderr); + } + } else if (errno == EACCES || errno == EPERM) { + // good, can't access /proc/self/mem + } else { + throw std::system_error(errno, std::system_category(), "open(/proc/self/mem)"); + } +} + +void install_seccomp_filter(bool allow_root) { scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW); if (!ctx) throw std::runtime_error("seccomp_init failed"); try { @@ -228,6 +248,9 @@ void install_seccomp_filter() { throw std::system_error(errno, std::system_category(), "prctl(PR_SET_DUMPABLE)"); } + // check that /proc/self/mem is protected + validate_proc_self_mem(allow_root); + // Prevent gaining privileges (if attacker tries setuid exploits) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { throw std::system_error(errno, std::system_category(), "prctl(PR_SET_NO_NEW_PRIVS)"); diff --git a/csrc/manager.cpp b/csrc/manager.cpp index f0e70d6..10ddb63 100644 --- a/csrc/manager.cpp +++ b/csrc/manager.cpp @@ -31,7 +31,7 @@ extern void install_landlock(); extern bool mseal_supported(); extern void seal_mappings(); extern bool supports_seccomp_notify(); -extern void install_seccomp_filter(); +extern void install_seccomp_filter(bool allow_root); extern void seccomp_install_memory_notify(int supervisor_sock, uintptr_t lo, uintptr_t hi); static void check_check_approx_match_dispatch(unsigned* result, void* expected_data, nb::dlpack::dtype expected_type, @@ -139,7 +139,7 @@ void BenchmarkManagerDeleter::operator()(BenchmarkManager* p) const noexcept { BenchmarkManagerPtr make_benchmark_manager( int
result_fd, const std::vector& signature, std::uint64_t seed, - bool discard, bool nvtx, bool landlock, bool mseal, int supervisor_socket) + bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket) { const std::size_t page_size = static_cast(getpagesize()); const std::size_t alloc_size = (BenchmarkManagerArenaSize + page_size - 1) & ~(page_size - 1); @@ -155,7 +155,7 @@ BenchmarkManagerPtr make_benchmark_manager( raw = new (mem) BenchmarkManager( static_cast(mem), alloc_size, result_fd, signature, seed, - discard, nvtx, landlock, mseal, supervisor_socket); + discard, nvtx, landlock, mseal, allow_root, supervisor_socket); } catch (...) { // If construction throws, release the mmap'd region before propagating. if (munmap(mem, alloc_size) != 0) { @@ -170,7 +170,7 @@ BenchmarkManagerPtr make_benchmark_manager( BenchmarkManager::BenchmarkManager(std::byte* arena, std::size_t arena_size, int result_fd, const std::vector& signature, std::uint64_t seed, bool discard, - bool nvtx, bool landlock, bool mseal, int supervisor_socket) + bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket) : mArena(arena), mResource(arena + sizeof(BenchmarkManager), arena_size - sizeof(BenchmarkManager), @@ -203,6 +203,7 @@ BenchmarkManager::BenchmarkManager(std::byte* arena, std::size_t arena_size, mNVTXEnabled = nvtx; mLandlock = landlock; mSeal = mseal; + mAllowRoot = allow_root; mDiscardCache = discard; mSeed = seed; std::random_device rd; @@ -343,7 +344,7 @@ void BenchmarkManager::install_protections() { seal_mappings(); } - install_seccomp_filter(); + install_seccomp_filter(mAllowRoot); } static void setup_seccomp(int sock, bool install_notify, std::uintptr_t lo, std::uintptr_t hi) { diff --git a/csrc/manager.h b/csrc/manager.h index 28bb8d1..5a05de9 100644 --- a/csrc/manager.h +++ b/csrc/manager.h @@ -43,7 +43,7 @@ using BenchmarkManagerPtr = std::unique_ptr& signature, std::uint64_t seed, - bool discard, bool nvtx, bool landlock, 
bool mseal, int supervisor_socket); + bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket); class BenchmarkManager { @@ -53,14 +53,14 @@ class BenchmarkManager { void send_report(); void clean_up(); private: - friend BenchmarkManagerPtr make_benchmark_manager(int result_fd, const std::vector& signature, std::uint64_t seed, bool discard, bool nvtx, bool landlock, bool mseal, int supervisor_socket); + friend BenchmarkManagerPtr make_benchmark_manager(int result_fd, const std::vector& signature, std::uint64_t seed, bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket); friend BenchmarkManagerDeleter; /// `arena` is the mmap region that owns all memory for this object and its vectors. /// The BenchmarkManager must have been placement-newed into the front of that region; /// the rest is used as a monotonic PMR arena for internal vectors. BenchmarkManager(std::byte* arena, std::size_t arena_size, int result_fd, const std::vector& signature, std::uint64_t seed, - bool discard, bool nvtx, bool landlock, bool mseal, int supervisor_socket); + bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket); ~BenchmarkManager(); struct Expected { @@ -110,6 +110,7 @@ class BenchmarkManager { bool mDiscardCache = true; bool mLandlock = true; bool mSeal = true; + bool mAllowRoot = false; int mSupervisorSock = -1; std::uint64_t mSeed = -1; std::pmr::vector mExpectedOutputs; diff --git a/exploits/run_all.py b/exploits/run_all.py index 4e6a73f..a56d1d7 100644 --- a/exploits/run_all.py +++ b/exploits/run_all.py @@ -7,15 +7,18 @@ Usage: cd exploits/ - python run_all.py # run all submission_*.py tests + python run_all.py # run all submission_*.py tests python run_all.py submission_gc_nan # single test (no extension) python run_all.py submission_gc_nan.py # single test (with extension) + python run_all.py --repeats 50 --size 256 + python run_all.py --no-discard --no-landlock 
--allow-root """ import glob import os import sys import time import traceback +import argparse # Ensure this directory is importable by subprocess (for spawn context) _dir = os.path.dirname(os.path.abspath(__file__)) @@ -60,10 +63,77 @@ def classify_result(module, has_errors, ran_full, valid_times, stats): return "CHEAT FAILED (detected)" +def parse_args(): + parser = argparse.ArgumentParser( + description="PyGPUBench Exploit Test Suite", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + # ── Positional / filter ─────────────────────────────────────────────── + parser.add_argument( + "filter", + nargs="?", + default=None, + metavar="TEST", + help="Run only tests whose module name contains this string " + "(with or without .py extension). Omit to run all tests.", + ) + + # ── Test configuration ──────────────────────────────────────────────── + bench = parser.add_argument_group("benchmark configuration") + bench.add_argument( + "--repeats", type=int, default=100, metavar="N", + help="Number of benchmark repetitions per kernel.", + ) + bench.add_argument( + "--seed", type=int, default=42, + help="RNG seed passed to the test-case generator.", + ) + bench.add_argument( + "--size", type=int, default=512, + help="Image size (pixels) passed to the test-case generator.", + ) + + # ── do_bench_isolated keyword arguments ─────────────────────────────── + iso = parser.add_argument_group( + "isolation / sandbox options", + description="Keyword arguments forwarded verbatim to " + "pygpubench.do_bench_isolated().", + ) + iso.add_argument( + "--no-discard", dest="discard", action="store_false", default=True, + help="Disable cache discarding (passes discard=False to the benchmark).", + ) + iso.add_argument( + "--nvtx", action="store_true", default=False, + help="Enable NVTX range annotations for profiling.", + ) + iso.add_argument( + "--timeout", type=int, default=300, metavar="SECONDS", + help="Per-test wall-clock timeout in seconds.", + ) + iso.add_argument( +
"--no-landlock", dest="landlock", action="store_false", default=True, + help="Disable the landlock filesystem sandbox.", + ) + iso.add_argument( + "--no-mseal", dest="mseal", action="store_false", default=True, + help="Disable mseal memory-sealing protection.", + ) + iso.add_argument( + "--allow-root", action="store_true", default=False, + help="Allow the benchmark to run as the root user.", + ) + + return parser.parse_args() + + def main(): import multiprocessing multiprocessing.freeze_support() + args = parse_args() + # ── Preflight checks ────────────────────────────────────────────────── print("=" * 70) print("PyGPUBench Exploit Test Suite") @@ -89,19 +159,25 @@ def main(): from benchmark import generate_test_case # ── Test configuration ──────────────────────────────────────────────── - REPEATS = 100 - SEED = 42 - SIZE = 512 # smaller for faster tests - TEST_ARGS = {"size": SIZE} + REPEATS = args.repeats + SEED = args.seed + TEST_ARGS = {"size": args.size} + + # Keyword arguments forwarded to do_bench_isolated + bench_kwargs = dict( + discard = args.discard, + nvtx = args.nvtx, + timeout = args.timeout, + landlock = args.landlock, + mseal = args.mseal, + allow_root = args.allow_root, + ) # ── Discover tests ──────────────────────────────────────────────────── all_modules = discover_tests() - if len(sys.argv) > 1: - # Allow specifying with or without .py extension - filter_name = sys.argv[1] - if filter_name.endswith(".py"): - filter_name = filter_name[:-3] + if args.filter is not None: + filter_name = args.filter.removesuffix(".py") matched = [m for m in all_modules if filter_name in m] if not matched: print(f"No test matching '{filter_name}'") @@ -137,7 +213,7 @@ def main(): try: t0 = time.time() res = pygpubench.do_bench_isolated( - f"{module}.kernel", generate_test_case, TEST_ARGS, REPEATS, SEED, discard=True + f"{module}.kernel", generate_test_case, TEST_ARGS, REPEATS, SEED, **bench_kwargs, ) elapsed = time.time() - t0 diff --git 
a/python/pygpubench/__init__.py b/python/pygpubench/__init__.py index 6a81b21..46abda1 100644 --- a/python/pygpubench/__init__.py +++ b/python/pygpubench/__init__.py @@ -32,7 +32,7 @@ def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "multiprocessing.connection.Connection", supervisor_sock: "socket.socket", qualname: str, test_generator: TestGeneratorInterface, test_args: dict, stream: int = None, discard: bool = True, - nvtx: bool = False, tb_conn: "multiprocessing.connection.Connection" = None, landlock=True, mseal=True): + nvtx: bool = False, tb_conn: "multiprocessing.connection.Connection" = None, landlock=True, mseal=True, allow_root=False): """ Benchmarks the kernel referred to by `qualname` against the test case returned by `test_generator`. :param out_fd: Writable file descriptor to which benchmark results are written. @@ -45,6 +45,7 @@ def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "mult :param tb_conn: A connection to a multiprocessing pipe for sending tracebacks to the parent process. :param landlock: Whether to enable landlock. Enabled by default, prevents write access to the file system outside /tmp. :param mseal: Whether to enable memory sealing. Enabled by default, prevents making executable mappings writable. + :param allow_root: Whether to allow the benchmark to run as root. Disabled by default; opt in via ``allow_root=True``. When run as root, the benchmark process's memory can still be read through /proc/self/mem even though the process is marked non-dumpable, so by default the benchmark refuses to run in that situation.
""" if stream is None: import torch @@ -55,6 +56,7 @@ def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "mult _pygpubench.do_bench( out_fd.fileno(), in_fd.fileno(), + supervisor_sock.fileno(), qualname, test_generator, test_args, @@ -63,7 +65,7 @@ def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "mult nvtx, landlock, mseal, - supervisor_sock.fileno(), + allow_root, ) except BaseException: if tb_conn is not None: @@ -154,6 +156,7 @@ def do_bench_isolated( timeout: int = 300, landlock = True, mseal = True, + allow_root = False, ) -> BenchmarkResult: """ Runs kernel benchmark (`do_bench_impl`) in a subprocess for proper isolation. @@ -200,6 +203,7 @@ def do_bench_isolated( child_tb_conn, landlock, mseal, + allow_root, ), ) diff --git a/test/grayscale.py b/test/grayscale.py index 21aa41b..5e48006 100644 --- a/test/grayscale.py +++ b/test/grayscale.py @@ -40,11 +40,11 @@ def generate_test_case(**kwargs): kernels = ["valid_custom_kernel_eager", "valid_custom_kernel_compiled", "valid_custom_kernel_stream"] for kernel in kernels: print(kernel) - res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False) + res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False, allow_root=True) print("❌" if not res.success else "✅", pygpubench.basic_stats(res.time_us)) broken = ["wrong_custom_kernel_backward_race", "wrong_custom_kernel_forward_race"] for kernel in broken: print(kernel) - res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False) + res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False, allow_root=True) print("❌" if not res.success else "✅",pygpubench.basic_stats(res.time_us)) 
print("done")