Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions csrc/binding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ int supervisor_main(int sock_fd);
namespace nb = nanobind;


void do_bench(int result_fd, int input_fd, const std::string& kernel_qualname, const nb::object& test_generator,
void do_bench(int result_fd, int input_fd, int supervisor_sock_fd, const std::string& kernel_qualname, const nb::object& test_generator,
const nb::dict& test_kwargs, std::uintptr_t stream, bool discard, bool nvtx, bool landlock, bool mseal,
int supervisor_sock_fd) {
bool allow_root) {
std::vector<char> signature_bytes(32);
auto config = read_benchmark_parameters(input_fd, signature_bytes.data());
auto mgr = make_benchmark_manager(result_fd, signature_bytes, config.Seed, discard, nvtx, landlock, mseal, supervisor_sock_fd);
auto mgr = make_benchmark_manager(result_fd, signature_bytes, config.Seed, discard, nvtx, landlock, mseal, allow_root, supervisor_sock_fd);
cleanse(signature_bytes.data(), 32);

{
Expand Down Expand Up @@ -54,6 +54,7 @@ NB_MODULE(_pygpubench, m) {
m.def("do_bench", do_bench,
nb::arg("result_fd"),
nb::arg("input_fd"),
nb::arg("supervisor_sock_fd"),
nb::arg("kernel_qualname"),
nb::arg("test_generator"),
nb::arg("test_kwargs"),
Expand All @@ -62,7 +63,7 @@ NB_MODULE(_pygpubench, m) {
nb::arg("nvtx") = false,
nb::arg("landlock") = true,
nb::arg("mseal") = true,
nb::arg("supervisor_sock_fd") = -1 // -1 = seccomp disabled
nb::arg("allow_root") = false
);

m.def("run_supervisor", [](int sock_fd) {
Expand Down
25 changes: 24 additions & 1 deletion csrc/landlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,27 @@ void setup_seccomp_filter(scmp_filter_ctx ctx) {
"block prctl(SET_PTRACER)");
}

void install_seccomp_filter() {
/// Probes whether this process can still open /proc/self/mem for reading.
///
/// Even with dumpable=0, a process running as root can open /proc/self/mem.
/// A successful open is therefore treated as an error unless the caller opted in
/// via `allow_root` (useful for testing), in which case only a warning is printed.
///
/// @param allow_root  If true, a readable /proc/self/mem is reported on stderr
///                    instead of raising.
/// @throws std::runtime_error  if the file is readable and `allow_root` is false.
/// @throws std::system_error   if open() fails with anything other than
///                             EACCES or EPERM.
static void validate_proc_self_mem(bool allow_root) {
    const int probe_fd = open("/proc/self/mem", O_RDONLY);

    if (probe_fd < 0) {
        // Capture errno right away so nothing in between can overwrite it.
        const int open_errno = errno;
        if (open_errno == EACCES || open_errno == EPERM) {
            // Expected outcome: access to /proc/self/mem is denied.
            return;
        }
        throw std::system_error(open_errno, std::system_category(), "open(/proc/self/mem)");
    }

    // The open succeeded; we only wanted to know that, so release the descriptor.
    close(probe_fd);

    if (allow_root) {
        fprintf(stderr, "WARNING: /proc/self/mem is readable: Running as root?\n");
        fflush(stderr);
        return;
    }
    throw std::runtime_error("/proc/self/mem is readable: Running as root?");
}

void install_seccomp_filter(bool allow_root) {
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
if (!ctx) throw std::runtime_error("seccomp_init failed");
try {
Expand All @@ -228,6 +248,9 @@ void install_seccomp_filter() {
throw std::system_error(errno, std::system_category(), "prctl(PR_SET_DUMPABLE)");
}

// check that /proc/self/mem is protected
validate_proc_self_mem(allow_root);

// Prevent gaining privileges (if attacker tries setuid exploits)
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
throw std::system_error(errno, std::system_category(), "prctl(PR_SET_NO_NEW_PRIVS)");
Expand Down
11 changes: 6 additions & 5 deletions csrc/manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ extern void install_landlock();
extern bool mseal_supported();
extern void seal_mappings();
extern bool supports_seccomp_notify();
extern void install_seccomp_filter();
extern void install_seccomp_filter(bool allow_root);
extern void seccomp_install_memory_notify(int supervisor_sock, uintptr_t lo, uintptr_t hi);

static void check_check_approx_match_dispatch(unsigned* result, void* expected_data, nb::dlpack::dtype expected_type,
Expand Down Expand Up @@ -139,7 +139,7 @@ void BenchmarkManagerDeleter::operator()(BenchmarkManager* p) const noexcept {

BenchmarkManagerPtr make_benchmark_manager(
int result_fd, const std::vector<char>& signature, std::uint64_t seed,
bool discard, bool nvtx, bool landlock, bool mseal, int supervisor_socket)
bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket)
{
const std::size_t page_size = static_cast<std::size_t>(getpagesize());
const std::size_t alloc_size = (BenchmarkManagerArenaSize + page_size - 1) & ~(page_size - 1);
Expand All @@ -155,7 +155,7 @@ BenchmarkManagerPtr make_benchmark_manager(
raw = new (mem) BenchmarkManager(
static_cast<std::byte*>(mem), alloc_size,
result_fd, signature, seed,
discard, nvtx, landlock, mseal, supervisor_socket);
discard, nvtx, landlock, mseal, allow_root, supervisor_socket);
} catch (...) {
// If construction throws, release the mmap'd region before propagating.
if (munmap(mem, alloc_size) != 0) {
Expand All @@ -170,7 +170,7 @@ BenchmarkManagerPtr make_benchmark_manager(

BenchmarkManager::BenchmarkManager(std::byte* arena, std::size_t arena_size,
int result_fd, const std::vector<char>& signature, std::uint64_t seed, bool discard,
bool nvtx, bool landlock, bool mseal, int supervisor_socket)
bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket)
: mArena(arena),
mResource(arena + sizeof(BenchmarkManager),
arena_size - sizeof(BenchmarkManager),
Expand Down Expand Up @@ -203,6 +203,7 @@ BenchmarkManager::BenchmarkManager(std::byte* arena, std::size_t arena_size,
mNVTXEnabled = nvtx;
mLandlock = landlock;
mSeal = mseal;
mAllowRoot = allow_root;
mDiscardCache = discard;
mSeed = seed;
std::random_device rd;
Expand Down Expand Up @@ -343,7 +344,7 @@ void BenchmarkManager::install_protections() {
seal_mappings();
}

install_seccomp_filter();
install_seccomp_filter(mAllowRoot);
}

static void setup_seccomp(int sock, bool install_notify, std::uintptr_t lo, std::uintptr_t hi) {
Expand Down
7 changes: 4 additions & 3 deletions csrc/manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ using BenchmarkManagerPtr = std::unique_ptr<BenchmarkManager, BenchmarkManagerDe

BenchmarkManagerPtr make_benchmark_manager(
int result_fd, const std::vector<char>& signature, std::uint64_t seed,
bool discard, bool nvtx, bool landlock, bool mseal, int supervisor_socket);
bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket);


class BenchmarkManager {
Expand All @@ -53,14 +53,14 @@ class BenchmarkManager {
void send_report();
void clean_up();
private:
friend BenchmarkManagerPtr make_benchmark_manager(int result_fd, const std::vector<char>& signature, std::uint64_t seed, bool discard, bool nvtx, bool landlock, bool mseal, int supervisor_socket);
friend BenchmarkManagerPtr make_benchmark_manager(int result_fd, const std::vector<char>& signature, std::uint64_t seed, bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket);
friend BenchmarkManagerDeleter;
/// `arena` is the mmap region that owns all memory for this object and its vectors.
/// The BenchmarkManager must have been placement-newed into the front of that region;
/// the rest is used as a monotonic PMR arena for internal vectors.
BenchmarkManager(std::byte* arena, std::size_t arena_size,
int result_fd, const std::vector<char>& signature, std::uint64_t seed,
bool discard, bool nvtx, bool landlock, bool mseal, int supervisor_socket);
bool discard, bool nvtx, bool landlock, bool mseal, bool allow_root, int supervisor_socket);
~BenchmarkManager();

struct Expected {
Expand Down Expand Up @@ -110,6 +110,7 @@ class BenchmarkManager {
bool mDiscardCache = true;
bool mLandlock = true;
bool mSeal = true;
bool mAllowRoot = false;
int mSupervisorSock = -1;
std::uint64_t mSeed = -1;
std::pmr::vector<Expected> mExpectedOutputs;
Expand Down
98 changes: 87 additions & 11 deletions exploits/run_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@

Usage:
cd exploits/
python run_all.py # run all submission_*.py tests
python run_all.py # run all submission_*.py tests
python run_all.py submission_gc_nan # single test (no extension)
python run_all.py submission_gc_nan.py # single test (with extension)
python run_all.py --repeats 50 --size 256
python run_all.py --no-discard --no-landlock --allow-root
"""
import glob
import os
import sys
import time
import traceback
import argparse

# Ensure this directory is importable by subprocess (for spawn context)
_dir = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -60,10 +63,77 @@ def classify_result(module, has_errors, ran_full, valid_times, stats):
return "CHEAT FAILED (detected)"


def parse_args(argv=None):
    """Parse the command-line options of the exploit test runner.

    :param argv: Optional list of argument strings to parse. When ``None``
        (the default), argparse falls back to ``sys.argv[1:]``, so existing
        ``parse_args()`` callers are unaffected.
    :return: An ``argparse.Namespace`` with the attributes ``filter``,
        ``repeats``, ``seed``, ``size``, ``discard``, ``nvtx``, ``timeout``,
        ``landlock``, ``mseal`` and ``allow_root``.
    """
    parser = argparse.ArgumentParser(
        description="PyGPUBench Exploit Test Suite",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # ── Positional / filter ───────────────────────────────────────────────
    parser.add_argument(
        "filter",
        nargs="?",
        default=None,
        metavar="TEST",
        help="Run only tests whose module name contains this string "
             "(with or without .py extension). Omit to run all tests.",
    )

    # ── Test configuration ────────────────────────────────────────────────
    bench = parser.add_argument_group("benchmark configuration")
    bench.add_argument(
        "--repeats", type=int, default=100, metavar="N",
        help="Number of benchmark repetitions per kernel.",
    )
    bench.add_argument(
        "--seed", type=int, default=42,
        help="RNG seed passed to the test-case generator.",
    )
    bench.add_argument(
        "--size", type=int, default=512,
        help="Image size (pixels) passed to the test-case generator.",
    )

    # ── do_bench_isolated keyword arguments ───────────────────────────────
    iso = parser.add_argument_group(
        "isolation / sandbox options",
        description="Keyword arguments forwarded verbatim to "
                    "pygpubench.do_bench_isolated().",
    )
    iso.add_argument(
        "--no-discard", dest="discard", action="store_false", default=True,
        # `discard` ends up in the benchmark manager's cache-discard flag,
        # so describe it as such rather than as a warm-up sample drop.
        help="Disable cache discarding during the benchmark "
             "(passes discard=False).",
    )
    iso.add_argument(
        "--nvtx", action="store_true", default=False,
        help="Enable NVTX range annotations for profiling.",
    )
    iso.add_argument(
        "--timeout", type=int, default=300, metavar="SECONDS",
        help="Per-test wall-clock timeout in seconds.",
    )
    iso.add_argument(
        "--no-landlock", dest="landlock", action="store_false", default=True,
        help="Disable the landlock filesystem sandbox.",
    )
    iso.add_argument(
        "--no-mseal", dest="mseal", action="store_false", default=True,
        help="Disable mseal memory-sealing protection.",
    )
    iso.add_argument(
        "--allow-root", action="store_true", default=False,
        help="Allow the benchmark to run as the root user.",
    )

    return parser.parse_args(argv)


def main():
import multiprocessing
multiprocessing.freeze_support()

args = parse_args()

# ── Preflight checks ──────────────────────────────────────────────────
print("=" * 70)
print("PyGPUBench Exploit Test Suite")
Expand All @@ -89,19 +159,25 @@ def main():
from benchmark import generate_test_case

# ── Test configuration ────────────────────────────────────────────────
REPEATS = 100
SEED = 42
SIZE = 512 # smaller for faster tests
TEST_ARGS = {"size": SIZE}
REPEATS = args.repeats
SEED = args.seed
TEST_ARGS = {"size": args.size}

# Keyword arguments forwarded to do_bench_isolated
bench_kwargs = dict(
discard = args.discard,
nvtx = args.nvtx,
timeout = args.timeout,
landlock = args.landlock,
mseal = args.mseal,
allow_root = args.allow_root,
)

# ── Discover tests ────────────────────────────────────────────────────
all_modules = discover_tests()

if len(sys.argv) > 1:
# Allow specifying with or without .py extension
filter_name = sys.argv[1]
if filter_name.endswith(".py"):
filter_name = filter_name[:-3]
if args.filter is not None:
filter_name = args.filter.removesuffix(".py")
matched = [m for m in all_modules if filter_name in m]
if not matched:
print(f"No test matching '{filter_name}'")
Expand Down Expand Up @@ -137,7 +213,7 @@ def main():
try:
t0 = time.time()
res = pygpubench.do_bench_isolated(
f"{module}.kernel", generate_test_case, TEST_ARGS, REPEATS, SEED, discard=True
f"{module}.kernel", generate_test_case, TEST_ARGS, REPEATS, SEED, **bench_kwargs,
)
elapsed = time.time() - t0

Expand Down
8 changes: 6 additions & 2 deletions python/pygpubench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "multiprocessing.connection.Connection", supervisor_sock: "socket.socket",
qualname: str, test_generator: TestGeneratorInterface,
test_args: dict, stream: int = None, discard: bool = True,
nvtx: bool = False, tb_conn: "multiprocessing.connection.Connection" = None, landlock=True, mseal=True):
nvtx: bool = False, tb_conn: "multiprocessing.connection.Connection" = None, landlock=True, mseal=True, allow_root=False):
"""
Benchmarks the kernel referred to by `qualname` against the test case returned by `test_generator`.
:param out_fd: Writable file descriptor to which benchmark results are written.
Expand All @@ -45,6 +45,7 @@ def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "mult
:param tb_conn: A connection to a multiprocessing pipe for sending tracebacks to the parent process.
:param landlock: Whether to enable landlock. Enabled by default, prevents write access to the file system outside /tmp.
:param mseal: Whether to enable memory sealing. Enabled by default, prevents making executable mappings writable.
:param allow_root: Whether to allow the benchmark to run as root. Disabled by default. When running as root, /proc/self/mem remains readable even after the process has been made non-dumpable; with ``allow_root=False`` this is treated as an error, with ``allow_root=True`` only a warning is printed.
"""
if stream is None:
import torch
Expand All @@ -55,6 +56,7 @@ def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "mult
_pygpubench.do_bench(
out_fd.fileno(),
in_fd.fileno(),
supervisor_sock.fileno(),
qualname,
test_generator,
test_args,
Expand All @@ -63,7 +65,7 @@ def _do_bench_impl(out_fd: "multiprocessing.connection.Connection", in_fd: "mult
nvtx,
landlock,
mseal,
supervisor_sock.fileno(),
allow_root,
)
except BaseException:
if tb_conn is not None:
Expand Down Expand Up @@ -154,6 +156,7 @@ def do_bench_isolated(
timeout: int = 300,
landlock = True,
mseal = True,
allow_root = False,
) -> BenchmarkResult:
"""
Runs kernel benchmark (`do_bench_impl`) in a subprocess for proper isolation.
Expand Down Expand Up @@ -200,6 +203,7 @@ def do_bench_isolated(
child_tb_conn,
landlock,
mseal,
allow_root,
),
)

Expand Down
4 changes: 2 additions & 2 deletions test/grayscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ def generate_test_case(**kwargs):
kernels = ["valid_custom_kernel_eager", "valid_custom_kernel_compiled", "valid_custom_kernel_stream"]
for kernel in kernels:
print(kernel)
res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False)
res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False, allow_root=True)
print("❌" if not res.success else "✅", pygpubench.basic_stats(res.time_us))
broken = ["wrong_custom_kernel_backward_race", "wrong_custom_kernel_forward_race"]
for kernel in broken:
print(kernel)
res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False)
res = pygpubench.do_bench_isolated(f"submission.{kernel}", generate_test_case, {"size": 1024}, 100, 5, discard=True, landlock=False, mseal=False, allow_root=True)
print("❌" if not res.success else "✅",pygpubench.basic_stats(res.time_us))
print("done")
Loading