Skip to content

Commit e112a3d

Browse files
committed
Add rlimits
1 parent d2aee98 commit e112a3d

4 files changed

Lines changed: 200 additions & 10 deletions

File tree

AGENTS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,14 @@ Environment variables:
8585
## Isolation and Safety
8686
- The execution occurs in a temporary directory under `/tmp` built by copying the current workspace.
8787
- The service runs commands in a separate process group for reliable termination.
88+
- On Linux, each run is executed in new namespaces (PID, mount, IPC, UTS, NET) and drops to UID/GID 10001.
89+
- Network is disabled by placing the run in a new network namespace with no interfaces.
90+
- Resource limits (rlimits) are applied in the sandboxed child:
91+
- `RLIMIT_CPU`: equals the request timeout (rounded up to whole seconds).
92+
- `RLIMIT_NOFILE`: 256 open files.
93+
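- `RLIMIT_NPROC`: 256 processes/threads (the kernel counts these per real UID, so the budget is shared by all concurrent runs executing as UID 10001).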
94+
- `RLIMIT_CORE`: 0 (no core dumps).
95+
- Rationale for `256` limits: high enough for typical compilers/builds while preventing FD/process exhaustion; these can be tuned in code if needed.
8896

8997
## Runtime Notes
9098
- The service is stateless; each request creates a fresh temp workspace and cleans it up after execution.

Containerfile

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,14 @@ ARG RUN_INPUT_MAX
5555
ARG RUN_OUTPUT_MAX
5656
ARG DEBUG
5757
ARG ALLOW_SHUTDOWN
58-
RUN adduser -S -u 10001 app
59-
COPY --from=builder /build/zig-out/bin/runner-zig /app/codebattle_runner
58+
RUN apk add --no-cache make
59+
COPY --from=builder /build /app
60+
RUN cp /app/zig-out/bin/runner-zig /app/codebattle_runner
6061
ENV PORT=$PORT \
6162
RUN_CONCURRENCY=$RUN_CONCURRENCY \
6263
RUN_INPUT_MAX=$RUN_INPUT_MAX \
6364
RUN_OUTPUT_MAX=$RUN_OUTPUT_MAX \
6465
DEBUG=$DEBUG \
6566
ALLOW_SHUTDOWN=$ALLOW_SHUTDOWN
66-
USER app
67-
6867
EXPOSE 4040
6968
ENTRYPOINT ["/app/codebattle_runner"]

Makefile

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ PLATFORMS ?= linux/amd64,linux/arm64
44
CONTAINER ?= docker
55
TEST_IMAGE ?= runner-zig-test
66

7-
.PHONY: build lint lint-fix test test-unit test-integration container-build container-push curl-local-health curl-local-test start
7+
.PHONY: build lint lint-fix test test-unit test-integration container-build container-push container-start curl-local-health curl-local-test start
88

99
## Build multi-arch image directly for GHCR
1010
build:
@@ -38,7 +38,7 @@ test-integration:
3838
sleep 1; \
3939
done; \
4040
$(MAKE) --no-print-directory curl-local-health && \
41-
seq 1 40 | xargs -n1 -P40 sh -c 'curl -sS -o /dev/null -w "%{http_code}\n" http://localhost:4040/run -H "content-type: application/json" -d @test-payload.json'; \
41+
seq 1 60 | xargs -n1 -P60 sh -c 'curl -sS -o /dev/null -w "%{http_code}\n" http://localhost:4040/run -H "content-type: application/json" -d @test-payload.json'; \
4242
if [ $$container_started -eq 1 ]; then $(CONTAINER) stop $$container_name >/dev/null 2>&1 || true; fi
4343

4444
## Build and push multi-arch image (linux/amd64 + linux/arm64) for GHCR
@@ -63,6 +63,14 @@ container-push:
6363
--tag $(IMAGE):$(TAG) \
6464
.
6565

66+
## Start the container locally on port 4040
67+
container-start:
68+
$(CONTAINER) run --rm -p 4040:4040 \
69+
--cap-add=SYS_ADMIN \
70+
--cap-add=SYS_CHROOT \
71+
--security-opt=no-new-privileges=false \
72+
$(IMAGE):$(TAG)
73+
6674
## Quick local smoke check against the running server
6775
curl-local-health:
6876
@curl -fsS http://localhost:4040/health && echo

src/main.zig

Lines changed: 179 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
const std = @import("std");
2+
const builtin = @import("builtin");
23

34
pub const std_options: std.Options = .{
45
.log_level = .info,
@@ -547,8 +548,11 @@ fn makeTempDir(allocator: Allocator) !TempDir {
547548
std.mem.copyForwards(u8, name_buf[7..], &suffix);
548549
const name = name_buf[0..];
549550

550-
const status = try tmp_parent.makePathStatus(name);
551-
if (status == .existed) continue;
551+
std.posix.mkdirat(tmp_parent.fd, name, 0o777) catch |err| switch (err) {
552+
error.PathAlreadyExists => continue,
553+
else => return err,
554+
};
555+
_ = std.posix.fchmodat(tmp_parent.fd, name, 0o777, 0) catch {};
552556

553557
const dir = try tmp_parent.openDir(name, .{ .iterate = true });
554558
const path = try tmp_parent.realpathAlloc(allocator, name);
@@ -572,10 +576,14 @@ fn copyDirRecursive(src: std.fs.Dir, dst: std.fs.Dir, ignore: []const []const u8
572576

573577
switch (entry.kind) {
574578
.directory => {
575-
try dst.makePath(entry.name);
579+
std.posix.mkdirat(dst.fd, entry.name, 0o777) catch |err| switch (err) {
580+
error.PathAlreadyExists => {},
581+
else => return err,
582+
};
583+
_ = std.posix.fchmodat(dst.fd, entry.name, 0o777, 0) catch {};
576584
var src_child = try src.openDir(entry.name, .{ .iterate = true });
577585
defer src_child.close();
578-
var dst_child = try dst.openDir(entry.name, .{});
586+
var dst_child = try dst.openDir(entry.name, .{ .iterate = true });
579587
defer dst_child.close();
580588
try copyDirRecursive(src_child, dst_child, ignore);
581589
},
@@ -640,6 +648,19 @@ fn runCommand(
640648
timeout_ns: u64,
641649
output_max: usize,
642650
argv: []const []const u8,
651+
) !RunResult {
652+
if (builtin.os.tag == .linux) {
653+
return runCommandSandboxed(allocator, cwd_path, timeout_ns, output_max, argv);
654+
}
655+
return runCommandSimple(allocator, cwd_path, timeout_ns, output_max, argv);
656+
}
657+
658+
fn runCommandSimple(
659+
allocator: Allocator,
660+
cwd_path: []const u8,
661+
timeout_ns: u64,
662+
output_max: usize,
663+
argv: []const []const u8,
643664
) !RunResult {
644665
var child = std.process.Child.init(argv, allocator);
645666
child.cwd = cwd_path;
@@ -719,6 +740,160 @@ fn runCommand(
719740
};
720741
}
721742

743+
/// Runs `argv` inside a Linux sandbox and collects its output.
///
/// Process layout:
///   * this process forks an intermediate child that becomes a process-group
///     leader and unshares its namespaces (see `unshareNamespaces`);
///   * the intermediate forks the real command, which redirects stdio to the
///     pipes, changes into `cwd_path`, drops to UID/GID 10001, sets
///     `no_new_privs`, applies rlimits and finally `exec`s `argv`;
///   * the intermediate only waits for the command and mirrors its exit code.
///
/// The parent reads stdout/stderr on helper threads (each capped at
/// `output_max` bytes via `ReadPipeCtx`) and enforces `timeout_ns` by killing
/// the whole process group.
///
/// Returns `error.OutputTooLarge` when either stream was truncated. On
/// success the caller owns `stdout` and `stderr` of the returned `RunResult`
/// (allocated with `allocator`); `exit_code` is null after a timeout.
///
/// A sandbox step that fails in a child never returns into the caller's code:
/// the child writes a short message to the stderr pipe and exits with 126
/// (127 when the command itself cannot be executed).
fn runCommandSandboxed(
    allocator: Allocator,
    cwd_path: []const u8,
    timeout_ns: u64,
    output_max: usize,
    argv: []const []const u8,
) !RunResult {
    // Everything the children need is allocated BEFORE fork(): the service is
    // multi-threaded, so a forked child must not touch the allocator. The
    // arena also releases partially built argv storage on error paths.
    var arena_state = std.heap.ArenaAllocator.init(allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const argv_z = try arena.allocSentinel(?[*:0]const u8, argv.len, null);
    for (argv, argv_z) |arg, *slot| slot.* = (try arena.dupeZ(u8, arg)).ptr;

    const env_in = std.os.environ;
    const envp = try arena.allocSentinel(?[*:0]const u8, env_in.len, null);
    for (env_in, envp) |item, *slot| slot.* = item;

    // Monotonic clock used to keep the deadline across spurious wakeups. When
    // it is unavailable we fall back to trusting a single timed wait.
    var timer: ?std.time.Timer = std.time.Timer.start() catch null;

    // CLOEXEC keeps these descriptors out of commands exec'd by concurrent
    // runs; the dup2'ed copies on fd 1/2 do not carry the flag.
    const stdout_pipe = try std.posix.pipe2(.{ .CLOEXEC = true });
    const stderr_pipe = std.posix.pipe2(.{ .CLOEXEC = true }) catch |err| {
        closeSandboxPipe(stdout_pipe);
        return err;
    };

    const pid = std.posix.fork() catch |err| {
        closeSandboxPipe(stdout_pipe);
        closeSandboxPipe(stderr_pipe);
        return err;
    };
    if (pid == 0) {
        sandboxIntermediateMain(stdout_pipe, stderr_pipe, cwd_path, timeout_ns, argv_z.ptr, envp.ptr);
    }

    std.posix.close(stdout_pipe[1]);
    std.posix.close(stderr_pipe[1]);
    // Set the process group from this side as well so that kill(-pid) cannot
    // race with the child's own setpgid call. Best effort: the child does the
    // same, and either call succeeding is enough.
    std.posix.setpgid(pid, pid) catch {};

    var stdout: std.ArrayList(u8) = .empty;
    var stderr: std.ArrayList(u8) = .empty;
    defer stdout.deinit(allocator);
    defer stderr.deinit(allocator);

    var out_truncated = false;
    var err_truncated = false;
    var out_ctx = ReadPipeCtx{
        .file = std.fs.File{ .handle = stdout_pipe[0] },
        .list = &stdout,
        .allocator = allocator,
        .max_bytes = output_max,
        .truncated = &out_truncated,
    };
    var err_ctx = ReadPipeCtx{
        .file = std.fs.File{ .handle = stderr_pipe[0] },
        .list = &stderr,
        .allocator = allocator,
        .max_bytes = output_max,
        .truncated = &err_truncated,
    };

    // If a helper thread cannot be started, the child is killed and reaped and
    // every thread already running is joined before returning, because those
    // threads reference locals of this stack frame.
    const out_thread = std.Thread.spawn(.{}, readPipe, .{&out_ctx}) catch |err| {
        killAndReapSandbox(pid);
        std.posix.close(stdout_pipe[0]);
        std.posix.close(stderr_pipe[0]);
        return err;
    };
    const err_thread = std.Thread.spawn(.{}, readPipe, .{&err_ctx}) catch |err| {
        killAndReapSandbox(pid);
        out_thread.join();
        std.posix.close(stderr_pipe[0]);
        return err;
    };

    var wait_state = WaitState{};
    const waiter = std.Thread.spawn(.{}, waiterThread, .{ pid, &wait_state }) catch |err| {
        killAndReapSandbox(pid);
        out_thread.join();
        err_thread.join();
        return err;
    };

    var exit_code: ?i32 = null;
    var timed_out = false;

    if (timer) |*t| t.reset();
    wait_state.mutex.lock();
    // Condition variables may wake spuriously, so re-check the deadline on
    // every wakeup instead of treating any return as "the process finished".
    while (!wait_state.done) {
        const elapsed: u64 = if (timer) |*t| t.read() else 0;
        if (elapsed >= timeout_ns) {
            timed_out = true;
            break;
        }
        wait_state.cond.timedWait(&wait_state.mutex, timeout_ns - elapsed) catch {
            // error.Timeout: with a clock the loop re-checks the deadline;
            // without one this is the only timeout signal available.
            if (timer == null) {
                timed_out = true;
                break;
            }
        };
    }
    if (!timed_out) {
        exit_code = decodeExitCode(wait_state.status);
    }
    wait_state.mutex.unlock();

    if (timed_out) {
        std.log.warn("run timed out; killing process group", .{});
        _ = std.posix.kill(-pid, std.posix.SIG.KILL) catch {};
        wait_state.mutex.lock();
        while (!wait_state.done) {
            wait_state.cond.wait(&wait_state.mutex);
        }
        wait_state.mutex.unlock();
        exit_code = null;
    }

    out_thread.join();
    err_thread.join();
    waiter.join();
    // NOTE(review): the pipe read ends are not closed here because `readPipe`
    // is not visible from this block; confirm that it closes `ctx.file`,
    // otherwise two descriptors leak per run.

    if (out_truncated or err_truncated) {
        return error.OutputTooLarge;
    }

    const stdout_owned = try stdout.toOwnedSlice(allocator);
    errdefer allocator.free(stdout_owned);
    const stderr_owned = try stderr.toOwnedSlice(allocator);

    return RunResult{
        .exit_code = exit_code,
        .stdout = stdout_owned,
        .stderr = stderr_owned,
    };
}

/// Closes both ends of a pipe created by `runCommandSandboxed`.
fn closeSandboxPipe(pipe_fds: [2]std.posix.fd_t) void {
    std.posix.close(pipe_fds[0]);
    std.posix.close(pipe_fds[1]);
}

/// Kills the sandbox process group (and the intermediate itself, in case the
/// group was never created) and reaps the intermediate process.
fn killAndReapSandbox(pid: std.posix.pid_t) void {
    std.posix.kill(-pid, std.posix.SIG.KILL) catch {};
    std.posix.kill(pid, std.posix.SIG.KILL) catch {};
    _ = std.posix.waitpid(pid, 0);
}

/// Reports a sandbox setup failure on `report_fd` and terminates the calling
/// (forked) process with exit code 126. Uses only raw syscalls so it is safe
/// to call after fork() in a multi-threaded program.
fn sandboxSetupFailed(report_fd: std.posix.fd_t, message: []const u8) noreturn {
    _ = std.posix.write(report_fd, message) catch 0;
    std.posix.exit(126);
}

/// Body of the first forked process: becomes a process-group leader, unshares
/// its namespaces, forks the real command and mirrors its exit code.
/// Never returns into the caller.
fn sandboxIntermediateMain(
    stdout_pipe: [2]std.posix.fd_t,
    stderr_pipe: [2]std.posix.fd_t,
    cwd_path: []const u8,
    timeout_ns: u64,
    argv_z: [*:null]const ?[*:0]const u8,
    envp: [*:null]const ?[*:0]const u8,
) noreturn {
    std.posix.close(stdout_pipe[0]);
    std.posix.close(stderr_pipe[0]);
    // Best effort; the parent performs the same call from its side.
    std.posix.setpgid(0, 0) catch {};

    unshareNamespaces() catch
        sandboxSetupFailed(stderr_pipe[1], "sandbox: failed to unshare namespaces\n");

    const child_pid = std.posix.fork() catch
        sandboxSetupFailed(stderr_pipe[1], "sandbox: failed to fork command process\n");
    if (child_pid == 0) {
        sandboxExecCommand(stdout_pipe[1], stderr_pipe[1], cwd_path, timeout_ns, argv_z, envp);
    }

    // The command owns the write ends now; holding them here would only delay
    // EOF for the readers.
    std.posix.close(stdout_pipe[1]);
    std.posix.close(stderr_pipe[1]);

    const wait_result = std.posix.waitpid(child_pid, 0);
    const exit_code = decodeExitCode(wait_result.status) orelse 255;
    std.posix.exit(@intCast(std.math.clamp(exit_code, 0, 255)));
}

/// Body of the command process: wires stdio, enters `cwd_path`, drops
/// privileges, applies rlimits and executes the command. Never returns.
/// Any failed isolation step aborts the run instead of executing the command
/// with more privileges than intended.
fn sandboxExecCommand(
    stdout_fd: std.posix.fd_t,
    stderr_fd: std.posix.fd_t,
    cwd_path: []const u8,
    timeout_ns: u64,
    argv_z: [*:null]const ?[*:0]const u8,
    envp: [*:null]const ?[*:0]const u8,
) noreturn {
    // `stdout_fd`/`stderr_fd` are CLOEXEC, so they close themselves on exec;
    // `stderr_fd` stays usable for failure reports until then.
    std.posix.dup2(stdout_fd, std.posix.STDOUT_FILENO) catch
        sandboxSetupFailed(stderr_fd, "sandbox: failed to redirect stdout\n");
    std.posix.dup2(stderr_fd, std.posix.STDERR_FILENO) catch
        sandboxSetupFailed(stderr_fd, "sandbox: failed to redirect stderr\n");

    // stdin redirection stays best effort, as in the original implementation.
    if (std.posix.open("/dev/null", .{ .ACCMODE = .RDONLY }, 0)) |devnull| {
        if (devnull != std.posix.STDIN_FILENO) {
            std.posix.dup2(devnull, std.posix.STDIN_FILENO) catch {};
            std.posix.close(devnull);
        }
    } else |_| {}

    std.posix.chdir(cwd_path) catch
        sandboxSetupFailed(stderr_fd, "sandbox: failed to enter working directory\n");

    // NOTE(review): supplementary groups are not cleared before the UID/GID
    // drop; consider calling setgroups(0, ...) here.
    std.posix.setgid(10001) catch
        sandboxSetupFailed(stderr_fd, "sandbox: failed to drop group privileges\n");
    std.posix.setuid(10001) catch
        sandboxSetupFailed(stderr_fd, "sandbox: failed to drop user privileges\n");

    const linux = std.os.linux;
    if (linux.prctl(@intFromEnum(linux.PR.SET_NO_NEW_PRIVS), 1, 0, 0, 0) != 0) {
        sandboxSetupFailed(stderr_fd, "sandbox: failed to set no_new_privs\n");
    }
    applyRlimits(timeout_ns) catch
        sandboxSetupFailed(stderr_fd, "sandbox: failed to apply resource limits\n");

    const file = argv_z[0] orelse {
        _ = std.posix.write(std.posix.STDERR_FILENO, "sandbox: empty command\n") catch 0;
        std.posix.exit(127);
    };

    // execvpeZ only returns on failure, and then it returns the error itself.
    const exec_err = std.posix.execvpeZ(file, argv_z, envp);
    _ = std.posix.write(std.posix.STDERR_FILENO, "sandbox: exec failed: ") catch 0;
    _ = std.posix.write(std.posix.STDERR_FILENO, @errorName(exec_err)) catch 0;
    _ = std.posix.write(std.posix.STDERR_FILENO, "\n") catch 0;
    std.posix.exit(127);
}
880+
881+
/// Moves the calling process into fresh mount, PID, IPC, UTS and network
/// namespaces, then marks the whole mount tree under `/` as recursively
/// private.
///
/// Errors:
///   * `error.UnshareFailed` when the `unshare` syscall reports a failure;
///   * `error.MountPrivateFailed` when remounting `/` as private fails.
fn unshareNamespaces() !void {
    const sys = std.os.linux;

    const namespace_flags =
        sys.CLONE.NEWNS |
        sys.CLONE.NEWPID |
        sys.CLONE.NEWIPC |
        sys.CLONE.NEWUTS |
        sys.CLONE.NEWNET;
    const unshare_rc = sys.unshare(namespace_flags);
    if (unshare_rc != 0) {
        return error.UnshareFailed;
    }

    // No source and no filesystem type: only the propagation flags of the
    // existing mounts are changed.
    const propagation_flags = sys.MS.REC | sys.MS.PRIVATE;
    const mount_rc = sys.mount(null, "/", null, propagation_flags, 0);
    if (mount_rc != 0) {
        return error.MountPrivateFailed;
    }
}
888+
889+
/// Applies the per-run resource limits to the calling process:
///
///   * `RLIMIT_CPU`    – `timeout_ns` rounded up to whole seconds (minimum 1);
///   * `RLIMIT_NOFILE` – 256 open files;
///   * `RLIMIT_NPROC`  – 256 processes/threads;
///   * `RLIMIT_CORE`   – 0 (no core dumps).
///
/// Each requested value is capped at the existing hard limit, so a stricter
/// limit inherited from the environment is kept rather than causing an
/// `EPERM` failure. Every limit is attempted even when an earlier one fails;
/// the first failure (if any) is returned after all limits were tried.
fn applyRlimits(timeout_ns: u64) !void {
    // Ceiling division written so that it cannot overflow for large timeouts
    // (`timeout_ns + ns_per_s - 1` would).
    const whole_seconds = timeout_ns / std.time.ns_per_s;
    const has_remainder = timeout_ns % std.time.ns_per_s != 0;
    const cpu_seconds: u64 = @max(1, whole_seconds + @intFromBool(has_remainder));

    const Limit = struct {
        resource: std.posix.rlimit_resource,
        value: u64,
    };
    const limits = [_]Limit{
        .{ .resource = .CPU, .value = cpu_seconds },
        .{ .resource = .NOFILE, .value = 256 },
        .{ .resource = .NPROC, .value = 256 },
        .{ .resource = .CORE, .value = 0 },
    };

    var first_error: ?(std.posix.GetrlimitError || std.posix.SetrlimitError) = null;
    for (limits) |limit| {
        const current = std.posix.getrlimit(limit.resource) catch |err| {
            first_error = first_error orelse err;
            continue;
        };
        // Never ask for more than the current hard limit allows.
        const value = @min(limit.value, current.max);
        std.posix.setrlimit(limit.resource, .{ .cur = value, .max = value }) catch |err| {
            first_error = first_error orelse err;
        };
    }
    if (first_error) |err| return err;
}
896+
722897
fn decodeExitCode(status: u32) ?i32 {
723898
if (std.posix.W.IFEXITED(status)) {
724899
return @as(i32, @intCast(std.posix.W.EXITSTATUS(status)));

0 commit comments

Comments
 (0)