diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 52e953c..cd4ea6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -113,3 +113,34 @@ jobs: - name: Run Python tests working-directory: python run: pytest tests/ -v + + go: + name: Go SDK (${{ matrix.runner }}) + runs-on: ${{ matrix.runner }} + needs: rust + strategy: + fail-fast: false + matrix: + runner: [ubuntu-latest, ubuntu-24.04-arm] + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.23" + + - name: Build FFI library + run: cargo build --release -p sandlock-ffi + + - name: Vet and test + working-directory: go + run: | + go vet ./... + go test ./... -v diff --git a/go/README.md b/go/README.md new file mode 100644 index 0000000..7bbf1de --- /dev/null +++ b/go/README.md @@ -0,0 +1,149 @@ +# sandlock Go SDK + +Go bindings for [sandlock](https://github.com/multikernel/sandlock), a +lightweight Linux process sandbox built on Landlock, seccomp-bpf, and seccomp +user notification. No root, no Docker, no namespaces. + +The bindings bind the sandlock C ABI (`libsandlock_ffi`) via cgo, mirroring the +Python SDK's `Sandbox` surface. **Linux only**; the runtime requires Linux +6.12+ (Landlock ABI v6). + +```go +import sandlock "github.com/multikernel/sandlock/go" +``` + +## Building + +cgo links against `libsandlock_ffi`, produced by the Rust workspace. The +default link flags resolve the library relative to this package +(`../target/release`), so build from a checkout of the sandlock repository: + +```bash +cargo build --release # writes target/release/libsandlock_ffi.so +cd go && go test ./... 
+``` + +To use the SDK from another module, point cgo at an installed library, e.g.: + +```bash +CGO_LDFLAGS="-L/usr/local/lib -Wl,-rpath,/usr/local/lib" go build +``` + +## Quick start + +```go +package main + +import ( + "context" + "fmt" + "log" + + sandlock "github.com/multikernel/sandlock/go" +) + +func main() { + sb := &sandlock.Sandbox{ + FSReadable: []string{"/usr", "/lib", "/lib64", "/bin", "/etc"}, + FSWritable: []string{"/tmp"}, + } + res, err := sb.Run(context.Background(), "echo", "hello") + if err != nil { + log.Fatal(err) + } + fmt.Printf("exit=%d: %s", res.ExitCode, res.Stdout) // exit=0: hello +} +``` + +## API + +### Sandbox + +`Sandbox` is a plain configuration struct; every field is optional and an unset +field means "no restriction" unless noted. sandlock's default syscall blocklist +is always applied. A `Sandbox` carries no runtime state, so it is safe to reuse +and share across goroutines — `Run`, `RunInteractive`, and `DryRun` build a +fresh native policy on each call. + +| Group | Fields | +|---|---| +| Filesystem | `FSReadable`, `FSWritable`, `FSDenied`, `Workdir`, `Cwd`, `Chroot`, `FSMount` | +| Network | `NetAllow`, `NetBind`, `PortRemap` | +| HTTP ACL | `HTTPAllow`, `HTTPDeny`, `HTTPPorts`, `HTTPCAFile`, `HTTPKeyFile` | +| Resources | `MaxMemory`, `MaxDisk`, `MaxProcesses`, `MaxCPU`, `MaxOpenFiles`, `CPUCores`, `NumCPUs`, `GPUDevices` | +| Syscalls | `ExtraAllowSyscalls`, `ExtraDenySyscalls` | +| Determinism | `RandomSeed`, `TimeStart`, `NoRandomizeMemory`, `NoHugePages`, `DeterministicDirs` | +| Environment | `CleanEnv`, `Env` | +| Misc | `UID`, `NoCoredump`, `Name` | +| COW branch | `FSStorage`, `OnExit`, `OnError` | + +`NetAllow` entries follow sandlock's rule grammar: bare `host:port` is TCP +(`"api.openai.com:443"`, `"github.com:22,443"`, `":53"`); scheme prefixes opt +other protocols in (`"udp://1.1.1.1:53"`, `"udp://*:*"`, `"icmp://host"`, +`"icmp://*"`). 
`NetBind` entries are single ports (`"8080"`) or inclusive +ranges (`"3000-3010"`). + +### Execution + +```go +func (s *Sandbox) Run(ctx context.Context, cmd ...string) (*Result, error) +func (s *Sandbox) RunInteractive(ctx context.Context, cmd ...string) (int, error) +func (s *Sandbox) DryRun(ctx context.Context, cmd ...string) (*DryRunResult, error) +func (s *Sandbox) Spawn(cmd ...string) (*Process, error) +``` + +- **Run** captures stdout/stderr and waits. A `ctx` deadline kills the process + and returns a result with `ExitCode == -1`. `ctx` cancellation without a + deadline does not preempt a running child. +- **RunInteractive** inherits the caller's stdio and returns the exit code. +- **DryRun** runs against a temporary copy-on-write layer, reports the + filesystem `Changes` it would have made, and discards them. Requires + `Workdir`. +- **Spawn** starts a process without waiting, returning a `*Process`. + +### Process lifecycle + +```go +func (p *Process) Pid() int +func (p *Process) Wait() (*Result, error) +func (p *Process) Pause() error // SIGSTOP to the process group +func (p *Process) Resume() error // SIGCONT +func (p *Process) Kill() error // SIGKILL +func (p *Process) Ports() (map[int]int, error) // virtual→real, with PortRemap +func (p *Process) Close() error // release the handle (kills if running) +``` + +### Confine the current process + +```go +func Confine(s *Sandbox) error +``` + +Applies the sandbox's Landlock filesystem rules to the **current** process, in +place and irreversibly — no fork, no exec. Only filesystem fields are honored; +configuration that needs a supervisor or a fresh child (seccomp, network, +resource limits, environment, ...) is rejected rather than silently ignored. +This is something the `sandlock` CLI cannot do. 
// Sentinel errors exposed by the SDK. Callers should match them with
// errors.Is rather than by comparing message text.
var (
	// ErrInvalidString reports that a string handed to the SDK has an interior
	// NUL byte and therefore cannot be passed through the C ABI unchanged.
	ErrInvalidString = errors.New("sandlock: string contains NUL byte")

	// ErrNotRunning is what *Process lifecycle methods return when the handle
	// has no running process to act on.
	ErrNotRunning = errors.New("sandlock: process is not running")
)
var (
	// sizeRe matches a decimal number with an optional K/M/G/T suffix,
	// tolerating surrounding and interior whitespace.
	sizeRe = regexp.MustCompile(`^\s*(\d+(?:\.\d+)?)\s*([KMGTkmgt])?\s*$`)
	// portRe matches a single port ("80") or an inclusive range ("80-90").
	portRe = regexp.MustCompile(`^(\d+)(?:-(\d+))?$`)
)

// sizeUnits maps an upper-case size suffix to its binary multiplier.
var sizeUnits = map[byte]uint64{
	'K': 1 << 10,
	'M': 1 << 20,
	'G': 1 << 30,
	'T': 1 << 40,
}

// uint64Limit is 2^64 as a float64: the smallest value that no longer fits in
// a uint64. Converting a float at or above it to uint64 is not well defined in
// Go, so the parsers reject such inputs instead of converting them.
const uint64Limit = float64(1 << 64)

// ParseMemory parses a human-friendly size string into bytes. It accepts a
// plain number (bytes) or a value suffixed with K, M, G, or T (case
// insensitive), e.g. "512M", "1G", "100K", "1.5G". Fractional results are
// truncated toward zero.
//
// It returns an error when s does not match that grammar or when the resulting
// byte count does not fit in a uint64.
func ParseMemory(s string) (uint64, error) {
	m := sizeRe.FindStringSubmatch(s)
	if m == nil {
		return 0, fmt.Errorf("invalid memory size: %q", s)
	}
	// ParseFloat reports a range error (and returns +Inf) for absurdly long
	// digit strings, so those are rejected here as well.
	value, err := strconv.ParseFloat(m[1], 64)
	if err != nil {
		return 0, fmt.Errorf("invalid memory size: %q", s)
	}
	if m[2] != "" {
		value *= float64(sizeUnits[strings.ToUpper(m[2])[0]])
	}
	// The regexp only admits non-negative digits, so overflow is the one
	// remaining way the conversion below could misbehave.
	if value >= uint64Limit {
		return 0, fmt.Errorf("memory size out of range: %q", s)
	}
	return uint64(value), nil
}

// ParsePorts expands a list of port specs into a sorted, de-duplicated list of
// individual port numbers. Each spec is a single port ("80") or an inclusive
// range ("8000-9000"); surrounding whitespace is ignored. Values must fall in
// [0, 65535] and a range's lower bound must not exceed its upper bound.
//
// The result is always non-nil, even for an empty input.
func ParsePorts(specs []string) ([]uint16, error) {
	seen := map[uint16]struct{}{}
	for _, spec := range specs {
		m := portRe.FindStringSubmatch(strings.TrimSpace(spec))
		if m == nil {
			return nil, fmt.Errorf("invalid port spec: %q", spec)
		}
		// Atoi fails only for digit strings too large for an int.
		lo, err := strconv.Atoi(m[1])
		if err != nil {
			return nil, fmt.Errorf("invalid port spec: %q", spec)
		}
		hi := lo
		if m[2] != "" {
			if hi, err = strconv.Atoi(m[2]); err != nil {
				return nil, fmt.Errorf("invalid port spec: %q", spec)
			}
		}
		if lo > hi || lo < 0 || hi > 65535 {
			return nil, fmt.Errorf("invalid port range: %q", spec)
		}
		// The loop counter stays an int so that hi == 65535 cannot wrap.
		for p := lo; p <= hi; p++ {
			seen[uint16(p)] = struct{}{}
		}
	}
	out := make([]uint16, 0, len(seen))
	for p := range seen {
		out = append(out, p)
	}
	slices.Sort(out)
	return out, nil
}

// ParseTimeStart resolves a time-virtualization start point to whole seconds
// since the Unix epoch. It accepts a plain integer/float number of seconds
// (truncated toward zero) or an RFC 3339 timestamp such as
// "2000-01-01T00:00:00Z".
//
// Inputs that cannot be represented as an unsigned second count are rejected:
// negative numbers, NaN, infinities, values of 2^64 or more, and timestamps
// before 1970-01-01T00:00:00Z.
func ParseTimeStart(s string) (uint64, error) {
	s = strings.TrimSpace(s)
	if f, err := strconv.ParseFloat(s, 64); err == nil {
		// f != f is true only for NaN, which ParseFloat accepts as "NaN";
		// the upper bound also catches "Inf".
		if f != f || f < 0 || f >= uint64Limit {
			return 0, fmt.Errorf("invalid time_start: %q", s)
		}
		return uint64(f), nil
	}
	t, err := time.Parse(time.RFC3339, s)
	if err != nil {
		return 0, fmt.Errorf("invalid time_start: %q (want RFC3339 or unix seconds)", s)
	}
	secs := t.Unix()
	if secs < 0 {
		// A pre-epoch instant would wrap to a huge unsigned value.
		return 0, fmt.Errorf("invalid time_start: %q (before the Unix epoch)", s)
	}
	return uint64(secs), nil
}
err == nil { + t.Errorf("ParsePorts(%v): expected error, got %v", c.in, got) + } + continue + } + if err != nil { + t.Errorf("ParsePorts(%v): unexpected error: %v", c.in, err) + continue + } + if !reflect.DeepEqual(got, c.want) { + t.Errorf("ParsePorts(%v) = %v, want %v", c.in, got, c.want) + } + } +} + +func TestParseTimeStart(t *testing.T) { + cases := []struct { + in string + want uint64 + wantErr bool + }{ + {"0", 0, false}, + {"946684800", 946684800, false}, + {"2000-01-01T00:00:00Z", 946684800, false}, + {"", 0, true}, + {"not-a-time", 0, true}, + {"-5", 0, true}, + } + for _, c := range cases { + got, err := ParseTimeStart(c.in) + if c.wantErr { + if err == nil { + t.Errorf("ParseTimeStart(%q): expected error, got %d", c.in, got) + } + continue + } + if err != nil { + t.Errorf("ParseTimeStart(%q): unexpected error: %v", c.in, err) + continue + } + if got != c.want { + t.Errorf("ParseTimeStart(%q) = %d, want %d", c.in, got, c.want) + } + } +} diff --git a/go/sandbox.go b/go/sandbox.go new file mode 100644 index 0000000..50a3c3c --- /dev/null +++ b/go/sandbox.go @@ -0,0 +1,157 @@ +// Package sandlock provides Go bindings for sandlock, a lightweight Linux +// process sandbox built on Landlock, seccomp-bpf, and seccomp user +// notification. It binds the sandlock C ABI (libsandlock_ffi) via cgo and +// mirrors the Python SDK's Sandbox surface. +// +// The bindings are Linux-only. The runtime requires Linux 6.12+ (Landlock +// ABI v6); see the project README for the full kernel feature matrix. +// +// # Building +// +// cgo links against libsandlock_ffi, which is produced by the Rust workspace: +// +// cargo build --release # writes target/release/libsandlock_ffi.so +// cd go && go test ./... +// +// The default cgo link flags resolve the library relative to this package +// (../target/release). Build from a checkout of the sandlock repository, or +// adjust the link flags for an installed library. 
// BranchAction is the action taken on a copy-on-write working-directory
// branch when the sandbox exits. The zero value, BranchActionDefault, leaves
// the choice to sandlock's own defaults (commit on success, abort on error).
//
// The Go constants are shifted up by one relative to the value handed to the
// native layer: buildPolicy skips the setter entirely for BranchActionDefault
// and otherwise passes the constant minus one. Do not reorder the constants.
type BranchAction uint8

const (
	// BranchActionDefault defers to sandlock's built-in default.
	BranchActionDefault BranchAction = iota
	// BranchActionCommit merges the branch's writes into the parent on exit.
	BranchActionCommit
	// BranchActionAbort discards all of the branch's writes on exit.
	BranchActionAbort
	// BranchActionKeep leaves the branch in place for the caller to handle.
	BranchActionKeep
)

// Sandbox holds the policy configuration for confining a process. Every field
// is optional; an unset field means "no restriction" unless documented
// otherwise. sandlock's default syscall blocklist is always applied.
//
// A Sandbox value carries no runtime state: Run, RunInteractive, and DryRun
// build a fresh native policy on each call, so a single Sandbox may be reused
// and shared across goroutines. Use Spawn for explicit process lifecycle
// control, which returns an independent *Process handle.
//
// No string field, slice element, or map key/value may contain a NUL byte;
// such a configuration is rejected with ErrInvalidString before any native
// policy is built.
type Sandbox struct {
	// Filesystem (Landlock).
	FSReadable []string // paths the sandbox may read (and execute)
	FSWritable []string // paths the sandbox may write
	FSDenied   []string // paths explicitly denied

	Workdir string // copy-on-write root; enables COW protection of this tree; required by DryRun
	Cwd     string // child working directory (chdir target)
	Chroot  string // path to chroot into before applying confinement

	// FSMount maps virtual paths inside the chroot to host directories,
	// like a bind mount without kernel mounts or root. Map iteration order is
	// unspecified, so the order in which mounts reach the native layer is too.
	FSMount map[string]string

	// Network.
	//
	// NetAllow entries are outbound endpoint rules. The bare form is TCP
	// ("api.openai.com:443", "github.com:22,443", ":53"); scheme prefixes opt
	// other protocols in ("tcp://", "udp://host:port", "udp://*:*",
	// "icmp://host", "icmp://*"). Empty denies all outbound.
	NetAllow []string
	// NetBind lists TCP ports the sandbox may bind. Each entry is a single
	// port ("8080") or an inclusive range ("3000-3010"). Empty denies all.
	// Entries are expanded, sorted, and de-duplicated before being applied.
	NetBind   []string
	PortRemap bool // transparent per-sandbox TCP port virtualization

	// HTTP ACL (method + host + path rules via a transparent proxy).
	HTTPAllow   []string // allow rules, "METHOD host/path"
	HTTPDeny    []string // deny rules, checked before allow rules
	HTTPPorts   []int    // ports to intercept (defaults to 80, plus 443 with a CA); passed to the native layer as 16-bit values
	HTTPCAFile  string   // PEM CA certificate for HTTPS MITM
	HTTPKeyFile string   // PEM CA private key (required with HTTPCAFile)

	// Resource limits.
	MaxMemory    string   // e.g. "512M" (K/M/G/T suffixes, case-insensitive); empty = unlimited
	MaxDisk      string   // disk quota for COW storage, e.g. "1G"; same grammar as MaxMemory
	MaxProcesses uint32   // lifetime fork cap; 0 = sandlock default
	MaxCPU       uint8    // CPU throttle, percent of one core (1-100); 0 = unset
	MaxOpenFiles uint32   // RLIMIT_NOFILE; 0 = inherit system default
	CPUCores     []uint32 // cores to pin to via sched_setaffinity
	NumCPUs      uint32   // synthetic /proc/cpuinfo processor count; 0 = unset
	GPUDevices   []uint32 // GPU device indices to expose; nil = none (a non-nil empty slice is still forwarded, with a count of zero)

	// Syscall filtering (on top of sandlock's default blocklist).
	// Each list is joined with commas into one string before it is applied.
	ExtraAllowSyscalls []string // syscall groups to allow, e.g. "sysv_ipc"
	ExtraDenySyscalls  []string // extra syscall names to block

	// Determinism.
	RandomSeed        *uint64 // seed getrandom() deterministically; nil = unset
	TimeStart         string  // virtual clock start: RFC3339 or unix seconds
	NoRandomizeMemory bool    // disable ASLR
	NoHugePages       bool    // disable transparent huge pages
	DeterministicDirs bool    // sort readdir() entries

	// Environment.
	CleanEnv bool              // start from a minimal environment
	Env      map[string]string // variables to set/override in the child

	// Misc.
	UID        *int // map to this UID inside a user namespace; nil = unset; passed to the native layer as a 32-bit unsigned value
	NoCoredump bool // disable core dumps and restrict /proc/pid access

	// Copy-on-write branch handling.
	FSStorage string       // storage directory for COW deltas
	OnExit    BranchAction // branch action on normal exit
	OnError   BranchAction // branch action on error exit

	// Name is the sandbox name and its virtual hostname inside the sandbox.
	// Empty auto-generates "sandbox-{pid}".
	Name string
}

// Result is the outcome of a captured run.
type Result struct {
	ExitCode int    // process exit code, or -1 if terminated abnormally
	Success  bool   // true when the process exited 0
	Stdout   []byte // captured standard output; nil when nothing was captured
	Stderr   []byte // captured standard error; nil when nothing was captured
}
+type ChangeKind byte + +const ( + ChangeAdded ChangeKind = 'A' + ChangeModified ChangeKind = 'M' + ChangeDeleted ChangeKind = 'D' +) + +// Change is a single filesystem change detected by DryRun. +type Change struct { + Kind ChangeKind // 'A' added, 'M' modified, 'D' deleted + Path string // path relative to the working directory +} + +// DryRunResult is the outcome of a dry run: a normal Result plus the list of +// filesystem changes the command would have made, all of which are discarded. +type DryRunResult struct { + Result + Changes []Change +} diff --git a/go/sandlock_linux.go b/go/sandlock_linux.go new file mode 100644 index 0000000..37ed41b --- /dev/null +++ b/go/sandlock_linux.go @@ -0,0 +1,749 @@ +//go:build linux + +package sandlock + +/* +#cgo CFLAGS: -I${SRCDIR}/../crates/sandlock-ffi/include +#cgo LDFLAGS: -L${SRCDIR}/../target/release -Wl,-rpath,${SRCDIR}/../target/release -lsandlock_ffi -lpthread -ldl -lm + +// The C declarations come from the cbindgen-generated header, so the cgo +// prototypes stay in lock-step with crates/sandlock-ffi automatically. +#include <stdlib.h> +#include "sandlock.h" +*/ +import "C" + +import ( + "context" + "encoding/json" + "fmt" + "runtime" + "strings" + "sync" + "syscall" + "time" + "unsafe" + + "github.com/multikernel/sandlock/go/internal/policy" +) + +// hasNUL reports whether s contains an interior NUL byte, which cannot survive +// the conversion to a C string. +func hasNUL(s string) bool { return strings.IndexByte(s, 0) >= 0 } + +func cbool(v bool) C.bool { return C.bool(v) } + +// validateStrings rejects any configuration string carrying a NUL byte before +// a builder is allocated. The FFI has no builder-free entry point, so a failure +// partway through building would leak the builder; validating up front keeps +// buildPolicy infallible with respect to string conversion. 
+func (s *Sandbox) validateStrings() error { + groups := [][]string{ + s.FSReadable, s.FSWritable, s.FSDenied, + s.NetAllow, s.NetBind, + s.HTTPAllow, s.HTTPDeny, + s.ExtraAllowSyscalls, s.ExtraDenySyscalls, + {s.Workdir, s.Cwd, s.Chroot, s.FSStorage, s.MaxMemory, s.MaxDisk, + s.TimeStart, s.HTTPCAFile, s.HTTPKeyFile, s.Name}, + } + for _, g := range groups { + for _, v := range g { + if hasNUL(v) { + return ErrInvalidString + } + } + } + for k, v := range s.FSMount { + if hasNUL(k) || hasNUL(v) { + return ErrInvalidString + } + } + for k, v := range s.Env { + if hasNUL(k) || hasNUL(v) { + return ErrInvalidString + } + } + return nil +} + +// buildPolicy translates the Sandbox configuration into a native policy handle. +// The returned pointer must be released with C.sandlock_sandbox_free. +func (s *Sandbox) buildPolicy() (*C.sandlock_sandbox_t, error) { + if err := s.validateStrings(); err != nil { + return nil, err + } + + b := C.sandlock_sandbox_builder_new() + + // str calls a one-string builder setter, freeing the C string afterward. 
+ str := func(fn func(*C.sandlock_builder_t, *C.char) *C.sandlock_builder_t, val string) { + c := C.CString(val) + b = fn(b, c) + C.free(unsafe.Pointer(c)) + } + + for _, p := range s.FSReadable { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_fs_read(b, c) + }, p) + } + for _, p := range s.FSWritable { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_fs_write(b, c) + }, p) + } + for _, p := range s.FSDenied { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_fs_deny(b, c) + }, p) + } + if s.Workdir != "" { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_workdir(b, c) + }, s.Workdir) + } + if s.Cwd != "" { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_cwd(b, c) + }, s.Cwd) + } + if s.Chroot != "" { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_chroot(b, c) + }, s.Chroot) + } + for vp, hp := range s.FSMount { + cv, ch := C.CString(vp), C.CString(hp) + b = C.sandlock_sandbox_builder_fs_mount(b, cv, ch) + C.free(unsafe.Pointer(cv)) + C.free(unsafe.Pointer(ch)) + } + + // Network. + for _, spec := range s.NetAllow { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_net_allow(b, c) + }, spec) + } + if len(s.NetBind) > 0 { + ports, err := policy.ParsePorts(s.NetBind) + if err != nil { + freeBuilderViaBuild(b) + return nil, err + } + for _, p := range ports { + b = C.sandlock_sandbox_builder_net_bind_port(b, C.uint16_t(p)) + } + } + if s.PortRemap { + b = C.sandlock_sandbox_builder_port_remap(b, cbool(true)) + } + + // HTTP ACL. 
+ for _, r := range s.HTTPAllow { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_http_allow(b, c) + }, r) + } + for _, r := range s.HTTPDeny { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_http_deny(b, c) + }, r) + } + for _, p := range s.HTTPPorts { + b = C.sandlock_sandbox_builder_http_port(b, C.uint16_t(p)) + } + if s.HTTPCAFile != "" { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_http_ca(b, c) + }, s.HTTPCAFile) + } + if s.HTTPKeyFile != "" { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_http_key(b, c) + }, s.HTTPKeyFile) + } + + // Resource limits. + if s.MaxMemory != "" { + v, err := policy.ParseMemory(s.MaxMemory) + if err != nil { + freeBuilderViaBuild(b) + return nil, err + } + b = C.sandlock_sandbox_builder_max_memory(b, C.uint64_t(v)) + } + if s.MaxDisk != "" { + v, err := policy.ParseMemory(s.MaxDisk) + if err != nil { + freeBuilderViaBuild(b) + return nil, err + } + b = C.sandlock_sandbox_builder_max_disk(b, C.uint64_t(v)) + } + if s.MaxProcesses > 0 { + b = C.sandlock_sandbox_builder_max_processes(b, C.uint32_t(s.MaxProcesses)) + } + if s.MaxCPU > 0 { + b = C.sandlock_sandbox_builder_max_cpu(b, C.uint8_t(s.MaxCPU)) + } + if s.MaxOpenFiles > 0 { + b = C.sandlock_sandbox_builder_max_open_files(b, C.uint(s.MaxOpenFiles)) + } + if s.NumCPUs > 0 { + b = C.sandlock_sandbox_builder_num_cpus(b, C.uint32_t(s.NumCPUs)) + } + if len(s.CPUCores) > 0 { + b = C.sandlock_sandbox_builder_cpu_cores(b, (*C.uint32_t)(unsafe.Pointer(&s.CPUCores[0])), C.uint32_t(len(s.CPUCores))) + } + if s.GPUDevices != nil { + var ptr *C.uint32_t + if len(s.GPUDevices) > 0 { + ptr = (*C.uint32_t)(unsafe.Pointer(&s.GPUDevices[0])) + } + b = C.sandlock_sandbox_builder_gpu_devices(b, ptr, C.uint32_t(len(s.GPUDevices))) + } + + // Syscall filtering. 
+ if len(s.ExtraDenySyscalls) > 0 { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_extra_deny_syscalls(b, c) + }, strings.Join(s.ExtraDenySyscalls, ",")) + } + if len(s.ExtraAllowSyscalls) > 0 { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_extra_allow_syscalls(b, c) + }, strings.Join(s.ExtraAllowSyscalls, ",")) + } + + // Determinism. + if s.RandomSeed != nil { + b = C.sandlock_sandbox_builder_random_seed(b, C.uint64_t(*s.RandomSeed)) + } + if s.TimeStart != "" { + secs, err := policy.ParseTimeStart(s.TimeStart) + if err != nil { + freeBuilderViaBuild(b) + return nil, err + } + b = C.sandlock_sandbox_builder_time_start(b, C.uint64_t(secs)) + } + if s.NoRandomizeMemory { + b = C.sandlock_sandbox_builder_no_randomize_memory(b, cbool(true)) + } + if s.NoHugePages { + b = C.sandlock_sandbox_builder_no_huge_pages(b, cbool(true)) + } + if s.DeterministicDirs { + b = C.sandlock_sandbox_builder_deterministic_dirs(b, cbool(true)) + } + + // Environment. + if s.CleanEnv { + b = C.sandlock_sandbox_builder_clean_env(b, cbool(true)) + } + for k, v := range s.Env { + ck, cv := C.CString(k), C.CString(v) + b = C.sandlock_sandbox_builder_env_var(b, ck, cv) + C.free(unsafe.Pointer(ck)) + C.free(unsafe.Pointer(cv)) + } + + // Misc. + if s.UID != nil { + b = C.sandlock_sandbox_builder_uid(b, C.uint32_t(*s.UID)) + } + if s.NoCoredump { + b = C.sandlock_sandbox_builder_no_coredump(b, cbool(true)) + } + + // Copy-on-write branch handling. 
+ if s.FSStorage != "" { + str(func(b *C.sandlock_builder_t, c *C.char) *C.sandlock_builder_t { + return C.sandlock_sandbox_builder_fs_storage(b, c) + }, s.FSStorage) + } + if s.OnExit != BranchActionDefault { + b = C.sandlock_sandbox_builder_on_exit(b, C.uint8_t(s.OnExit-1)) + } + if s.OnError != BranchActionDefault { + b = C.sandlock_sandbox_builder_on_error(b, C.uint8_t(s.OnError-1)) + } + + var errCode C.int + var errMsg *C.char + policyPtr := C.sandlock_sandbox_build(b, &errCode, &errMsg) + if policyPtr == nil { + msg := "sandlock: failed to build sandbox policy" + if errMsg != nil { + msg = "sandlock: " + C.GoString(errMsg) + C.sandlock_string_free(errMsg) + } + return nil, fmt.Errorf("%s", msg) + } + return policyPtr, nil +} + +// freeBuilderViaBuild consumes a builder that will not be used, so it is not +// leaked. The FFI exposes no builder-free entry point; build() is the only +// consumer, so we build and immediately free the resulting policy (or discard +// a build error). Reached only on the rare numeric-parse error paths after the +// builder already exists. +func freeBuilderViaBuild(b *C.sandlock_builder_t) { + var errCode C.int + var errMsg *C.char + p := C.sandlock_sandbox_build(b, &errCode, &errMsg) + if errMsg != nil { + C.sandlock_string_free(errMsg) + } + if p != nil { + C.sandlock_sandbox_free(p) + } +} + +// cArgv converts a command into a C argv array. Each element and the array +// itself live in Go memory; the elements are C strings that the caller must +// free with freeArgv. Returns an error if any argument carries a NUL byte. 
+func cArgv(args []string) ([]*C.char, error) { + out := make([]*C.char, len(args)) + for i, a := range args { + if hasNUL(a) { + for j := 0; j < i; j++ { + C.free(unsafe.Pointer(out[j])) + } + return nil, ErrInvalidString + } + out[i] = C.CString(a) + } + return out, nil +} + +func freeArgv(argv []*C.char) { + for _, p := range argv { + C.free(unsafe.Pointer(p)) + } +} + +// argvPtr returns the pointer/count pair for an argv slice. +func argvPtr(argv []*C.char) (**C.char, C.uint) { + if len(argv) == 0 { + return nil, 0 + } + return (**C.char)(unsafe.Pointer(&argv[0])), C.uint(len(argv)) +} + +// cName converts the sandbox name to a C string, returning nil for the empty +// name (which tells the FFI to auto-generate one). +func (s *Sandbox) cName() *C.char { + if s.Name == "" { + return nil + } + return C.CString(s.Name) +} + +func freeName(c *C.char) { + if c != nil { + C.free(unsafe.Pointer(c)) + } +} + +// timeoutMs derives an FFI wait timeout from a context. A zero return means +// "no timeout"; a context with a deadline maps to the remaining milliseconds +// (at least 1, so an already-expired deadline does not become "no timeout"). 
+func timeoutMs(ctx context.Context) C.uint64_t { + deadline, ok := ctx.Deadline() + if !ok { + return 0 + } + d := time.Until(deadline) + if d <= 0 { + return 1 + } + ms := d.Milliseconds() + if ms < 1 { + ms = 1 + } + return C.uint64_t(ms) +} + +func readResult(r *C.sandlock_result_t) *Result { + res := &Result{ + ExitCode: int(C.sandlock_result_exit_code(r)), + Success: bool(C.sandlock_result_success(r)), + } + res.Stdout = readBytes(r, true) + res.Stderr = readBytes(r, false) + return res +} + +func readBytes(r *C.sandlock_result_t, stdout bool) []byte { + var n C.uintptr_t + var p *C.uint8_t + if stdout { + p = C.sandlock_result_stdout_bytes(r, &n) + } else { + p = C.sandlock_result_stderr_bytes(r, &n) + } + if p == nil || n == 0 { + return nil + } + return C.GoBytes(unsafe.Pointer(p), C.int(n)) +} + +// Run executes cmd in the sandbox, capturing stdout and stderr, and waits for +// it to finish. If ctx carries a deadline, the process is killed and a result +// with ExitCode -1 is returned once it elapses. ctx cancellation without a +// deadline does not preempt an already-running child. 
+func (s *Sandbox) Run(ctx context.Context, cmd ...string) (*Result, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + if len(cmd) == 0 { + return nil, fmt.Errorf("sandlock: empty command") + } + policyPtr, err := s.buildPolicy() + if err != nil { + return nil, err + } + defer C.sandlock_sandbox_free(policyPtr) + + argv, err := cArgv(cmd) + if err != nil { + return nil, err + } + defer freeArgv(argv) + ap, ac := argvPtr(argv) + name := s.cName() + defer freeName(name) + + h := C.sandlock_create_for_run(policyPtr, name, ap, ac) + if h == nil { + return nil, fmt.Errorf("sandlock: failed to create sandbox") + } + if C.sandlock_start(h) != 0 { + C.sandlock_handle_free(h) + return nil, fmt.Errorf("sandlock: failed to start sandbox") + } + r := C.sandlock_handle_wait_timeout(h, timeoutMs(ctx)) + C.sandlock_handle_free(h) + if r == nil { + return nil, fmt.Errorf("sandlock: wait failed") + } + res := readResult(r) + C.sandlock_result_free(r) + return res, nil +} + +// RunInteractive executes cmd with the calling process's stdio inherited (no +// capture) and returns the exit code. The context is honored only as a +// pre-run cancellation check; interactive runs are not interrupted by a +// deadline. +func (s *Sandbox) RunInteractive(ctx context.Context, cmd ...string) (int, error) { + if err := ctx.Err(); err != nil { + return -1, err + } + if len(cmd) == 0 { + return -1, fmt.Errorf("sandlock: empty command") + } + policyPtr, err := s.buildPolicy() + if err != nil { + return -1, err + } + defer C.sandlock_sandbox_free(policyPtr) + + argv, err := cArgv(cmd) + if err != nil { + return -1, err + } + defer freeArgv(argv) + ap, ac := argvPtr(argv) + name := s.cName() + defer freeName(name) + + code := int(C.sandlock_run_interactive(policyPtr, name, ap, ac)) + return code, nil +} + +// DryRun executes cmd against a temporary copy-on-write layer, collects the +// filesystem changes it would have made, then discards them. It requires +// Workdir to be set. 
+func (s *Sandbox) DryRun(ctx context.Context, cmd ...string) (*DryRunResult, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + if len(cmd) == 0 { + return nil, fmt.Errorf("sandlock: empty command") + } + policyPtr, err := s.buildPolicy() + if err != nil { + return nil, err + } + defer C.sandlock_sandbox_free(policyPtr) + + argv, err := cArgv(cmd) + if err != nil { + return nil, err + } + defer freeArgv(argv) + ap, ac := argvPtr(argv) + name := s.cName() + defer freeName(name) + + r := C.sandlock_dry_run(policyPtr, name, ap, ac) + if r == nil { + return nil, fmt.Errorf("sandlock: dry run failed (Workdir is required; check that readable paths exist)") + } + defer C.sandlock_dry_run_result_free(r) + + out := &DryRunResult{Result: Result{ + ExitCode: int(C.sandlock_dry_run_result_exit_code(r)), + Success: bool(C.sandlock_dry_run_result_success(r)), + }} + var n C.uintptr_t + if p := C.sandlock_dry_run_result_stdout_bytes(r, &n); p != nil && n > 0 { + out.Stdout = C.GoBytes(unsafe.Pointer(p), C.int(n)) + } + if p := C.sandlock_dry_run_result_stderr_bytes(r, &n); p != nil && n > 0 { + out.Stderr = C.GoBytes(unsafe.Pointer(p), C.int(n)) + } + count := int(C.sandlock_dry_run_result_changes_len(r)) + for i := 0; i < count; i++ { + kind := byte(C.sandlock_dry_run_result_change_kind(r, C.uintptr_t(i))) + var path string + if pc := C.sandlock_dry_run_result_change_path(r, C.uintptr_t(i)); pc != nil { + path = C.GoString(pc) + C.sandlock_string_free(pc) + } + out.Changes = append(out.Changes, Change{Kind: ChangeKind(kind), Path: path}) + } + return out, nil +} + +// Confine applies the Sandbox's Landlock filesystem rules to the current +// process, in place and irreversibly. Only filesystem fields are honored; +// configuration that requires a supervisor or a fresh child (seccomp, +// network, resource limits, environment, etc.) is rejected by the core rather +// than silently ignored. 
+func Confine(s *Sandbox) error { + policyPtr, err := s.buildPolicy() + if err != nil { + return err + } + defer C.sandlock_sandbox_free(policyPtr) + if C.sandlock_confine(policyPtr) != 0 { + return fmt.Errorf("sandlock: confine failed") + } + return nil +} + +// LandlockABIVersion returns the Landlock ABI version supported by the running +// kernel, or -1 if Landlock is unavailable. +func LandlockABIVersion() int { return int(C.sandlock_landlock_abi_version()) } + +// MinLandlockABI returns the minimum Landlock ABI version this build requires. +func MinLandlockABI() int { return int(C.sandlock_min_landlock_abi()) } + +// SyscallNr resolves a syscall name (e.g. "openat") to its kernel syscall +// number for the host architecture. It returns an error for names sandlock +// cannot resolve (syscalls outside the set it filters or supervises). +func SyscallNr(name string) (int, error) { + if hasNUL(name) { + return -1, ErrInvalidString + } + c := C.CString(name) + defer C.free(unsafe.Pointer(c)) + nr := int64(C.sandlock_syscall_nr(c)) + if nr < 0 { + return -1, fmt.Errorf("sandlock: unknown syscall %q", name) + } + return int(nr), nil +} + +// Process is a live sandboxed process started by Spawn. It supports PID +// inspection, pause/resume/kill via the process group, and Wait. A Process +// holds at most one running command; create separate Spawns for concurrency. +// +// The underlying FFI handle is not safe for concurrent access, so all handle +// operations are serialized. Pause/Resume/Kill act on the OS process group by +// PID and touch no handle state, so they remain usable while Wait blocks on +// the handle — that is how Kill interrupts a blocked Wait. Ports, by contrast, +// reads the handle and is reported as empty while a Wait is in flight. 
+type Process struct { + mu sync.Mutex + h *C.sandlock_handle_t + pid int + waiting bool // a Wait owns the handle; other handle ops must defer to it +} + +// Spawn forks the sandboxed child, installs the policy, and releases it to +// exec cmd without waiting. Use the returned Process to manage its lifecycle. +func (s *Sandbox) Spawn(cmd ...string) (*Process, error) { + if len(cmd) == 0 { + return nil, fmt.Errorf("sandlock: empty command") + } + policyPtr, err := s.buildPolicy() + if err != nil { + return nil, err + } + defer C.sandlock_sandbox_free(policyPtr) + + argv, err := cArgv(cmd) + if err != nil { + return nil, err + } + defer freeArgv(argv) + ap, ac := argvPtr(argv) + name := s.cName() + defer freeName(name) + + h := C.sandlock_create(policyPtr, name, ap, ac) + if h == nil { + return nil, fmt.Errorf("sandlock: failed to create sandbox") + } + if C.sandlock_start(h) != 0 { + C.sandlock_handle_free(h) + return nil, fmt.Errorf("sandlock: failed to start sandbox") + } + p := &Process{h: h, pid: int(C.sandlock_handle_pid(h))} + // Last-resort cleanup if the caller drops the Process without Wait/Close: + // kill the child and release the handle so neither is leaked. Wait and + // Close clear this once they have done the cleanup themselves. + runtime.SetFinalizer(p, (*Process).finalize) + return p, nil +} + +// finalize is the SetFinalizer cleanup for a Process abandoned without +// Wait/Close. It can only run once the Process is unreachable, which implies +// no Wait is in flight (a blocked Wait keeps the Process reachable), so the +// handle is not concurrently borrowed and is safe to free here. +func (p *Process) finalize() { + p.mu.Lock() + defer p.mu.Unlock() + if p.h == nil { + return + } + if p.pid > 0 { + _ = syscall.Kill(-p.pid, syscall.SIGKILL) + } + C.sandlock_handle_free(p.h) + p.h = nil +} + +// Pid returns the child process ID, or 0 if it is not available. 
+func (p *Process) Pid() int { + p.mu.Lock() + defer p.mu.Unlock() + return p.pid +} + +// Wait blocks until the process exits, returns its captured Result, and +// releases the handle. After Wait the Process is no longer running. +// +// The blocking native wait runs without holding the mutex so that Kill (and +// Pause/Resume), which signal the process group by PID, can run concurrently +// and interrupt it. The waiting flag reserves exclusive use of the handle for +// the duration, so no other handle operation aliases it. +func (p *Process) Wait() (*Result, error) { + p.mu.Lock() + if p.h == nil || p.waiting { + p.mu.Unlock() + return nil, ErrNotRunning + } + h := p.h + p.waiting = true + p.mu.Unlock() + + r := C.sandlock_handle_wait(h) + + p.mu.Lock() + defer p.mu.Unlock() + p.waiting = false + C.sandlock_handle_free(h) + p.h = nil + runtime.SetFinalizer(p, nil) + if r == nil { + return nil, fmt.Errorf("sandlock: wait failed") + } + res := readResult(r) + C.sandlock_result_free(r) + return res, nil +} + +func (p *Process) signal(sig syscall.Signal) error { + p.mu.Lock() + defer p.mu.Unlock() + if p.h == nil || p.pid <= 0 { + return ErrNotRunning + } + // The sandbox child leads its own process group; signal the whole group. + return syscall.Kill(-p.pid, sig) +} + +// Pause sends SIGSTOP to the sandbox process group. +func (p *Process) Pause() error { return p.signal(syscall.SIGSTOP) } + +// Resume sends SIGCONT to the sandbox process group. +func (p *Process) Resume() error { return p.signal(syscall.SIGCONT) } + +// Kill sends SIGKILL to the sandbox process group. +func (p *Process) Kill() error { + err := p.signal(syscall.SIGKILL) + if err == syscall.ESRCH { + return nil + } + return err +} + +// Ports returns the current virtual-to-real TCP port mappings while the +// process is running. It is non-empty only when PortRemap is enabled and at +// least one port has been remapped. 
+func (p *Process) Ports() (map[int]int, error) { + p.mu.Lock() + defer p.mu.Unlock() + // While a Wait holds the handle, reading port mappings would alias it; + // report empty rather than touch the handle concurrently. + if p.h == nil || p.waiting { + return map[int]int{}, nil + } + c := C.sandlock_handle_port_mappings(p.h) + if c == nil { + return map[int]int{}, nil + } + raw := C.GoString(c) + C.sandlock_string_free(c) + + var m map[string]int + if err := json.Unmarshal([]byte(raw), &m); err != nil { + return nil, fmt.Errorf("sandlock: parsing port mappings: %w", err) + } + out := make(map[int]int, len(m)) + for k, v := range m { + var vp int + if _, err := fmt.Sscanf(k, "%d", &vp); err == nil { + out[vp] = v + } + } + return out, nil +} + +// Close releases the process handle, killing the process if it is still +// running. It is safe to call multiple times. +func (p *Process) Close() error { + p.mu.Lock() + defer p.mu.Unlock() + if p.h == nil { + return nil + } + if p.waiting { + // A Wait owns the handle and will free it; just kill the process + // group by PID to unblock that Wait, without touching the handle. + if p.pid > 0 { + _ = syscall.Kill(-p.pid, syscall.SIGKILL) + } + return nil + } + C.sandlock_handle_free(p.h) + p.h = nil + runtime.SetFinalizer(p, nil) + return nil +} diff --git a/go/sandlock_linux_test.go b/go/sandlock_linux_test.go new file mode 100644 index 0000000..7085607 --- /dev/null +++ b/go/sandlock_linux_test.go @@ -0,0 +1,173 @@ +//go:build linux + +package sandlock_test + +import ( + "context" + "os" + "strings" + "testing" + "time" + + sandlock "github.com/multikernel/sandlock/go" +) + +// rootfs is a minimal read-only set covering a typical dynamic binary, +// filtered to paths that actually exist on the host. sandlock errors on a +// readable path that does not exist, and the set differs across architectures +// (for example /lib64 is absent on arm64), so this is computed at startup. 
+var rootfs = existingPaths( + "/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc", "/proc", "/dev", +) + +func existingPaths(candidates ...string) []string { + var out []string + for _, p := range candidates { + if _, err := os.Stat(p); err == nil { + out = append(out, p) + } + } + return out +} + +// requireLandlock skips a test when the running kernel cannot satisfy +// sandlock's minimum Landlock ABI, so the suite stays green on older CI images. +func requireLandlock(t *testing.T) { + t.Helper() + have, want := sandlock.LandlockABIVersion(), sandlock.MinLandlockABI() + if have < want { + t.Skipf("kernel Landlock ABI v%d < required v%d", have, want) + } +} + +func TestLandlockABI(t *testing.T) { + t.Logf("Landlock ABI: have v%d, require v%d", sandlock.LandlockABIVersion(), sandlock.MinLandlockABI()) + if sandlock.MinLandlockABI() < 1 { + t.Fatalf("MinLandlockABI() = %d, want >= 1", sandlock.MinLandlockABI()) + } +} + +func TestRunEcho(t *testing.T) { + requireLandlock(t) + sb := &sandlock.Sandbox{FSReadable: rootfs} + res, err := sb.Run(context.Background(), "echo", "hello") + if err != nil { + t.Fatalf("Run: %v", err) + } + if !res.Success || res.ExitCode != 0 { + t.Fatalf("expected success, got exit=%d stderr=%q", res.ExitCode, res.Stderr) + } + if got := string(res.Stdout); got != "hello\n" { + t.Fatalf("stdout = %q, want %q", got, "hello\n") + } +} + +func TestRunExitCode(t *testing.T) { + requireLandlock(t) + sb := &sandlock.Sandbox{FSReadable: rootfs} + res, err := sb.Run(context.Background(), "sh", "-c", "exit 3") + if err != nil { + t.Fatalf("Run: %v", err) + } + if res.Success { + t.Fatalf("expected failure") + } + if res.ExitCode != 3 { + t.Fatalf("exit code = %d, want 3", res.ExitCode) + } +} + +func TestRunEmptyCommand(t *testing.T) { + sb := &sandlock.Sandbox{} + if _, err := sb.Run(context.Background()); err == nil { + t.Fatal("expected error for empty command") + } +} + +func TestRunNULRejected(t *testing.T) { + sb := &sandlock.Sandbox{} + if _, err 
:= sb.Run(context.Background(), "echo", "a\x00b"); err != sandlock.ErrInvalidString { + t.Fatalf("err = %v, want ErrInvalidString", err) + } +} + +func TestDryRun(t *testing.T) { + requireLandlock(t) + dir := t.TempDir() + sb := &sandlock.Sandbox{ + FSReadable: rootfs, + FSWritable: []string{dir}, + Workdir: dir, + } + res, err := sb.DryRun(context.Background(), "sh", "-c", "echo hi > "+dir+"/out.txt") + if err != nil { + t.Fatalf("DryRun: %v", err) + } + if !res.Success { + t.Fatalf("dry run failed: exit=%d stderr=%q", res.ExitCode, res.Stderr) + } + // The write is discarded; the file must not exist on the host afterward. + if _, statErr := os.Stat(dir + "/out.txt"); statErr == nil { + t.Fatalf("dry run leaked a write to the host") + } + t.Logf("changes: %+v", res.Changes) +} + +func TestProcessKillInterruptsWait(t *testing.T) { + requireLandlock(t) + sb := &sandlock.Sandbox{FSReadable: rootfs} + p, err := sb.Spawn("sleep", "60") + if err != nil { + t.Fatalf("Spawn: %v", err) + } + defer p.Close() + + done := make(chan error, 1) + go func() { + _, werr := p.Wait() + done <- werr + }() + + // Wait is now blocked in the native wait. Kill must acquire the mutex and + // signal the process group even though Wait is in flight; if Wait still + // held the mutex across the blocking call, this would block until timeout. + if err := p.Kill(); err != nil { + t.Fatalf("Kill: %v", err) + } + + select { + case <-done: + // Wait returned promptly after the kill, as intended. 
+ case <-time.After(5 * time.Second): + t.Fatal("Kill did not interrupt a blocked Wait within 5s") + } +} + +func TestSyscallNr(t *testing.T) { + nr, err := sandlock.SyscallNr("openat") + if err != nil { + t.Fatalf("SyscallNr(openat): %v", err) + } + if nr < 0 { + t.Fatalf("SyscallNr(openat) = %d, want >= 0", nr) + } + if _, err := sandlock.SyscallNr("definitely_not_a_real_syscall"); err == nil { + t.Fatal("expected error for unknown syscall") + } +} + +func TestConfineRejectsSupervisorConfig(t *testing.T) { + // Confine only honors Landlock fields; a field requiring a supervisor + // must be rejected rather than silently ignored. Asserting the rejection + // avoids irreversibly confining the test process. + err := sandlock.Confine(&sandlock.Sandbox{ + FSReadable: rootfs, + MaxMemory: "256M", + }) + if err == nil { + t.Fatal("expected Confine to reject supervisor-only config") + } + if !strings.Contains(err.Error(), "confine") { + t.Logf("Confine rejected with: %v", err) + } +}