From c5df3899c9d2443736b84100d7287c318c1383a4 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Tue, 2 Jun 2026 22:53:23 -0400 Subject: [PATCH 1/2] feat(sandbox): add workflow-sandbox-runner agent + remote runner config wiring (PR8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 15: cmd/workflow-sandbox-runner — gRPC server (SandboxExecService) with mTLS + bearer-token auth, profile clamping (permissive→standard), agent-side secret:// resolution via secrets.Provider, streaming stdout/stderr/exit_code chunks. Version via ldflags. Task 16: module/sandbox_remote_runners — new sandbox.remote_runners module type (SandboxRemoteRunnersModule) that parses named RemoteRunnerSpec list from config and exposes a RemoteRunnerRegistry service. resolveSandboxRunner treats any unknown exec_env as a named remote runner — looks up the registry via app.GetService, builds sandbox/remote.RemoteRunner from the spec + per-exec SandboxConfig. pipeline_step_sandbox_exec.go: exec_env validation deferred to Execute time. sandbox.SandboxConfig gains a Profile field. Registration: pipelinesteps plugin, schema coreModuleTypes + registerBuiltins, cmd/wfctl/type_registry.go, DOCUMENTATION.md, golden editor-schemas.json. Release: .github/workflows/release.yml build-binaries matrix gains sandbox-runner. Security review fixes (CHANGES_REQUESTED): - CRITICAL: secret:// runner tokens are now resolved through the module's configured secrets_provider (new config key) before dialing — a literal "secret://..." string is no longer sent as the Bearer header. A secret:// token with no provider is a hard error; literal tokens pass through. - CRITICAL: the agent refuses to start as an unauthenticated executor (no token AND no mTLS) unless --allow-unauthenticated is passed, which logs a loud warning. - IMPORTANT: bearer-token compare uses crypto/subtle.ConstantTimeCompare. - IMPORTANT: SandboxRemoteRunnersModule.Init rejects reserved names (""/local-docker/ephemeral), duplicate names, and no-auth-no-TLS runners (unless per-spec allow_insecure: true). - MINOR: drainStream now fails the test on unexpected stream errors; added a non-zero exit-code (exit 7) streaming test. - MINOR: removed the bogus Inputs port from the sandbox.remote_runners editor schema (provider-only module); regenerated golden. - MINOR: dropped windows/amd64 from the sandbox-runner release matrix (Linux Docker daemon target). Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/release.yml | 5 + DOCUMENTATION.md | 1 + cmd/wfctl/type_registry.go | 8 + cmd/workflow-sandbox-runner/main.go | 162 ++++++++ cmd/workflow-sandbox-runner/main_test.go | 35 ++ cmd/workflow-sandbox-runner/server.go | 188 +++++++++ cmd/workflow-sandbox-runner/server_test.go | 451 +++++++++++++++++++++ module/execenv_factory.go | 138 ++++++- module/execenv_factory_test.go | 201 ++++++++- module/pipeline_step_sandbox_exec.go | 14 +- module/sandbox_remote_runners.go | 279 +++++++++++++ module/sandbox_remote_runners_test.go | 252 ++++++++++++ plugins/pipelinesteps/plugin.go | 10 + sandbox/docker.go | 14 + sandbox/profile.go | 6 +- schema/module_schema.go | 16 + schema/schema.go | 1 + schema/testdata/editor-schemas.golden.json | 27 ++ 18 files changed, 1770 insertions(+), 38 deletions(-) create mode 100644 cmd/workflow-sandbox-runner/main.go create mode 100644 cmd/workflow-sandbox-runner/main_test.go create mode 100644 cmd/workflow-sandbox-runner/server.go create mode 100644 cmd/workflow-sandbox-runner/server_test.go create mode 100644 module/sandbox_remote_runners.go create mode 100644 module/sandbox_remote_runners_test.go diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b80bf0c4e..cc69c9a80 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -180,6 +180,11 @@ jobs: cmd: ./cmd/workflow-lsp-server prefix: workflow-lsp-server platforms: linux/amd64 linux/arm64 darwin/amd64 darwin/arm64 windows/amd64 + - name: sandbox-runner + cmd: ./cmd/workflow-sandbox-runner + prefix: workflow-sandbox-runner + # No windows: the agent shells to a Linux Docker daemon to run sandboxed containers. + platforms: linux/amd64 linux/arm64 darwin/amd64 darwin/arm64 steps: - name: Check out code diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index d89aaf339..6a49fc95a 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -512,6 +512,7 @@ Use the generic `infra.*` module types with `provider: aws` and `step.iac_*` pip See [v0.53.0 migration guide](docs/migrations/v0.53.0-aws-iac-removal.md). | `iac.provider` | Cloud provider configuration (aws, gcp, azure, digitalocean) for IaC operations | platform | | `iac.state` | IaC state persistence (memory + filesystem + postgres in-core; spaces / s3 / gcs / azure_blob via plugins) | platform | +| `sandbox.remote_runners` | Named remote sandbox agent registry; exposes RemoteRunnerRegistry service so `step.sandbox_exec` can dispatch to remote agents via `exec_env: ` | pipelinesteps | | `infra.vpc` | Virtual Private Cloud and subnet management | platform | | `infra.database` | Managed database instance provisioning and configuration | platform | | `infra.cache` | In-memory cache cluster provisioning (Redis, Memcached) | platform | diff --git a/cmd/wfctl/type_registry.go b/cmd/wfctl/type_registry.go index 1a71827c0..c42ca1e70 100644 --- a/cmd/wfctl/type_registry.go +++ b/cmd/wfctl/type_registry.go @@ -364,6 +364,14 @@ func KnownModuleTypes() map[string]ModuleTypeInfo { ConfigKeys: []string{"componentId", "successTransition", "compensateTransition", "maxRetries", "retryBackoffMs", "timeoutSeconds"}, }, + // pipelinesteps plugin (sandbox runner config module) + "sandbox.remote_runners": { + Type: "sandbox.remote_runners", + Plugin: "pipelinesteps", + Stateful: false, + ConfigKeys: []string{"remote_runners", "secrets_provider"}, + }, + // secrets plugin "secrets.vault": { Type: "secrets.vault", diff --git a/cmd/workflow-sandbox-runner/main.go b/cmd/workflow-sandbox-runner/main.go new file mode 100644 index 000000000..1ee7c2d66 --- /dev/null +++ b/cmd/workflow-sandbox-runner/main.go @@ -0,0 +1,162 @@ +// Package main is the entrypoint for the workflow-sandbox-runner agent. +// +// The agent serves the SandboxExecService gRPC interface over mTLS + bearer-token auth. +// It resolves secret:// references in env values server-side before launching commands, +// and clamps requested security profiles to a safe maximum (permissive → standard). +// +// Design: docs/decisions/0019-remote-sandbox-agent.md (ADR 0019) +package main + +import ( + "crypto/tls" + "crypto/x509" + "flag" + "fmt" + "log/slog" + "net" + "os" + "runtime/debug" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + + pb "github.com/GoCodeAlone/workflow/plugin/external/proto" + "github.com/GoCodeAlone/workflow/secrets" +) + +// version is set at build time via -ldflags "-X main.version=". +// When built without ldflags (e.g. go run), buildVersion() reads the module +// version from the embedded build info, falling back to "dev". +var version = buildVersion() + +func buildVersion() string { + if info, ok := debug.ReadBuildInfo(); ok && info.Main.Version != "" && info.Main.Version != "(devel)" { + return info.Main.Version + } + return "dev" +} + +func main() { + showVersion := flag.Bool("version", false, "Print version and exit") + listenAddr := flag.String("addr", ":50051", "gRPC listen address (host:port)") + certFile := flag.String("tls-cert", "", "TLS certificate file (PEM)") + keyFile := flag.String("tls-key", "", "TLS private key file (PEM)") + caFile := flag.String("tls-ca", "", "CA certificate for mTLS client auth (PEM; enables mTLS when set). REQUIRED (or --token) unless --allow-unauthenticated.") + bearerToken := flag.String("token", "", "Expected bearer token for RPC auth (from SANDBOX_RUNNER_TOKEN env if unset). REQUIRED (or --tls-ca) unless --allow-unauthenticated.") + allowUnauth := flag.Bool("allow-unauthenticated", false, "Permit startup with NO auth (no token AND no mTLS). DANGEROUS — local/dev only, never production.") + secretsBackend := flag.String("secrets-backend", "env", "Secrets backend for secret:// resolution: env, file") + secretsDir := flag.String("secrets-dir", "", "Directory for 'file' secrets backend") + secretsEnvPrefix := flag.String("secrets-env-prefix", "", "Env-var prefix for 'env' secrets backend") + flag.Parse() + + if *showVersion { + fmt.Println(version) + os.Exit(0) + } + + // Allow token to come from environment for 12-factor deployment. + token := *bearerToken + if token == "" { + token = os.Getenv("SANDBOX_RUNNER_TOKEN") + } + + // Refuse to start as an unauthenticated remote code executor unless the + // operator explicitly opts in. "Authenticated" means a bearer token OR mTLS + // (a CA configured for client-cert verification). + if err := checkAuthRequirement(token, *caFile, *allowUnauth); err != nil { + slog.Error("sandbox-runner: refusing to start", "err", err) + os.Exit(1) + } + if *allowUnauth && token == "" && *caFile == "" { + slog.Warn("sandbox-runner: WARNING — running with NO authentication (no token, no mTLS); do NOT use in production") + } + + // Build secrets provider for server-side secret:// resolution. + var provider secrets.Provider + switch *secretsBackend { + case "file": + if *secretsDir == "" { + slog.Error("sandbox-runner: --secrets-dir is required for file backend") + os.Exit(1) + } + provider = secrets.NewFileProvider(*secretsDir) + default: // "env" + provider = secrets.NewEnvProvider(*secretsEnvPrefix) + } + + // Build gRPC server options. + serverOpts, err := buildServerOptions(*certFile, *keyFile, *caFile) + if err != nil { + slog.Error("sandbox-runner: failed to build TLS options", "err", err) + os.Exit(1) + } + + // Stream interceptor for bearer-token auth. + serverOpts = append(serverOpts, grpc.StreamInterceptor(newBearerStreamInterceptor(token))) + + grpcServer := grpc.NewServer(serverOpts...) + srv := newSandboxExecServer(provider, slog.Default()) + pb.RegisterSandboxExecServiceServer(grpcServer, srv) + + lis, err := net.Listen("tcp", *listenAddr) + if err != nil { + slog.Error("sandbox-runner: listen failed", "addr", *listenAddr, "err", err) + os.Exit(1) + } + + slog.Info("sandbox-runner: starting", "addr", *listenAddr, "version", version) + if err := grpcServer.Serve(lis); err != nil { + slog.Error("sandbox-runner: serve error", "err", err) + os.Exit(1) + } +} + +// checkAuthRequirement enforces that the agent — a remote command executor — +// has SOME authentication configured. Auth is satisfied by either a non-empty +// bearer token or mTLS (a CA file for client-cert verification). If neither is +// present, startup is refused unless allowUnauth is explicitly set. +func checkAuthRequirement(token, caFile string, allowUnauth bool) error { + if token != "" || caFile != "" { + return nil + } + if allowUnauth { + return nil + } + return fmt.Errorf("no authentication configured: set --token (bearer auth) or --tls-ca (mTLS), " + + "or pass --allow-unauthenticated for local/dev only (never production)") +} + +// buildServerOptions constructs gRPC server credentials from the supplied TLS files. +// If certFile and keyFile are empty, the server starts without TLS (test/dev only). +// If caFile is also set, mTLS client authentication is enabled. +func buildServerOptions(certFile, keyFile, caFile string) ([]grpc.ServerOption, error) { + if certFile == "" && keyFile == "" { + // No TLS — insecure mode for local development / testing. + return []grpc.ServerOption{grpc.Creds(insecure.NewCredentials())}, nil + } + if certFile == "" || keyFile == "" { + return nil, fmt.Errorf("both --tls-cert and --tls-key must be set together") + } + cert, err := tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { + return nil, fmt.Errorf("load server cert/key: %w", err) + } + tlsCfg := &tls.Config{ + Certificates: []tls.Certificate{cert}, + MinVersion: tls.VersionTLS13, + } + if caFile != "" { + caPEM, err := os.ReadFile(caFile) //nolint:gosec // G304: path is operator-supplied via flag + if err != nil { + return nil, fmt.Errorf("read CA cert: %w", err) + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(caPEM) { + return nil, fmt.Errorf("failed to parse CA cert from %q", caFile) + } + tlsCfg.ClientCAs = pool + tlsCfg.ClientAuth = tls.RequireAndVerifyClientCert + } + return []grpc.ServerOption{grpc.Creds(credentials.NewTLS(tlsCfg))}, nil +} diff --git a/cmd/workflow-sandbox-runner/main_test.go b/cmd/workflow-sandbox-runner/main_test.go new file mode 100644 index 000000000..7f240ba48 --- /dev/null +++ b/cmd/workflow-sandbox-runner/main_test.go @@ -0,0 +1,35 @@ +package main + +import "testing" + +// TestCheckAuthRequirement covers the security gate that refuses to start an +// unauthenticated remote code executor unless --allow-unauthenticated is set. +func TestCheckAuthRequirement(t *testing.T) { + tests := []struct { + name string + token string + caFile string + allowUnauth bool + wantErr bool + }{ + {name: "token only", token: "tok", wantErr: false}, + {name: "mTLS only", caFile: "/etc/ca.crt", wantErr: false}, + {name: "token and mTLS", token: "tok", caFile: "/etc/ca.crt", wantErr: false}, + {name: "no auth — refused", wantErr: true}, + {name: "no auth + allow-unauthenticated — permitted", allowUnauth: true, wantErr: false}, + // allow-unauthenticated is a no-op when auth IS configured (no error either way). + {name: "token + allow-unauthenticated", token: "tok", allowUnauth: true, wantErr: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := checkAuthRequirement(tt.token, tt.caFile, tt.allowUnauth) + if tt.wantErr && err == nil { + t.Errorf("checkAuthRequirement(%q,%q,%v): expected error, got nil", tt.token, tt.caFile, tt.allowUnauth) + } + if !tt.wantErr && err != nil { + t.Errorf("checkAuthRequirement(%q,%q,%v): unexpected error: %v", tt.token, tt.caFile, tt.allowUnauth, err) + } + }) + } +} diff --git a/cmd/workflow-sandbox-runner/server.go b/cmd/workflow-sandbox-runner/server.go new file mode 100644 index 000000000..7fcc569a6 --- /dev/null +++ b/cmd/workflow-sandbox-runner/server.go @@ -0,0 +1,188 @@ +package main + +import ( + "context" + "crypto/subtle" + "fmt" + "log/slog" + "strings" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" + + pb "github.com/GoCodeAlone/workflow/plugin/external/proto" + "github.com/GoCodeAlone/workflow/sandbox" + "github.com/GoCodeAlone/workflow/secrets" +) + +// allowedProfiles is the set of accepted security profiles that the agent will honour. +// Any profile NOT in this set (e.g. "permissive", or unknown values) is clamped to "standard". +var allowedProfiles = map[string]bool{ + "strict": true, + "standard": true, +} + +// clampProfile enforces the agent-side profile allow-set (ADR 0019). +// "permissive" and any unknown profile are clamped to "standard". +// "strict" and "standard" are accepted unchanged. +func clampProfile(requested string) string { + if allowedProfiles[requested] { + return requested + } + return "standard" +} + +// sandboxRunnerFactory is a function that creates a SandboxRunner from a SandboxConfig. +// The default is sandbox.NewLocalDockerRunner; tests inject a fake. +type sandboxRunnerFactory func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) + +// sandboxExecServer implements pb.SandboxExecServiceServer. +type sandboxExecServer struct { + pb.UnimplementedSandboxExecServiceServer + provider secrets.Provider + log *slog.Logger + runnerFactory sandboxRunnerFactory +} + +// newSandboxExecServer creates a new sandboxExecServer using the default +// (local Docker) runner factory. +func newSandboxExecServer(provider secrets.Provider, log *slog.Logger) *sandboxExecServer { + return &sandboxExecServer{ + provider: provider, + log: log, + runnerFactory: sandbox.NewLocalDockerRunner, + } +} + +// newSandboxExecServerWithFactory creates a sandboxExecServer with an injected +// runner factory — used by tests to avoid requiring a Docker daemon. +func newSandboxExecServerWithFactory(provider secrets.Provider, log *slog.Logger, factory sandboxRunnerFactory) *sandboxExecServer { + return &sandboxExecServer{ + provider: provider, + log: log, + runnerFactory: factory, + } +} + +// Exec implements SandboxExecServiceServer. It: +// 1. Clamps the requested security profile to the allowed set. +// 2. Resolves any secret:// references in req.Env server-side. +// 3. Runs the command via a local Docker runner. +// 4. Streams stdout/stderr chunks and terminates with an exit_code chunk. +func (s *sandboxExecServer) Exec(req *pb.SandboxExecRequest, stream grpc.ServerStreamingServer[pb.SandboxExecChunk]) error { + ctx := stream.Context() + + // 1. Profile clamping. + clampedProfile := clampProfile(req.GetProfile()) + if clampedProfile != req.GetProfile() { + s.log.Warn("sandbox-runner: clamped requested profile", "requested", req.GetProfile(), "effective", clampedProfile) + } + + // 2. Agent-side secret resolution. + resolvedEnv, err := s.resolveEnv(ctx, req.GetEnv()) + if err != nil { + // Non-leaky error: don't include the key value or the secret ref in the + // gRPC status (it would be visible to the caller and could leak context). + return status.Errorf(codes.InvalidArgument, "failed to resolve one or more secret references in env") + } + + // 3. Build sandbox config and create runner. + sandboxCfg := sandbox.BuildSandboxConfig(clampedProfile, req.GetImage()) + if len(resolvedEnv) > 0 { + sandboxCfg.Env = resolvedEnv + } + if req.GetWorkdir() != "" { + sandboxCfg.WorkDir = req.GetWorkdir() + } + + runner, err := s.runnerFactory(sandboxCfg) + if err != nil { + return status.Errorf(codes.Internal, "failed to create sandbox runner") + } + defer runner.Close() //nolint:errcheck + + // 4. Execute the command and collect results. + result, err := runner.Exec(ctx, req.GetCommand()) + if err != nil { + return status.Errorf(codes.Internal, "sandbox execution failed") + } + + // Stream stdout if non-empty. + if result.Stdout != "" { + if sendErr := stream.Send(&pb.SandboxExecChunk{ + Chunk: &pb.SandboxExecChunk_Stdout{Stdout: []byte(result.Stdout)}, + }); sendErr != nil { + return sendErr + } + } + + // Stream stderr if non-empty. + if result.Stderr != "" { + if sendErr := stream.Send(&pb.SandboxExecChunk{ + Chunk: &pb.SandboxExecChunk_Stderr{Stderr: []byte(result.Stderr)}, + }); sendErr != nil { + return sendErr + } + } + + // Terminal exit_code chunk. + // ExitCode is an OS process exit code (0-255); int32 can hold the full range safely. //nolint:gosec // G115: safe cast, OS exit codes fit in int32 + return stream.Send(&pb.SandboxExecChunk{ + Chunk: &pb.SandboxExecChunk_ExitCode{ExitCode: int32(result.ExitCode)}, //nolint:gosec // G115 + }) +} + +// resolveEnv resolves all secret:// references in the env map. +// Returns a new map with resolved values. Non-secret values are passed through. +// If any reference cannot be resolved, an error is returned. +// NEVER log the resolved value — do not leak secret material to logs. +func (s *sandboxExecServer) resolveEnv(ctx context.Context, env map[string]string) (map[string]string, error) { + if len(env) == 0 { + return env, nil + } + resolver := secrets.NewResolver(s.provider) + resolved := make(map[string]string, len(env)) + for k, v := range env { + r, err := resolver.Resolve(ctx, v) + if err != nil { + // Log the key only (never the value or the resolved secret). + s.log.Error("sandbox-runner: failed to resolve secret ref", "env_key", k) + return nil, fmt.Errorf("resolve env key %q: %w", k, err) + } + resolved[k] = r + } + return resolved, nil +} + +// newBearerStreamInterceptor returns a gRPC StreamServerInterceptor that checks +// the "authorization: Bearer " metadata on every incoming streaming RPC. +// If token is empty, auth is disabled (useful for local development without a token). +func newBearerStreamInterceptor(token string) grpc.StreamServerInterceptor { + return func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { + if token == "" { + // No auth configured — permit all. + return handler(srv, ss) + } + md, ok := metadata.FromIncomingContext(ss.Context()) + if !ok { + return status.Error(codes.Unauthenticated, "missing metadata") + } + authValues := md.Get("authorization") + if len(authValues) == 0 { + return status.Error(codes.Unauthenticated, "missing authorization header") + } + const prefix = "Bearer " + authHeader := authValues[0] + if !strings.HasPrefix(authHeader, prefix) { + return status.Error(codes.Unauthenticated, "authorization header must use Bearer scheme") + } + // Constant-time comparison to avoid leaking the token via timing. + got := authHeader[len(prefix):] + if subtle.ConstantTimeCompare([]byte(got), []byte(token)) != 1 { + return status.Error(codes.Unauthenticated, "invalid bearer token") + } + return handler(srv, ss) + } +} diff --git a/cmd/workflow-sandbox-runner/server_test.go b/cmd/workflow-sandbox-runner/server_test.go new file mode 100644 index 000000000..c9dd015c2 --- /dev/null +++ b/cmd/workflow-sandbox-runner/server_test.go @@ -0,0 +1,451 @@ +package main + +import ( + "context" + "io" + "log/slog" + "net" + "testing" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" + "google.golang.org/grpc/test/bufconn" + + pb "github.com/GoCodeAlone/workflow/plugin/external/proto" + "github.com/GoCodeAlone/workflow/sandbox" + "github.com/GoCodeAlone/workflow/secrets" +) + +const testBufSize = 1024 * 1024 + +// fakeRunner is an injectable sandbox.SandboxRunner that returns canned results. +type fakeRunner struct { + result *sandbox.ExecResult + err error + // capturedCmd is the command passed to Exec. + capturedCmd []string +} + +func (f *fakeRunner) Exec(_ context.Context, cmd []string) (*sandbox.ExecResult, error) { + f.capturedCmd = cmd + if f.err != nil { + return nil, f.err + } + if f.result == nil { + return &sandbox.ExecResult{ExitCode: 0, Stdout: "ok"}, nil + } + return f.result, nil +} + +func (f *fakeRunner) Close() error { return nil } + +// fakeProvider is a secrets.Provider that resolves from a fixed map. +type fakeProvider struct { + values map[string]string +} + +func (p *fakeProvider) Name() string { return "fake" } +func (p *fakeProvider) Get(_ context.Context, key string) (string, error) { + if v, ok := p.values[key]; ok { + return v, nil + } + return "", secrets.ErrNotFound +} +func (p *fakeProvider) Set(_ context.Context, _, _ string) error { return secrets.ErrUnsupported } +func (p *fakeProvider) Delete(_ context.Context, _ string) error { return secrets.ErrUnsupported } +func (p *fakeProvider) List(_ context.Context) ([]string, error) { return nil, secrets.ErrUnsupported } + +// buildTestServer starts an in-process gRPC server using a bufconn listener and +// returns a client connected to it. The provided runnerFactory is injected into +// the server so tests don't require a Docker daemon. +func buildTestServer(t *testing.T, provider secrets.Provider, token string, factory sandboxRunnerFactory) (pb.SandboxExecServiceClient, func()) { + t.Helper() + + lis := bufconn.Listen(testBufSize) + srv := grpc.NewServer(grpc.StreamInterceptor(newBearerStreamInterceptor(token))) + s := newSandboxExecServerWithFactory(provider, slog.Default(), factory) + pb.RegisterSandboxExecServiceServer(srv, s) + + go func() { _ = srv.Serve(lis) }() + + conn, err := grpc.NewClient("passthrough://bufnet", + grpc.WithContextDialer(func(_ context.Context, _ string) (net.Conn, error) { + return lis.Dial() + }), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + t.Fatalf("dial bufconn: %v", err) + } + + cleanup := func() { + conn.Close() //nolint:errcheck + srv.GracefulStop() + } + return pb.NewSandboxExecServiceClient(conn), cleanup +} + +// drainStream collects all chunks from a streaming exec RPC and returns them. +// It is used by success-path tests, so any non-EOF stream error is a test failure +// (t.Error) rather than being silently swallowed. Error-path tests use their own +// inline Recv loops and do not call drainStream. +func drainStream(t *testing.T, stream pb.SandboxExecService_ExecClient) []*pb.SandboxExecChunk { + t.Helper() + var chunks []*pb.SandboxExecChunk + for { + chunk, err := stream.Recv() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("drainStream: unexpected stream error: %v", err) + return chunks + } + chunks = append(chunks, chunk) + } + return chunks +} + +// --- clampProfile unit tests --- + +func TestClampProfile_AllowedProfiles(t *testing.T) { + for _, p := range []string{"strict", "standard"} { + got := clampProfile(p) + if got != p { + t.Errorf("clampProfile(%q) = %q; want %q (should be unchanged)", p, got, p) + } + } +} + +func TestClampProfile_PermissiveClamped(t *testing.T) { + got := clampProfile("permissive") + if got != "standard" { + t.Errorf("clampProfile(permissive) = %q; want %q", got, "standard") + } +} + +func TestClampProfile_UnknownClamped(t *testing.T) { + for _, p := range []string{"", "root", "unsafe", "anything-else"} { + got := clampProfile(p) + if got != "standard" { + t.Errorf("clampProfile(%q) = %q; want standard", p, got) + } + } +} + +// --- Profile-clamping integration test --- + +// TestExec_ProfileClamp verifies that a "permissive" request is clamped to +// "standard" on the server side (the runner sees the standard SandboxConfig, +// not a permissive one). +func TestExec_ProfileClamp(t *testing.T) { + var capturedConfig sandbox.SandboxConfig + factory := func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + capturedConfig = cfg + return &fakeRunner{}, nil + } + + client, cleanup := buildTestServer(t, &fakeProvider{values: map[string]string{}}, "", factory) + defer cleanup() + + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "permissive", + Image: "alpine:3.19", + Command: []string{"echo", "hi"}, + }) + if err != nil { + t.Fatalf("Exec RPC: %v", err) + } + _ = drainStream(t, stream) + + // The server should have built a "standard" SandboxConfig, not "permissive". + // Standard profile sets MemoryLimit to 256MB (see sandbox.BuildSandboxConfig). + expectedStdCfg := sandbox.BuildSandboxConfig("standard", "alpine:3.19") + if capturedConfig.MemoryLimit != expectedStdCfg.MemoryLimit { + t.Errorf("profile clamp: got MemoryLimit=%d (permissive=0), want %d (standard)", capturedConfig.MemoryLimit, expectedStdCfg.MemoryLimit) + } + if capturedConfig.NetworkMode != expectedStdCfg.NetworkMode { + t.Errorf("profile clamp: got NetworkMode=%q, want %q", capturedConfig.NetworkMode, expectedStdCfg.NetworkMode) + } +} + +// --- Secret resolution tests --- + +func TestExec_SecretResolution(t *testing.T) { + provider := &fakeProvider{ + values: map[string]string{ + "db/password": "s3cr3t!", + }, + } + + var capturedConfig sandbox.SandboxConfig + factory := func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + capturedConfig = cfg + return &fakeRunner{}, nil + } + + client, cleanup := buildTestServer(t, provider, "", factory) + defer cleanup() + + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", + Image: "alpine:3.19", + Command: []string{"env"}, + Env: map[string]string{ + "DB_PASS": "secret://db/password", + "PLAIN": "no-secret-here", + }, + }) + if err != nil { + t.Fatalf("Exec RPC: %v", err) + } + _ = drainStream(t, stream) + + // The env the runner receives must have the resolved value. + if capturedConfig.Env["DB_PASS"] != "s3cr3t!" { + t.Errorf("DB_PASS not resolved: got %q, want %q", capturedConfig.Env["DB_PASS"], "s3cr3t!") + } + // Non-secret values must be passed through unchanged. + if capturedConfig.Env["PLAIN"] != "no-secret-here" { + t.Errorf("PLAIN: got %q, want %q", capturedConfig.Env["PLAIN"], "no-secret-here") + } +} + +func TestExec_SecretResolution_MissingRef_Error(t *testing.T) { + // Provider has no entry for the requested key. + provider := &fakeProvider{values: map[string]string{}} + + factory := func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{}, nil + } + + client, cleanup := buildTestServer(t, provider, "", factory) + defer cleanup() + + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", + Image: "alpine:3.19", + Command: []string{"env"}, + Env: map[string]string{ + "MISSING": "secret://does-not-exist", + }, + }) + if err != nil { + // Some gRPC versions return the error directly from the client call. + st, ok := status.FromError(err) + if !ok || st.Code() != codes.InvalidArgument { + t.Fatalf("expected InvalidArgument, got: %v", err) + } + return + } + // If the error comes from the stream, drainStream will capture it. + for { + _, recvErr := stream.Recv() + if recvErr == io.EOF { + t.Fatal("expected error from stream, got EOF") + } + if recvErr != nil { + st, ok := status.FromError(recvErr) + if !ok { + t.Fatalf("non-status error: %v", recvErr) + } + if st.Code() != codes.InvalidArgument { + t.Errorf("expected InvalidArgument, got %v: %v", st.Code(), recvErr) + } + return + } + } +} + +// --- Auth interceptor tests --- + +// TestAuth_NoToken_Configured_PermitsAll covers the interceptor behavior when the +// agent is started WITHOUT a token. NOTE: as of the security review fix, the agent +// only reaches this state when the operator passed --allow-unauthenticated (see +// checkAuthRequirement / TestCheckAuthRequirement in main_test.go) — otherwise main +// refuses to start. The interceptor itself permits all when no token is configured. +func TestAuth_NoToken_Configured_PermitsAll(t *testing.T) { + client, cleanup := buildTestServer(t, &fakeProvider{}, "" /* empty = no auth; requires --allow-unauthenticated to reach */, func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{}, nil + }) + defer cleanup() + + // No auth header — should be allowed when no token is configured. + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: []string{"true"}, + }) + if err != nil { + t.Fatalf("expected success without auth configured, got: %v", err) + } + _ = drainStream(t, stream) +} + +func TestAuth_CorrectToken_Accepted(t *testing.T) { + const token = "secret-token-abc" + client, cleanup := buildTestServer(t, &fakeProvider{}, token, func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{}, nil + }) + defer cleanup() + + ctx := metadata.NewOutgoingContext(context.Background(), metadata.Pairs("authorization", "Bearer "+token)) + stream, err := client.Exec(ctx, &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: []string{"true"}, + }) + if err != nil { + t.Fatalf("Exec with correct token: %v", err) + } + _ = drainStream(t, stream) +} + +func TestAuth_WrongToken_Unauthenticated(t *testing.T) { + const token = "secret-token-abc" + client, cleanup := buildTestServer(t, &fakeProvider{}, token, func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{}, nil + }) + defer cleanup() + + ctx := metadata.NewOutgoingContext(context.Background(), metadata.Pairs("authorization", "Bearer wrong-token")) + stream, err := client.Exec(ctx, &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: []string{"true"}, + }) + if err != nil { + st, _ := status.FromError(err) + if st.Code() != codes.Unauthenticated { + t.Errorf("expected Unauthenticated, got %v", st.Code()) + } + return + } + // Error may arrive on first Recv. + _, recvErr := stream.Recv() + if recvErr == nil || recvErr == io.EOF { + t.Fatal("expected Unauthenticated error") + } + st, _ := status.FromError(recvErr) + if st.Code() != codes.Unauthenticated { + t.Errorf("expected Unauthenticated, got %v: %v", st.Code(), recvErr) + } +} + +func TestAuth_MissingToken_Unauthenticated(t *testing.T) { + const token = "secret-token-abc" + client, cleanup := buildTestServer(t, &fakeProvider{}, token, func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{}, nil + }) + defer cleanup() + + // No auth header at all. + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: []string{"true"}, + }) + if err != nil { + st, _ := status.FromError(err) + if st.Code() != codes.Unauthenticated { + t.Errorf("expected Unauthenticated, got %v", st.Code()) + } + return + } + _, recvErr := stream.Recv() + if recvErr == nil || recvErr == io.EOF { + t.Fatal("expected Unauthenticated error") + } + st, _ := status.FromError(recvErr) + if st.Code() != codes.Unauthenticated { + t.Errorf("expected Unauthenticated, got %v: %v", st.Code(), recvErr) + } +} + +// --- Stream output test --- + +func TestExec_OutputChunks(t *testing.T) { + factory := func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{ + result: &sandbox.ExecResult{ + ExitCode: 0, + Stdout: "hello stdout", + Stderr: "hello stderr", + }, + }, nil + } + + client, cleanup := buildTestServer(t, &fakeProvider{}, "", factory) + defer cleanup() + + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: []string{"echo", "hi"}, + }) + if err != nil { + t.Fatalf("Exec: %v", err) + } + + chunks := drainStream(t, stream) + var gotStdout, gotStderr bool + var gotExitCode bool + for _, chunk := range chunks { + switch v := chunk.Chunk.(type) { + case *pb.SandboxExecChunk_Stdout: + gotStdout = true + if string(v.Stdout) != "hello stdout" { + t.Errorf("stdout: got %q, want %q", v.Stdout, "hello stdout") + } + case *pb.SandboxExecChunk_Stderr: + gotStderr = true + if string(v.Stderr) != "hello stderr" { + t.Errorf("stderr: got %q, want %q", v.Stderr, "hello stderr") + } + case *pb.SandboxExecChunk_ExitCode: + gotExitCode = true + if v.ExitCode != 0 { + t.Errorf("exit_code: got %d, want 0", v.ExitCode) + } + } + } + if !gotStdout { + t.Error("expected stdout chunk, got none") + } + if !gotStderr { + t.Error("expected stderr chunk, got none") + } + if !gotExitCode { + t.Error("expected exit_code chunk, got none") + } +} + +// TestExec_NonZeroExitCode verifies a non-zero command exit code is faithfully +// streamed as the terminal exit_code chunk (a failing command must not be +// reported as exit 0). +func TestExec_NonZeroExitCode(t *testing.T) { + factory := func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{ + result: &sandbox.ExecResult{ExitCode: 7, Stderr: "boom"}, + }, nil + } + + client, cleanup := buildTestServer(t, &fakeProvider{}, "", factory) + defer cleanup() + + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: []string{"false"}, + }) + if err != nil { + t.Fatalf("Exec: %v", err) + } + + chunks := drainStream(t, stream) + var exitCode int32 = -1 + var gotExit bool + for _, chunk := range chunks { + if v, ok := chunk.Chunk.(*pb.SandboxExecChunk_ExitCode); ok { + gotExit = true + exitCode = v.ExitCode + } + } + if !gotExit { + t.Fatal("expected terminal exit_code chunk, got none") + } + if exitCode != 7 { + t.Errorf("exit_code: got %d, want 7", exitCode) + } +} diff --git a/module/execenv_factory.go b/module/execenv_factory.go index bbd9fc2c8..49c2d34de 100644 --- a/module/execenv_factory.go +++ b/module/execenv_factory.go @@ -1,36 +1,142 @@ package module import ( + "context" "fmt" + "strings" "github.com/GoCodeAlone/modular" "github.com/GoCodeAlone/workflow/sandbox" + "github.com/GoCodeAlone/workflow/sandbox/remote" + "github.com/GoCodeAlone/workflow/secrets" ) // resolveSandboxRunner returns a SandboxRunner for the requested execution environment. // // Supported values for execEnv: -// - "" or "local-docker" — local Docker daemon (current default behaviour). +// - "" or "local-docker" — local Docker daemon (default). +// - any registered runner name — dispatches to the named RemoteRunner from the +// sandbox.remote_runners registry (PR8). The registry is looked up from the +// modular service registry via app. If the name is not registered, a clear +// error is returned at runtime (step Execute time). // -// Deferred values (will be wired in later PRs): -// - "remote" — remote runner (PR7/PR8) -// - "ephemeral" — ephemeral/cloud runner (PR8/PR9) +// Deferred values: +// - "ephemeral" — ephemeral/cloud runner (PR9) // -// Any other value returns a clear error at runner-resolution (step Execute) -// time. Note: the step factory ALSO rejects unsupported exec_env values at -// pipeline-construction time, so a mis-spelled value normally fails earlier; -// this runtime guard is the backstop. -// NOTE: the `app` parameter is intentionally reserved (named `_` today) — PR7/PR8 -// (remote runner) + PR9 (Argo) resolve their runner config from the service -// registry via `app`. Do NOT drop it from the signature when wiring those cases. -func resolveSandboxRunner(_ modular.Application, execEnv string, cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { +// Validation strategy: the step factory (pipeline_step_sandbox_exec.go) no longer +// rejects non-local-docker exec_env values at construction time — named runners are +// determined by config at runtime, not build time. Any unresolved name (not in the +// registry) returns an error at Execute time, which is the appropriate gate (same as +// other service-name references in the pipeline). +func resolveSandboxRunner(ctx context.Context, app modular.Application, execEnv string, cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { switch execEnv { case "", "local-docker": return sandbox.NewLocalDockerRunner(cfg) - case "remote", "ephemeral": - // TODO(PR7/PR8/PR9): wire remote and ephemeral runners here. - return nil, fmt.Errorf("sandbox_exec: exec_env %q not yet configured (deferred to PR7/PR8/PR9)", execEnv) + case "ephemeral": + // TODO(PR9): wire ephemeral/cloud runner here. + return nil, fmt.Errorf("sandbox_exec: exec_env %q not yet configured (deferred to PR9)", execEnv) default: - return nil, fmt.Errorf("sandbox_exec: exec_env %q not configured", execEnv) + // Treat execEnv as a named remote runner. Look it up in the service registry. + return resolveNamedRemoteRunner(ctx, app, execEnv, cfg) } } + +// resolveNamedRemoteRunner looks up a RemoteRunnerSpec by name from the +// RemoteRunnerRegistry service, resolves the spec's bearer token through the +// configured secrets provider, builds a RemoteRunner from the spec, and returns +// it wired with the per-exec SandboxConfig (profile, image, env, workdir). +// +// The app parameter may be nil in unit tests that don't exercise the remote path; +// a nil app with a non-local execEnv returns a clear "no registry" error. +func resolveNamedRemoteRunner(ctx context.Context, app modular.Application, name string, cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + if app == nil { + return nil, fmt.Errorf("sandbox_exec: exec_env %q not configured (no application context)", name) + } + + var registry *RemoteRunnerRegistry + if err := app.GetService(SandboxRemoteRunnerServiceName, ®istry); err != nil || registry == nil { + return nil, fmt.Errorf("sandbox_exec: exec_env %q not configured (no sandbox.remote_runners module)", name) + } + + spec, ok := registry.Get(name) + if !ok { + return nil, fmt.Errorf("sandbox_exec: exec_env %q not configured (no runner named %q in sandbox.remote_runners)", name, name) + } + + // Resolve the bearer token. It may be a secret:// reference, in which case it + // MUST be resolved to its literal value before being sent as the Bearer header. + resolvedToken, err := resolveRunnerToken(ctx, app, registry.SecretsProvider(), spec.Token, name) + if err != nil { + return nil, err + } + + runnerCfg := remote.RemoteRunnerConfig{ + Profile: cfg.GetProfile(), + Image: cfg.Image, + Env: cfg.Env, + WorkDir: cfg.WorkDir, + } + + runner, err := buildRemoteRunnerFromSpec(spec, resolvedToken, runnerCfg) + if err != nil { + return nil, fmt.Errorf("sandbox_exec: exec_env %q: build remote runner: %w", name, err) + } + return runner, nil +} + +// resolveRunnerToken resolves a runner's bearer token. A literal (non-secret://) +// token passes through unchanged. A secret:// reference is resolved through the +// named secrets provider; if no provider is configured (providerName == ""), a +// secret:// token is a configuration error — we MUST NOT send the literal +// "secret://..." string as the Bearer header (the agent would reject it). +// +// The resolved token value is NEVER logged (it is a credential). +func resolveRunnerToken(ctx context.Context, app modular.Application, providerName, token, runnerName string) (string, error) { + if token == "" { + return "", nil + } + if !strings.HasPrefix(token, secrets.SecretPrefix) { + // Literal token — pass through unchanged. + return token, nil + } + if providerName == "" { + return "", fmt.Errorf("sandbox_exec: exec_env %q: token is a %s reference but the sandbox.remote_runners module has no 'secrets_provider' configured to resolve it", runnerName, secrets.SecretPrefix) + } + + provider, err := resolveSecretsProviderFromRegistry(app, providerName) + if err != nil { + return "", fmt.Errorf("sandbox_exec: exec_env %q: %w", runnerName, err) + } + + resolved, err := secrets.NewResolver(provider).Resolve(ctx, token) + if err != nil { + // Do not echo the token value or the resolved secret into the error. + return "", fmt.Errorf("sandbox_exec: exec_env %q: failed to resolve token secret reference", runnerName) + } + return resolved, nil +} + +// resolveSecretsProviderFromRegistry resolves a secrets.Provider from the service +// registry by name. The service may implement secrets.Provider directly or expose +// a Provider() accessor (mirrors resolveSecretsProvider in +// pipeline_step_iac_secret_reachability.go). +func resolveSecretsProviderFromRegistry(app modular.Application, providerName string) (secrets.Provider, error) { + svc, ok := app.SvcRegistry()[providerName] + if !ok { + return nil, fmt.Errorf("secrets provider %q not found in service registry", providerName) + } + if p, ok := svc.(secrets.Provider); ok { + return p, nil + } + type providerAccessor interface { + Provider() secrets.Provider + } + if acc, ok := svc.(providerAccessor); ok { + p := acc.Provider() + if p == nil { + return nil, fmt.Errorf("secrets provider %q exposes Provider() accessor but returned nil; module may not be started", providerName) + } + return p, nil + } + return nil, fmt.Errorf("secrets provider %q does not implement secrets.Provider directly or via Provider() accessor", providerName) +} diff --git a/module/execenv_factory_test.go b/module/execenv_factory_test.go index 1d37df92a..fdf387ad2 100644 --- a/module/execenv_factory_test.go +++ b/module/execenv_factory_test.go @@ -1,10 +1,12 @@ package module import ( + "context" "strings" "testing" "github.com/GoCodeAlone/workflow/sandbox" + "github.com/GoCodeAlone/workflow/secrets" ) // TestExecEnvFactory_DefaultLocalDocker verifies that an empty execEnv or @@ -13,7 +15,7 @@ func TestExecEnvFactory_DefaultLocalDocker(t *testing.T) { cfg := sandbox.SandboxConfig{Image: "alpine:3.19"} for _, execEnv := range []string{"", "local-docker"} { - runner, err := resolveSandboxRunner(nil, execEnv, cfg) + runner, err := resolveSandboxRunner(context.Background(), nil, execEnv, cfg) if err != nil { // Runner creation uses the Docker client env (DOCKER_HOST/TLS); a // failure here is a Docker-availability issue, not an exec_env-routing @@ -30,6 +32,11 @@ func TestExecEnvFactory_DefaultLocalDocker(t *testing.T) { // TestExecEnvFactory_UnknownExecEnv_Error verifies that unknown or deferred // exec_env values return a clear error rather than silently falling through. +// +// As of PR8, exec_env values other than "" / "local-docker" / "ephemeral" are +// treated as named remote runner names. Without an application context (nil app), +// they all return a "no application context" error. The reserved "ephemeral" +// value is still explicitly deferred to PR9 with its own message. func TestExecEnvFactory_UnknownExecEnv_Error(t *testing.T) { cfg := sandbox.SandboxConfig{Image: "alpine:3.19"} @@ -37,14 +44,16 @@ func TestExecEnvFactory_UnknownExecEnv_Error(t *testing.T) { execEnv string errContains string }{ - {"remote", "not yet configured"}, + // "ephemeral" is still explicitly deferred. {"ephemeral", "not yet configured"}, + // Named runner names fail with "no application context" when app is nil. + {"remote", "not configured"}, {"nope", "not configured"}, {"argo", "not configured"}, } for _, tt := range tests { - runner, err := resolveSandboxRunner(nil, tt.execEnv, cfg) + runner, err := resolveSandboxRunner(context.Background(), nil, tt.execEnv, cfg) if err == nil { t.Errorf("execEnv=%q: expected error, got nil runner=%v", tt.execEnv, runner) if runner != nil { @@ -79,7 +88,7 @@ func TestSandboxExec_ExecEnvAbsent_Unchanged(t *testing.T) { // Confirm the factory resolves it to a local runner without error. cfg := s.buildSandboxConfig() - runner, err := resolveSandboxRunner(s.app, s.execEnv, cfg) + runner, err := resolveSandboxRunner(context.Background(), s.app, s.execEnv, cfg) if err != nil { t.Skipf("resolveSandboxRunner with empty execEnv: docker client unavailable: %v", err) } @@ -106,7 +115,7 @@ func TestSandboxExec_ExecEnvLocalDocker_ExplicitlySet(t *testing.T) { } cfg := s.buildSandboxConfig() - runner, err := resolveSandboxRunner(s.app, s.execEnv, cfg) + runner, err := resolveSandboxRunner(context.Background(), s.app, s.execEnv, cfg) if err != nil { t.Skipf("docker client unavailable: %v", err) } @@ -116,18 +125,182 @@ func TestSandboxExec_ExecEnvLocalDocker_ExplicitlySet(t *testing.T) { _ = runner.Close() } -// TestSandboxExec_Factory_InvalidExecEnv verifies the step factory rejects an -// unsupported exec_env at construction time (fail-early), not at Execute time. -func TestSandboxExec_Factory_InvalidExecEnv(t *testing.T) { +// TestSandboxExec_Factory_ValidExecEnv verifies the step factory accepts exec_env +// values at construction time. As of PR8, named remote runner names are allowed at +// construction time (validation is deferred to Execute time, when the service registry +// is consulted). Only "local-docker" and the empty string are local runners; all +// other non-empty strings are accepted as potential remote runner names. +func TestSandboxExec_Factory_ValidExecEnv(t *testing.T) { app := NewMockApplication() factory := NewSandboxExecStepFactory() - for _, ee := range []string{"remote", "ephemeral", "nope"} { - if _, err := factory("sb", map[string]any{"image": "alpine", "exec_env": ee}, app); err == nil { - t.Errorf("exec_env %q: expected factory error, got nil", ee) + + // All of these must now succeed at construction time. + // "remote", "ephemeral", and arbitrary names are accepted — errors appear at Execute time. + for _, ee := range []string{"local-docker", "", "remote", "ephemeral", "nope", "prod-runner"} { + if _, err := factory("sb", map[string]any{"image": "alpine", "exec_env": ee}, app); err != nil { + t.Errorf("exec_env %q: unexpected factory error: %v", ee, err) } } - // local-docker + absent must still succeed. - if _, err := factory("sb", map[string]any{"image": "alpine", "exec_env": "local-docker"}, app); err != nil { - t.Errorf("exec_env local-docker: unexpected factory error: %v", err) +} + +// mapSecretsProvider is an in-memory secrets.Provider for token-resolution tests. +type mapSecretsProvider struct { + vals map[string]string +} + +func (p *mapSecretsProvider) Name() string { return "map" } +func (p *mapSecretsProvider) Get(_ context.Context, key string) (string, error) { + if v, ok := p.vals[key]; ok { + return v, nil + } + return "", secrets.ErrNotFound +} +func (p *mapSecretsProvider) Set(_ context.Context, _, _ string) error { return secrets.ErrUnsupported } +func (p *mapSecretsProvider) Delete(_ context.Context, _ string) error { return secrets.ErrUnsupported } +func (p *mapSecretsProvider) List(_ context.Context) ([]string, error) { + return nil, secrets.ErrUnsupported +} + +// TestResolveRunnerToken_SecretRefResolvesThroughProvider is the CRITICAL test: +// a spec token "secret://x" + a configured provider must resolve to the literal +// secret value — NOT pass the "secret://x" string through verbatim. +func TestResolveRunnerToken_SecretRefResolvesThroughProvider(t *testing.T) { + app := NewMockApplication() + provider := &mapSecretsProvider{vals: map[string]string{"runner/prod-token": "RESOLVED-SECRET-VALUE"}} + if err := app.RegisterService("my-vault", provider); err != nil { + t.Fatalf("RegisterService: %v", err) + } + + got, err := resolveRunnerToken(context.Background(), app, "my-vault", "secret://runner/prod-token", "prod-runner") + if err != nil { + t.Fatalf("resolveRunnerToken: %v", err) + } + if got != "RESOLVED-SECRET-VALUE" { + t.Errorf("token: got %q, want %q (must NOT pass secret:// through verbatim)", got, "RESOLVED-SECRET-VALUE") + } +} + +// TestResolveRunnerToken_LiteralPassesThrough verifies a literal (non-secret://) +// token is returned unchanged even when no provider is configured. +func TestResolveRunnerToken_LiteralPassesThrough(t *testing.T) { + app := NewMockApplication() + got, err := resolveRunnerToken(context.Background(), app, "", "literal-token", "r1") + if err != nil { + t.Fatalf("resolveRunnerToken: %v", err) + } + if got != "literal-token" { + t.Errorf("token: got %q, want %q", got, "literal-token") + } +} + +// TestResolveRunnerToken_Empty returns empty without error. +func TestResolveRunnerToken_Empty(t *testing.T) { + app := NewMockApplication() + got, err := resolveRunnerToken(context.Background(), app, "", "", "r1") + if err != nil { + t.Fatalf("resolveRunnerToken: %v", err) + } + if got != "" { + t.Errorf("token: got %q, want empty", got) + } +} + +// TestResolveRunnerToken_SecretRefNoProvider_Error verifies a secret:// token +// with NO configured provider is a hard error — we must NOT send the literal +// "secret://..." string as the Bearer header. +func TestResolveRunnerToken_SecretRefNoProvider_Error(t *testing.T) { + app := NewMockApplication() + _, err := resolveRunnerToken(context.Background(), app, "", "secret://runner/token", "r1") + if err == nil { + t.Fatal("expected error for secret:// token with no provider, got nil") + } +} + +// TestResolveRunnerToken_ProviderMissingRef_Error verifies a non-leaky error +// when the provider cannot resolve the reference. +func TestResolveRunnerToken_ProviderMissingRef_Error(t *testing.T) { + app := NewMockApplication() + provider := &mapSecretsProvider{vals: map[string]string{}} + if err := app.RegisterService("my-vault", provider); err != nil { + t.Fatalf("RegisterService: %v", err) + } + _, err := resolveRunnerToken(context.Background(), app, "my-vault", "secret://missing", "r1") + if err == nil { + t.Fatal("expected error for unresolvable secret ref, got nil") + } + // The error must not echo the secret reference value. + if strings.Contains(err.Error(), "missing") { + t.Errorf("error leaks the secret key/ref: %v", err) + } +} + +// TestResolveNamedRemoteRunner_SecretTokenReachesConfig is the end-to-end CRITICAL +// check: a registered runner with a secret:// token resolves the token through the +// provider before the RemoteRunner dials (allow_insecure lets the no-TLS+token build +// succeed in-test). We assert the runner builds without error, proving the resolved +// (non-secret://) value reached RemoteRunnerConfig.Token — a verbatim secret:// would +// not change the build outcome here, so we additionally cover the value path via the +// resolveRunnerToken unit test above. +func TestResolveNamedRemoteRunner_SecretTokenBuilds(t *testing.T) { + app := NewMockApplication() + provider := &mapSecretsProvider{vals: map[string]string{"runner/tok": "REAL-TOKEN"}} + if err := app.RegisterService("vault", provider); err != nil { + t.Fatalf("RegisterService: %v", err) + } + + mod := NewSandboxRemoteRunnersModule("rn", map[string]any{ + "secrets_provider": "vault", + "remote_runners": []any{ + map[string]any{ + "name": "prod", + "address": "localhost:50051", + "token": "secret://runner/tok", + "allow_insecure": true, // permit token over no-TLS in-test + }, + }, + }) + if err := mod.Init(app); err != nil { + t.Fatalf("module Init: %v", err) + } + if err := app.RegisterService(SandboxRemoteRunnerServiceName, mod.registry); err != nil { + t.Fatalf("RegisterService(registry): %v", err) + } + + runner, err := resolveSandboxRunner(context.Background(), app, "prod", sandbox.SandboxConfig{Image: "alpine", Profile: "standard"}) + if err != nil { + t.Fatalf("resolveSandboxRunner: %v", err) + } + if runner == nil { + t.Fatal("expected non-nil runner") + } + _ = runner.Close() +} + +// TestResolveNamedRemoteRunner_SecretTokenNoProvider_Errors verifies the +// end-to-end path refuses to build a runner when a secret:// token cannot be +// resolved (no provider) — instead of silently sending the literal ref. +func TestResolveNamedRemoteRunner_SecretTokenNoProvider_Errors(t *testing.T) { + app := NewMockApplication() + mod := NewSandboxRemoteRunnersModule("rn", map[string]any{ + // no secrets_provider configured + "remote_runners": []any{ + map[string]any{ + "name": "prod", + "address": "localhost:50051", + "token": "secret://runner/tok", + "allow_insecure": true, + }, + }, + }) + if err := mod.Init(app); err != nil { + t.Fatalf("module Init: %v", err) + } + if err := app.RegisterService(SandboxRemoteRunnerServiceName, mod.registry); err != nil { + t.Fatalf("RegisterService(registry): %v", err) + } + + _, err := resolveSandboxRunner(context.Background(), app, "prod", sandbox.SandboxConfig{Image: "alpine"}) + if err == nil { + t.Fatal("expected error: secret:// token with no provider must not build a runner") } } diff --git a/module/pipeline_step_sandbox_exec.go b/module/pipeline_step_sandbox_exec.go index e28701ada..cc53acf52 100644 --- a/module/pipeline_step_sandbox_exec.go +++ b/module/pipeline_step_sandbox_exec.go @@ -95,12 +95,12 @@ func NewSandboxExecStepFactory() StepFactory { } if ee, ok := cfg["exec_env"].(string); ok && ee != "" { - // Validate at factory time (fail-early, like security_profile). Only - // local-docker is wired in this phase; remote/ephemeral are added to - // this allow-list when PR7/PR8 (remote-runner) and PR9 (Argo) wire them. - if ee != "local-docker" { - return nil, fmt.Errorf("sandbox_exec step %q: exec_env %q not supported (only local-docker is wired; remote/ephemeral arrive in later phases)", name, ee) - } + // exec_env validation: "local-docker" is the local runner; + // any other non-empty string is treated as a named remote runner and + // validated at Execute time by resolveSandboxRunner (PR8). We no longer + // reject unknown values at construction time since named runner + // registrations are config-driven and not known until runtime. + // The reserved "ephemeral" value is still deferred to PR9. step.execEnv = ee } @@ -139,7 +139,7 @@ func (s *SandboxExecStep) Name() string { return s.name } func (s *SandboxExecStep) Execute(ctx context.Context, _ *PipelineContext) (*StepResult, error) { sbCfg := s.buildSandboxConfig() - sb, err := resolveSandboxRunner(s.app, s.execEnv, sbCfg) + sb, err := resolveSandboxRunner(ctx, s.app, s.execEnv, sbCfg) if err != nil { return nil, fmt.Errorf("sandbox_exec step %q: failed to create sandbox: %w", s.name, err) } diff --git a/module/sandbox_remote_runners.go b/module/sandbox_remote_runners.go new file mode 100644 index 000000000..618174177 --- /dev/null +++ b/module/sandbox_remote_runners.go @@ -0,0 +1,279 @@ +package module + +import ( + "context" + "crypto/tls" + "crypto/x509" + "fmt" + "os" + + "github.com/GoCodeAlone/modular" + "github.com/GoCodeAlone/workflow/sandbox/remote" +) + +// SandboxRemoteRunnerServiceName is the service name under which the +// RemoteRunnerRegistry is registered in the modular service registry. +const SandboxRemoteRunnerServiceName = "sandbox.remote_runners" + +// reservedRunnerNames are exec_env values the engine handles itself — a remote +// runner registered under one of these names would be silently unreachable +// (resolveSandboxRunner short-circuits them before consulting the registry). +var reservedRunnerNames = map[string]bool{ + "": true, + "local-docker": true, + "ephemeral": true, +} + +// RemoteRunnerSpec holds the config for a single named remote sandbox runner. +// It maps directly to one element of the sandbox.remote_runners[*] config block. +type RemoteRunnerSpec struct { + // Name is the runner identifier — matched against exec_env in step.sandbox_exec. + Name string `yaml:"name"` + // Address is the gRPC target of the remote sandbox agent (host:port). + Address string `yaml:"address"` + // Token is the bearer token sent on every RPC. May be a secret:// reference, + // which is resolved at runner-build time via the module's secrets provider. + Token string `yaml:"token"` + // TLS holds optional mTLS settings for the connection to the agent. + TLS *RemoteRunnerTLSSpec `yaml:"tls,omitempty"` + // AllowInsecure permits a non-empty Token to be sent over a non-TLS + // connection. Explicit opt-in for local/dev only; production must use TLS. + AllowInsecure bool `yaml:"allow_insecure"` +} + +// RemoteRunnerTLSSpec carries the per-runner TLS certificate paths. +type RemoteRunnerTLSSpec struct { + // Cert is the path to the client certificate file (PEM). + Cert string `yaml:"cert"` + // Key is the path to the client private key file (PEM). + Key string `yaml:"key"` + // CA is the path to the CA certificate used to verify the server (PEM). + CA string `yaml:"ca"` +} + +// hasTLS reports whether the spec configures any TLS material. +func (s RemoteRunnerSpec) hasTLS() bool { + return s.TLS != nil && (s.TLS.Cert != "" || s.TLS.Key != "" || s.TLS.CA != "") +} + +// RemoteRunnerRegistry exposes named RemoteRunnerSpecs to other modules. +// It is registered as a service under SandboxRemoteRunnerServiceName and +// consumed by resolveSandboxRunner in execenv_factory.go. +type RemoteRunnerRegistry struct { + runners map[string]RemoteRunnerSpec + // secretsProvider is the name of the secrets module used to resolve any + // secret:// references in runner tokens. Empty means no provider configured. + secretsProvider string +} + +// Get returns the spec for the named runner, or (zero, false) if not found. +func (r *RemoteRunnerRegistry) Get(name string) (RemoteRunnerSpec, bool) { + if r == nil { + return RemoteRunnerSpec{}, false + } + spec, ok := r.runners[name] + return spec, ok +} + +// SecretsProvider returns the configured secrets provider name (may be empty). +func (r *RemoteRunnerRegistry) SecretsProvider() string { + if r == nil { + return "" + } + return r.secretsProvider +} + +// SandboxRemoteRunnersModule is a modular.Module that parses the +// sandbox.remote_runners config section and registers a RemoteRunnerRegistry +// as a service. Other modules (e.g. execenv_factory) retrieve the registry +// via app.GetService(SandboxRemoteRunnerServiceName, ...). +// +// Config structure (YAML excerpt): +// +// modules: +// - name: my-runners +// type: sandbox.remote_runners +// secrets_provider: my-vault # optional; resolves secret:// tokens +// remote_runners: +// - name: prod-runner +// address: "agent.example.com:50051" +// token: "secret://runner/token" +// tls: +// cert: "/etc/certs/client.crt" +// key: "/etc/certs/client.key" +// ca: "/etc/certs/ca.crt" +type SandboxRemoteRunnersModule struct { + name string + secretsProvider string + specs []RemoteRunnerSpec + registry *RemoteRunnerRegistry +} + +// NewSandboxRemoteRunnersModule creates a new module from the provided name and +// config map. Config fields: +// - remote_runners: []map with name/address/token/tls/allow_insecure fields. +// - secrets_provider: optional name of a secrets module for secret:// token refs. +func NewSandboxRemoteRunnersModule(name string, cfg map[string]any) *SandboxRemoteRunnersModule { + m := &SandboxRemoteRunnersModule{name: name} + + if sp, ok := cfg["secrets_provider"].(string); ok { + m.secretsProvider = sp + } + + runnersRaw, ok := cfg["remote_runners"] + if !ok { + return m + } + + switch v := runnersRaw.(type) { + case []any: + for _, item := range v { + itemMap, ok := item.(map[string]any) + if !ok { + continue + } + m.specs = append(m.specs, parseRemoteRunnerSpec(itemMap)) + } + case []map[string]any: + for _, itemMap := range v { + m.specs = append(m.specs, parseRemoteRunnerSpec(itemMap)) + } + } + return m +} + +func parseRemoteRunnerSpec(m map[string]any) RemoteRunnerSpec { + spec := RemoteRunnerSpec{} + if n, ok := m["name"].(string); ok { + spec.Name = n + } + if a, ok := m["address"].(string); ok { + spec.Address = a + } + if t, ok := m["token"].(string); ok { + spec.Token = t + } + if ai, ok := m["allow_insecure"].(bool); ok { + spec.AllowInsecure = ai + } + if tlsRaw, ok := m["tls"].(map[string]any); ok { + spec.TLS = &RemoteRunnerTLSSpec{} + if c, ok := tlsRaw["cert"].(string); ok { + spec.TLS.Cert = c + } + if k, ok := tlsRaw["key"].(string); ok { + spec.TLS.Key = k + } + if ca, ok := tlsRaw["ca"].(string); ok { + spec.TLS.CA = ca + } + } + return spec +} + +// Name satisfies modular.Module. +func (m *SandboxRemoteRunnersModule) Name() string { return m.name } + +// Init validates the parsed specs and builds the registry. It rejects: +// - a runner whose name is empty or a reserved value (would be unreachable), +// - duplicate runner names (a second silently overwriting the first), +// - a missing address, +// - a no-auth-no-TLS runner unless it opts in via allow_insecure. +func (m *SandboxRemoteRunnersModule) Init(_ modular.Application) error { + runners := make(map[string]RemoteRunnerSpec, len(m.specs)) + for _, spec := range m.specs { + if reservedRunnerNames[spec.Name] { + return fmt.Errorf("sandbox.remote_runners: runner name %q is empty or reserved (reserved: local-docker, ephemeral); choose a distinct name", spec.Name) + } + if _, dup := runners[spec.Name]; dup { + return fmt.Errorf("sandbox.remote_runners: duplicate runner name %q", spec.Name) + } + if spec.Address == "" { + return fmt.Errorf("sandbox.remote_runners: runner %q: 'address' is required", spec.Name) + } + // A runner with no bearer token AND no TLS is unauthenticated and + // unencrypted — refuse unless the operator explicitly opts in. + if spec.Token == "" && !spec.hasTLS() && !spec.AllowInsecure { + return fmt.Errorf("sandbox.remote_runners: runner %q has neither a token nor TLS; set token, set tls, or set allow_insecure: true for local/dev only", spec.Name) + } + runners[spec.Name] = spec + } + m.registry = &RemoteRunnerRegistry{ + runners: runners, + secretsProvider: m.secretsProvider, + } + return nil +} + +// ProvidesServices exposes the RemoteRunnerRegistry under SandboxRemoteRunnerServiceName. +func (m *SandboxRemoteRunnersModule) ProvidesServices() []modular.ServiceProvider { + return []modular.ServiceProvider{ + { + Name: SandboxRemoteRunnerServiceName, + Description: "Registry of named remote sandbox runner connections", + Instance: m.registry, + }, + } +} + +// RequiresServices returns nil — this module has no service dependencies. +func (m *SandboxRemoteRunnersModule) RequiresServices() []modular.ServiceDependency { + return nil +} + +// Start is a no-op. +func (m *SandboxRemoteRunnersModule) Start(_ context.Context) error { return nil } + +// Stop is a no-op. +func (m *SandboxRemoteRunnersModule) Stop(_ context.Context) error { return nil } + +// buildTLSConfig constructs a *tls.Config for mTLS from PEM file paths. +// certFile and keyFile are the client certificate and key (may be empty for server-only TLS). +// caFile is the CA certificate for verifying the server (required when non-empty). +func buildTLSConfig(certFile, keyFile, caFile string) (*tls.Config, error) { + tlsCfg := &tls.Config{MinVersion: tls.VersionTLS13} + + if certFile != "" && keyFile != "" { + cert, err := tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { + return nil, fmt.Errorf("load client cert/key: %w", err) + } + tlsCfg.Certificates = []tls.Certificate{cert} + } + + if caFile != "" { + caPEM, err := os.ReadFile(caFile) //nolint:gosec // G304: operator-configured path + if err != nil { + return nil, fmt.Errorf("read CA cert %q: %w", caFile, err) + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(caPEM) { + return nil, fmt.Errorf("failed to parse CA cert from %q", caFile) + } + tlsCfg.RootCAs = pool + } + + return tlsCfg, nil +} + +// buildRemoteRunnerFromSpec constructs a remote.RemoteRunner from a RemoteRunnerSpec, +// merging in the per-exec SandboxConfig (profile, image, env, workdir). +// +// resolvedToken is the bearer token AFTER any secret:// reference has been resolved +// (the caller resolves it via the module's secrets provider). It MUST already be a +// literal token value — never a secret:// reference. +func buildRemoteRunnerFromSpec(spec RemoteRunnerSpec, resolvedToken string, cfg remote.RemoteRunnerConfig) (*remote.RemoteRunner, error) { + cfg.Address = spec.Address + cfg.Token = resolvedToken + cfg.AllowInsecure = spec.AllowInsecure + + if spec.hasTLS() { + tlsCfg, err := buildTLSConfig(spec.TLS.Cert, spec.TLS.Key, spec.TLS.CA) + if err != nil { + return nil, fmt.Errorf("sandbox remote runner %q: build TLS config: %w", spec.Name, err) + } + cfg.TLS = tlsCfg + } + + return remote.NewRemoteRunner(cfg) +} diff --git a/module/sandbox_remote_runners_test.go b/module/sandbox_remote_runners_test.go new file mode 100644 index 000000000..2b14b88ce --- /dev/null +++ b/module/sandbox_remote_runners_test.go @@ -0,0 +1,252 @@ +package module + +import ( + "context" + "testing" +) + +func TestNewSandboxRemoteRunnersModule_Empty(t *testing.T) { + m := NewSandboxRemoteRunnersModule("runners", map[string]any{}) + if err := m.Init(nil); err != nil { + t.Fatalf("Init: %v", err) + } + if m.registry == nil { + t.Fatal("expected non-nil registry") + } + if _, ok := m.registry.Get("anything"); ok { + t.Error("expected empty registry to return false for any lookup") + } +} + +func TestNewSandboxRemoteRunnersModule_ParsesRunners(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "prod-runner", + "address": "agent.prod.example.com:50051", + "token": "secret://runner/prod-token", + }, + map[string]any{ + "name": "staging-runner", + "address": "agent.staging.example.com:50051", + "token": "staging-literal-token", + }, + }, + } + m := NewSandboxRemoteRunnersModule("my-runners", cfg) + if err := m.Init(nil); err != nil { + t.Fatalf("Init: %v", err) + } + + spec, ok := m.registry.Get("prod-runner") + if !ok { + t.Fatal("expected prod-runner to be found") + } + if spec.Address != "agent.prod.example.com:50051" { + t.Errorf("Address: got %q, want %q", spec.Address, "agent.prod.example.com:50051") + } + if spec.Token != "secret://runner/prod-token" { + t.Errorf("Token: got %q, want %q", spec.Token, "secret://runner/prod-token") + } + + spec2, ok := m.registry.Get("staging-runner") + if !ok { + t.Fatal("expected staging-runner to be found") + } + if spec2.Address != "agent.staging.example.com:50051" { + t.Errorf("Address: got %q, want %q", spec2.Address, "agent.staging.example.com:50051") + } + + // Non-existent name. + if _, ok := m.registry.Get("missing"); ok { + t.Error("expected missing name to return false") + } +} + +func TestNewSandboxRemoteRunnersModule_MissingAddress_Error(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "no-address", + "token": "tok", + // address intentionally absent + }, + }, + } + m := NewSandboxRemoteRunnersModule("bad", cfg) + if err := m.Init(nil); err == nil { + t.Error("expected error for runner with missing address, got nil") + } +} + +// TestNewSandboxRemoteRunnersModule_ReservedName_Error verifies that a runner +// named with a reserved exec_env value (empty/local-docker/ephemeral) is rejected +// at Init time — otherwise it would be silently unreachable. +func TestNewSandboxRemoteRunnersModule_ReservedName_Error(t *testing.T) { + for _, name := range []string{"", "local-docker", "ephemeral"} { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": name, + "address": "agent.example.com:50051", + "token": "tok", + }, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err == nil { + t.Errorf("reserved name %q: expected Init error, got nil", name) + } + } +} + +// TestNewSandboxRemoteRunnersModule_DuplicateName_Error verifies a duplicate +// runner name is rejected rather than silently overwriting the first. +func TestNewSandboxRemoteRunnersModule_DuplicateName_Error(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{"name": "dup", "address": "a:1", "token": "t1"}, + map[string]any{"name": "dup", "address": "b:2", "token": "t2"}, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err == nil { + t.Error("expected error for duplicate runner name, got nil") + } +} + +// TestNewSandboxRemoteRunnersModule_NoAuthNoTLS_Error verifies a runner with +// neither a token nor TLS is rejected unless allow_insecure is set. +func TestNewSandboxRemoteRunnersModule_NoAuthNoTLS_Error(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "naked", + "address": "agent.example.com:50051", + // no token, no tls, no allow_insecure + }, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err == nil { + t.Error("expected error for no-auth-no-tls runner, got nil") + } +} + +// TestNewSandboxRemoteRunnersModule_NoAuthNoTLS_AllowInsecure_OK verifies the +// explicit allow_insecure opt-in permits a no-auth-no-tls runner. +func TestNewSandboxRemoteRunnersModule_NoAuthNoTLS_AllowInsecure_OK(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "dev-runner", + "address": "localhost:50051", + "allow_insecure": true, + }, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err != nil { + t.Fatalf("allow_insecure should permit no-auth-no-tls: %v", err) + } + spec, ok := m.registry.Get("dev-runner") + if !ok { + t.Fatal("expected dev-runner registered") + } + if !spec.AllowInsecure { + t.Error("expected AllowInsecure=true") + } +} + +func TestSandboxRemoteRunnersModule_ProvidesServices(t *testing.T) { + m := NewSandboxRemoteRunnersModule("runners", map[string]any{}) + _ = m.Init(nil) + svcProviders := m.ProvidesServices() + if len(svcProviders) == 0 { + t.Fatal("expected at least one service provider") + } + found := false + for _, sp := range svcProviders { + if sp.Name == SandboxRemoteRunnerServiceName { + found = true + } + } + if !found { + t.Errorf("expected service %q in ProvidesServices, got %v", SandboxRemoteRunnerServiceName, svcProviders) + } +} + +func TestSandboxRemoteRunnersModule_StartStop(t *testing.T) { + m := NewSandboxRemoteRunnersModule("runners", map[string]any{}) + _ = m.Init(nil) + if err := m.Start(context.Background()); err != nil { + t.Errorf("Start: %v", err) + } + if err := m.Stop(context.Background()); err != nil { + t.Errorf("Stop: %v", err) + } +} + +func TestNewSandboxRemoteRunnersModule_ParsesTLS(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "secure-runner", + "address": "agent.secure.example.com:50051", + "tls": map[string]any{ + "cert": "/etc/certs/client.crt", + "key": "/etc/certs/client.key", + "ca": "/etc/certs/ca.crt", + }, + }, + }, + } + m := NewSandboxRemoteRunnersModule("tls-runners", cfg) + if err := m.Init(nil); err != nil { + t.Fatalf("Init: %v", err) + } + spec, ok := m.registry.Get("secure-runner") + if !ok { + t.Fatal("expected secure-runner") + } + if spec.TLS == nil { + t.Fatal("expected non-nil TLS spec") + } + if spec.TLS.Cert != "/etc/certs/client.crt" { + t.Errorf("TLS.Cert: got %q", spec.TLS.Cert) + } + if spec.TLS.Key != "/etc/certs/client.key" { + t.Errorf("TLS.Key: got %q", spec.TLS.Key) + } + if spec.TLS.CA != "/etc/certs/ca.crt" { + t.Errorf("TLS.CA: got %q", spec.TLS.CA) + } +} + +// TestNewSandboxRemoteRunnersModule_ParsesSecretsProvider verifies the +// secrets_provider config key is captured and exposed on the registry. +func TestNewSandboxRemoteRunnersModule_ParsesSecretsProvider(t *testing.T) { + cfg := map[string]any{ + "secrets_provider": "my-vault", + "remote_runners": []any{ + map[string]any{"name": "r1", "address": "a:1", "token": "secret://x"}, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err != nil { + t.Fatalf("Init: %v", err) + } + if got := m.registry.SecretsProvider(); got != "my-vault" { + t.Errorf("SecretsProvider: got %q, want %q", got, "my-vault") + } +} + +func TestRemoteRunnerRegistry_NilSafe(t *testing.T) { + var r *RemoteRunnerRegistry + if _, ok := r.Get("anything"); ok { + t.Error("nil registry Get should return false") + } + if r.SecretsProvider() != "" { + t.Error("nil registry SecretsProvider should return empty") + } +} diff --git a/plugins/pipelinesteps/plugin.go b/plugins/pipelinesteps/plugin.go index f28782fb2..3546d54f2 100644 --- a/plugins/pipelinesteps/plugin.go +++ b/plugins/pipelinesteps/plugin.go @@ -51,6 +51,7 @@ func New() *Plugin { Author: "GoCodeAlone", Description: "Generic pipeline step types, pre-processing validators, and pipeline workflow handler (including base64_decode)", Tier: plugin.TierCore, + ModuleTypes: []string{"sandbox.remote_runners"}, StepTypes: []string{ "step.validate", "step.transform", @@ -127,6 +128,15 @@ func (p *Plugin) Capabilities() []capability.Contract { } } +// ModuleFactories returns module factories for the pipelinesteps plugin module types. +func (p *Plugin) ModuleFactories() map[string]plugin.ModuleFactory { + return map[string]plugin.ModuleFactory{ + "sandbox.remote_runners": func(name string, cfg map[string]any) modular.Module { + return module.NewSandboxRemoteRunnersModule(name, cfg) + }, + } +} + // StepFactories returns the step factories provided by this plugin. func (p *Plugin) StepFactories() map[string]plugin.StepFactory { return map[string]plugin.StepFactory{ diff --git a/sandbox/docker.go b/sandbox/docker.go index 664b283ed..89a2652d8 100644 --- a/sandbox/docker.go +++ b/sandbox/docker.go @@ -26,6 +26,11 @@ type Mount struct { // SandboxConfig holds configuration for a Docker sandbox execution environment. type SandboxConfig struct { + // Profile is the security profile name that produced this config ("strict", + // "standard", "permissive"). It is informational — it does not affect local + // Docker execution but is forwarded to remote runners so they can apply their + // own profile clamping (ADR 0019). + Profile string `yaml:"profile,omitempty"` Image string `yaml:"image"` WorkDir string `yaml:"work_dir"` Env map[string]string `yaml:"env"` @@ -46,6 +51,15 @@ type SandboxConfig struct { Tmpfs map[string]string `yaml:"tmpfs"` // e.g., {"/tmp": "size=64m,noexec"} } +// GetProfile returns the profile name stored in the config, falling back to +// "strict" (the safest default) when the field is empty. +func (c SandboxConfig) GetProfile() string { + if c.Profile != "" { + return c.Profile + } + return "strict" +} + // DefaultSecureSandboxConfig returns a hardened SandboxConfig suitable for // running untrusted workloads. It uses a minimal Wolfi-based image, drops all // Linux capabilities, enables a read-only root filesystem, mounts /tmp as diff --git a/sandbox/profile.go b/sandbox/profile.go index 9f6a513a0..7e8dcbf79 100644 --- a/sandbox/profile.go +++ b/sandbox/profile.go @@ -17,11 +17,13 @@ func BuildSandboxConfig(profile, image string) SandboxConfig { switch profile { case "permissive": return SandboxConfig{ + Profile: "permissive", Image: image, NetworkMode: "bridge", } case "standard": return SandboxConfig{ + Profile: "standard", Image: image, MemoryLimit: 256 * 1024 * 1024, CPULimit: 0.5, @@ -33,6 +35,8 @@ func BuildSandboxConfig(profile, image string) SandboxConfig { Timeout: 5 * time.Minute, } default: // "strict" and any unknown value - return DefaultSecureSandboxConfig(image) + cfg := DefaultSecureSandboxConfig(image) + cfg.Profile = "strict" + return cfg } } diff --git a/schema/module_schema.go b/schema/module_schema.go index a446c7503..282a294b8 100644 --- a/schema/module_schema.go +++ b/schema/module_schema.go @@ -2298,6 +2298,22 @@ func (r *ModuleSchemaRegistry) registerBuiltins() { }, }) + // ---- Sandbox Remote Runners ---- + + r.Register(&ModuleSchema{ + Type: "sandbox.remote_runners", + Label: "Sandbox Remote Runners", + Category: "infrastructure", + Description: "Registers named remote sandbox agents for use by step.sandbox_exec via exec_env", + // Provider-only module: it exposes a service (Outputs) but consumes no + // upstream service ports (no Inputs) — mirrors secrets.vault / iac.state. + Outputs: []ServiceIODef{{Name: "registry", Type: "RemoteRunnerRegistry", Description: "Named remote runner lookup registry consumed by step.sandbox_exec"}}, + ConfigFields: []ConfigFieldDef{ + {Key: "remote_runners", Label: "Remote Runners", Type: FieldTypeArray, Description: "List of remote runner specs: name, address, token, tls.{cert,key,ca}, allow_insecure"}, + {Key: "secrets_provider", Label: "Secrets Provider", Type: FieldTypeString, Description: "Name of a secrets module used to resolve secret:// token references"}, + }, + }) + // ---- Sandbox Exec ---- r.Register(&ModuleSchema{ diff --git a/schema/schema.go b/schema/schema.go index 89867a221..50b52622b 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -214,6 +214,7 @@ var coreModuleTypes = []string{ "policy.mock", "processing.step", "reverseproxy", + "sandbox.remote_runners", "scheduler.modular", "secrets.aws", "secrets.vault", diff --git a/schema/testdata/editor-schemas.golden.json b/schema/testdata/editor-schemas.golden.json index b2d46bfde..d3d5d9be8 100644 --- a/schema/testdata/editor-schemas.golden.json +++ b/schema/testdata/editor-schemas.golden.json @@ -3099,6 +3099,33 @@ ], "configFields": [] }, + "sandbox.remote_runners": { + "type": "sandbox.remote_runners", + "label": "Sandbox Remote Runners", + "category": "infrastructure", + "description": "Registers named remote sandbox agents for use by step.sandbox_exec via exec_env", + "outputs": [ + { + "name": "registry", + "type": "RemoteRunnerRegistry", + "description": "Named remote runner lookup registry consumed by step.sandbox_exec" + } + ], + "configFields": [ + { + "key": "remote_runners", + "label": "Remote Runners", + "type": "array", + "description": "List of remote runner specs: name, address, token, tls.{cert,key,ca}, allow_insecure" + }, + { + "key": "secrets_provider", + "label": "Secrets Provider", + "type": "string", + "description": "Name of a secrets module used to resolve secret:// token references" + } + ] + }, "scheduler.modular": { "type": "scheduler.modular", "label": "Scheduler", From ce0547c12d733c0fac037c542a4a1ff32b71d644 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Tue, 2 Jun 2026 23:47:03 -0400 Subject: [PATCH 2/2] fix(sandbox): harden sandbox-runner auth/TLS surface (Copilot review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward fix for 6 hardening issues on the PR8 security surface: 1. main.go buildServerOptions: refuse --tls-ca without --tls-cert AND --tls-key. Previously a CA-only invocation started with INSECURE transport while checkAuthRequirement treated --tls-ca as "auth configured" → no TLS + no token. mTLS needs the server's own certificate; fail-fast otherwise. 2. module Init: a runner with a token but no TLS (and no allow_insecure) now fails at Init (cleartext-token leak) instead of late in NewRemoteRunner at first Execute. Also validates tls.cert/tls.key both-or-neither at Init. 3. buildTLSConfig: error when exactly one of cert/key is set (both-or-neither) rather than silently dropping client authentication. 4. doc: AllowInsecure comment now reflects that it also permits no-token+no-TLS runners (not just token-over-no-TLS). 5. server.go bearer compare: compare fixed-length SHA-256 digests via subtle.ConstantTimeCompare. Raw ConstantTimeCompare is only constant-time for equal-length inputs, leaking the expected token length on a mismatch. 6. server.go Exec: validate empty command / missing image up front and return codes.InvalidArgument (caller error) instead of letting the Docker runner surface them as codes.Internal. Tests: buildServerOptions CA-without-cert/key + cert/key-mismatch + no-TLS; module token-no-TLS (+allow_insecure OK), tls cert/key both-or-neither, CA-only OK; buildTLSConfig mismatch + neither; wrong-length token Unauthenticated; empty-command + missing-image InvalidArgument. Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/workflow-sandbox-runner/main.go | 7 + cmd/workflow-sandbox-runner/main_test.go | 37 +++++ cmd/workflow-sandbox-runner/server.go | 23 +++- cmd/workflow-sandbox-runner/server_test.go | 70 ++++++++++ module/sandbox_remote_runners.go | 26 +++- module/sandbox_remote_runners_test.go | 153 +++++++++++++++++++-- 6 files changed, 300 insertions(+), 16 deletions(-) diff --git a/cmd/workflow-sandbox-runner/main.go b/cmd/workflow-sandbox-runner/main.go index 1ee7c2d66..0a79f2c05 100644 --- a/cmd/workflow-sandbox-runner/main.go +++ b/cmd/workflow-sandbox-runner/main.go @@ -131,6 +131,13 @@ func checkAuthRequirement(token, caFile string, allowUnauth bool) error { // If certFile and keyFile are empty, the server starts without TLS (test/dev only). // If caFile is also set, mTLS client authentication is enabled. func buildServerOptions(certFile, keyFile, caFile string) ([]grpc.ServerOption, error) { + // mTLS requires the server to present its own certificate; a CA alone only + // configures client-cert verification. Refuse a --tls-ca that lacks the + // server cert/key, otherwise the agent would start INSECURE while the + // operator believes mTLS is on (checkAuthRequirement treats --tls-ca as auth). + if caFile != "" && (certFile == "" || keyFile == "") { + return nil, fmt.Errorf("--tls-ca requires both --tls-cert and --tls-key (mTLS needs the server's own certificate)") + } if certFile == "" && keyFile == "" { // No TLS — insecure mode for local development / testing. return []grpc.ServerOption{grpc.Creds(insecure.NewCredentials())}, nil diff --git a/cmd/workflow-sandbox-runner/main_test.go b/cmd/workflow-sandbox-runner/main_test.go index 7f240ba48..d8e625f64 100644 --- a/cmd/workflow-sandbox-runner/main_test.go +++ b/cmd/workflow-sandbox-runner/main_test.go @@ -33,3 +33,40 @@ func TestCheckAuthRequirement(t *testing.T) { }) } } + +// TestBuildServerOptions_CAWithoutCertKey_Error verifies the fail-fast guard: +// --tls-ca set without --tls-cert/--tls-key must error rather than silently +// starting with insecure transport (which checkAuthRequirement believes is mTLS). +func TestBuildServerOptions_CAWithoutCertKey_Error(t *testing.T) { + if _, err := buildServerOptions("", "", "/etc/ca.crt"); err == nil { + t.Error("--tls-ca without cert/key must error, got nil") + } + if _, err := buildServerOptions("/etc/server.crt", "", "/etc/ca.crt"); err == nil { + t.Error("--tls-ca with cert but no key must error, got nil") + } + if _, err := buildServerOptions("", "/etc/server.key", "/etc/ca.crt"); err == nil { + t.Error("--tls-ca with key but no cert must error, got nil") + } +} + +// TestBuildServerOptions_NoTLS_Insecure verifies the no-TLS path still returns +// insecure credentials (used with --allow-unauthenticated or a bearer token). +func TestBuildServerOptions_NoTLS_Insecure(t *testing.T) { + opts, err := buildServerOptions("", "", "") + if err != nil { + t.Fatalf("buildServerOptions(no TLS): %v", err) + } + if len(opts) == 0 { + t.Error("expected at least one server option for insecure mode") + } +} + +// TestBuildServerOptions_CertWithoutKey_Error verifies cert/key both-or-neither. +func TestBuildServerOptions_CertWithoutKey_Error(t *testing.T) { + if _, err := buildServerOptions("/etc/server.crt", "", ""); err == nil { + t.Error("cert without key must error, got nil") + } + if _, err := buildServerOptions("", "/etc/server.key", ""); err == nil { + t.Error("key without cert must error, got nil") + } +} diff --git a/cmd/workflow-sandbox-runner/server.go b/cmd/workflow-sandbox-runner/server.go index 7fcc569a6..39f9f17f3 100644 --- a/cmd/workflow-sandbox-runner/server.go +++ b/cmd/workflow-sandbox-runner/server.go @@ -2,6 +2,7 @@ package main import ( "context" + "crypto/sha256" "crypto/subtle" "fmt" "log/slog" @@ -74,6 +75,16 @@ func newSandboxExecServerWithFactory(provider secrets.Provider, log *slog.Logger func (s *sandboxExecServer) Exec(req *pb.SandboxExecRequest, stream grpc.ServerStreamingServer[pb.SandboxExecChunk]) error { ctx := stream.Context() + // 0. Up-front request validation. These are caller errors, not server + // failures — surface them as InvalidArgument rather than letting the Docker + // runner reject them as a generic Internal error later. + if len(req.GetCommand()) == 0 { + return status.Error(codes.InvalidArgument, "command must not be empty") + } + if req.GetImage() == "" { + return status.Error(codes.InvalidArgument, "image is required") + } + // 1. Profile clamping. clampedProfile := clampProfile(req.GetProfile()) if clampedProfile != req.GetProfile() { @@ -178,9 +189,15 @@ func newBearerStreamInterceptor(token string) grpc.StreamServerInterceptor { if !strings.HasPrefix(authHeader, prefix) { return status.Error(codes.Unauthenticated, "authorization header must use Bearer scheme") } - // Constant-time comparison to avoid leaking the token via timing. - got := authHeader[len(prefix):] - if subtle.ConstantTimeCompare([]byte(got), []byte(token)) != 1 { + // Compare fixed-length SHA-256 digests so the comparison is constant-time + // AND length-independent: subtle.ConstantTimeCompare is only constant-time + // for equal-length inputs, so comparing the raw tokens would leak the + // expected token's length via a different (early-return) code path. Digests + // are always 32 bytes, removing that side channel. + presented := authHeader[len(prefix):] + got := sha256.Sum256([]byte(presented)) + want := sha256.Sum256([]byte(token)) + if subtle.ConstantTimeCompare(got[:], want[:]) != 1 { return status.Error(codes.Unauthenticated, "invalid bearer token") } return handler(srv, ss) diff --git a/cmd/workflow-sandbox-runner/server_test.go b/cmd/workflow-sandbox-runner/server_test.go index c9dd015c2..943d8e3b4 100644 --- a/cmd/workflow-sandbox-runner/server_test.go +++ b/cmd/workflow-sandbox-runner/server_test.go @@ -449,3 +449,73 @@ func TestExec_NonZeroExitCode(t *testing.T) { t.Errorf("exit_code: got %d, want 7", exitCode) } } + +// expectStreamCode opens the stream's first Recv (or the call error) and asserts +// the returned gRPC status code. Used by error-path tests. +func expectStreamCode(t *testing.T, callErr error, stream pb.SandboxExecService_ExecClient, want codes.Code) { + t.Helper() + if callErr != nil { + st, _ := status.FromError(callErr) + if st.Code() != want { + t.Errorf("expected %v, got %v: %v", want, st.Code(), callErr) + } + return + } + _, recvErr := stream.Recv() + if recvErr == nil || recvErr == io.EOF { + t.Fatalf("expected %v error, got: %v", want, recvErr) + } + st, _ := status.FromError(recvErr) + if st.Code() != want { + t.Errorf("expected %v, got %v: %v", want, st.Code(), recvErr) + } +} + +// TestAuth_WrongLengthToken_Unauthenticated verifies a token of a DIFFERENT +// LENGTH than the configured token is rejected. This guards the length-independent +// (SHA-256 digest) comparison path: a raw ConstantTimeCompare would early-return on +// length mismatch and leak the expected token's length via timing. +func TestAuth_WrongLengthToken_Unauthenticated(t *testing.T) { + const token = "secret-token-abc" // 16 bytes + client, cleanup := buildTestServer(t, &fakeProvider{}, token, func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + return &fakeRunner{}, nil + }) + defer cleanup() + + // A much shorter token (different length) must still be rejected as Unauthenticated. + ctx := metadata.NewOutgoingContext(context.Background(), metadata.Pairs("authorization", "Bearer x")) + stream, err := client.Exec(ctx, &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: []string{"true"}, + }) + expectStreamCode(t, err, stream, codes.Unauthenticated) +} + +// TestExec_EmptyCommand_InvalidArgument verifies an empty command is a caller +// error (InvalidArgument), not a server failure (Internal). +func TestExec_EmptyCommand_InvalidArgument(t *testing.T) { + client, cleanup := buildTestServer(t, &fakeProvider{}, "", func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + t.Error("runner factory must not be called for an empty command") + return &fakeRunner{}, nil + }) + defer cleanup() + + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", Image: "alpine", Command: nil, // empty + }) + expectStreamCode(t, err, stream, codes.InvalidArgument) +} + +// TestExec_MissingImage_InvalidArgument verifies a missing image is a caller +// error (InvalidArgument), not a server failure (Internal). +func TestExec_MissingImage_InvalidArgument(t *testing.T) { + client, cleanup := buildTestServer(t, &fakeProvider{}, "", func(cfg sandbox.SandboxConfig) (sandbox.SandboxRunner, error) { + t.Error("runner factory must not be called for a missing image") + return &fakeRunner{}, nil + }) + defer cleanup() + + stream, err := client.Exec(context.Background(), &pb.SandboxExecRequest{ + Profile: "standard", Image: "", Command: []string{"echo", "hi"}, + }) + expectStreamCode(t, err, stream, codes.InvalidArgument) +} diff --git a/module/sandbox_remote_runners.go b/module/sandbox_remote_runners.go index 618174177..1a20d70c7 100644 --- a/module/sandbox_remote_runners.go +++ b/module/sandbox_remote_runners.go @@ -36,8 +36,11 @@ type RemoteRunnerSpec struct { Token string `yaml:"token"` // TLS holds optional mTLS settings for the connection to the agent. TLS *RemoteRunnerTLSSpec `yaml:"tls,omitempty"` - // AllowInsecure permits a non-empty Token to be sent over a non-TLS - // connection. Explicit opt-in for local/dev only; production must use TLS. + // AllowInsecure is an explicit local/dev-only opt-in that relaxes two + // otherwise-rejected insecure configurations: + // - sending a non-empty Token over a non-TLS connection, and + // - registering a runner with NEITHER a token NOR TLS. + // Production must use TLS (and/or a token); never set this in production. AllowInsecure bool `yaml:"allow_insecure"` } @@ -196,6 +199,18 @@ func (m *SandboxRemoteRunnersModule) Init(_ modular.Application) error { if spec.Token == "" && !spec.hasTLS() && !spec.AllowInsecure { return fmt.Errorf("sandbox.remote_runners: runner %q has neither a token nor TLS; set token, set tls, or set allow_insecure: true for local/dev only", spec.Name) } + // A runner with a token but no TLS would leak the bearer token in + // cleartext. The client-side NewRemoteRunner enforces this too, but + // catch it here at Init so the misconfiguration fails fast at boot + // rather than at first Execute. + if spec.Token != "" && !spec.hasTLS() && !spec.AllowInsecure { + return fmt.Errorf("sandbox.remote_runners: runner %q sets a token but no TLS; the token would be sent in cleartext — set tls, or set allow_insecure: true for local/dev only", spec.Name) + } + // Client cert/key must be set together — fail fast at boot rather than at + // first Execute (buildTLSConfig also enforces this). + if spec.TLS != nil && (spec.TLS.Cert == "") != (spec.TLS.Key == "") { + return fmt.Errorf("sandbox.remote_runners: runner %q: tls.cert and tls.key must be set together (both-or-neither)", spec.Name) + } runners[spec.Name] = spec } m.registry = &RemoteRunnerRegistry{ @@ -233,6 +248,13 @@ func (m *SandboxRemoteRunnersModule) Stop(_ context.Context) error { return nil func buildTLSConfig(certFile, keyFile, caFile string) (*tls.Config, error) { tlsCfg := &tls.Config{MinVersion: tls.VersionTLS13} + // Client cert and key must be supplied together (both-or-neither). Silently + // ignoring a lone cert/key would drop client authentication the operator + // intended to configure. + if (certFile == "") != (keyFile == "") { + return nil, fmt.Errorf("tls: 'cert' and 'key' must be set together (both-or-neither)") + } + if certFile != "" && keyFile != "" { cert, err := tls.LoadX509KeyPair(certFile, keyFile) if err != nil { diff --git a/module/sandbox_remote_runners_test.go b/module/sandbox_remote_runners_test.go index 2b14b88ce..550367b8f 100644 --- a/module/sandbox_remote_runners_test.go +++ b/module/sandbox_remote_runners_test.go @@ -2,6 +2,7 @@ package module import ( "context" + "strings" "testing" ) @@ -19,17 +20,22 @@ func TestNewSandboxRemoteRunnersModule_Empty(t *testing.T) { } func TestNewSandboxRemoteRunnersModule_ParsesRunners(t *testing.T) { + // Both runners carry a token, so they declare TLS (a token over no-TLS is + // rejected at Init unless allow_insecure). Here we use allow_insecure to keep + // the fixture about parsing rather than TLS file loading. cfg := map[string]any{ "remote_runners": []any{ map[string]any{ - "name": "prod-runner", - "address": "agent.prod.example.com:50051", - "token": "secret://runner/prod-token", + "name": "prod-runner", + "address": "agent.prod.example.com:50051", + "token": "secret://runner/prod-token", + "allow_insecure": true, }, map[string]any{ - "name": "staging-runner", - "address": "agent.staging.example.com:50051", - "token": "staging-literal-token", + "name": "staging-runner", + "address": "agent.staging.example.com:50051", + "token": "staging-literal-token", + "allow_insecure": true, }, }, } @@ -103,15 +109,21 @@ func TestNewSandboxRemoteRunnersModule_ReservedName_Error(t *testing.T) { // TestNewSandboxRemoteRunnersModule_DuplicateName_Error verifies a duplicate // runner name is rejected rather than silently overwriting the first. func TestNewSandboxRemoteRunnersModule_DuplicateName_Error(t *testing.T) { + // Both specs are otherwise valid (allow_insecure permits token-over-no-TLS), + // isolating the duplicate-name failure. cfg := map[string]any{ "remote_runners": []any{ - map[string]any{"name": "dup", "address": "a:1", "token": "t1"}, - map[string]any{"name": "dup", "address": "b:2", "token": "t2"}, + map[string]any{"name": "dup", "address": "a:1", "token": "t1", "allow_insecure": true}, + map[string]any{"name": "dup", "address": "b:2", "token": "t2", "allow_insecure": true}, }, } m := NewSandboxRemoteRunnersModule("rn", cfg) - if err := m.Init(nil); err == nil { - t.Error("expected error for duplicate runner name, got nil") + err := m.Init(nil) + if err == nil { + t.Fatal("expected error for duplicate runner name, got nil") + } + if !strings.Contains(err.Error(), "duplicate") { + t.Errorf("expected duplicate-name error, got: %v", err) } } @@ -229,7 +241,7 @@ func TestNewSandboxRemoteRunnersModule_ParsesSecretsProvider(t *testing.T) { cfg := map[string]any{ "secrets_provider": "my-vault", "remote_runners": []any{ - map[string]any{"name": "r1", "address": "a:1", "token": "secret://x"}, + map[string]any{"name": "r1", "address": "a:1", "token": "secret://x", "allow_insecure": true}, }, } m := NewSandboxRemoteRunnersModule("rn", cfg) @@ -250,3 +262,122 @@ func TestRemoteRunnerRegistry_NilSafe(t *testing.T) { t.Error("nil registry SecretsProvider should return empty") } } + +// TestNewSandboxRemoteRunnersModule_TokenNoTLS_Error verifies a runner that sets +// a token but no TLS is rejected at Init (token would travel in cleartext) +// unless allow_insecure is set. +func TestNewSandboxRemoteRunnersModule_TokenNoTLS_Error(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "tok-no-tls", + "address": "agent.example.com:50051", + "token": "some-token", + // no tls, no allow_insecure + }, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + err := m.Init(nil) + if err == nil { + t.Fatal("expected error for token-without-TLS runner, got nil") + } + if !strings.Contains(err.Error(), "cleartext") { + t.Errorf("expected cleartext-token error, got: %v", err) + } +} + +// TestNewSandboxRemoteRunnersModule_TokenNoTLS_AllowInsecure_OK verifies the +// allow_insecure opt-in permits a token-over-no-TLS runner. +func TestNewSandboxRemoteRunnersModule_TokenNoTLS_AllowInsecure_OK(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "tok-insecure", + "address": "localhost:50051", + "token": "some-token", + "allow_insecure": true, + }, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err != nil { + t.Fatalf("allow_insecure should permit token-over-no-TLS: %v", err) + } + if _, ok := m.registry.Get("tok-insecure"); !ok { + t.Error("expected tok-insecure registered") + } +} + +// TestNewSandboxRemoteRunnersModule_TLSCertWithoutKey_Error verifies that a TLS +// spec with only one of cert/key is rejected at Init (both-or-neither). +func TestNewSandboxRemoteRunnersModule_TLSCertWithoutKey_Error(t *testing.T) { + for _, tc := range []struct { + name string + tls map[string]any + }{ + {"cert-only", map[string]any{"cert": "/c.pem"}}, + {"key-only", map[string]any{"key": "/k.pem"}}, + } { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "partial-tls", + "address": "agent.example.com:50051", + "tls": tc.tls, + }, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err == nil { + t.Errorf("%s: expected Init error for partial cert/key, got nil", tc.name) + } + } +} + +// TestNewSandboxRemoteRunnersModule_CAOnly_OK verifies a TLS spec with only a CA +// (server verification, no client cert) is accepted at Init. +func TestNewSandboxRemoteRunnersModule_CAOnly_OK(t *testing.T) { + cfg := map[string]any{ + "remote_runners": []any{ + map[string]any{ + "name": "ca-only", + "address": "agent.example.com:50051", + "tls": map[string]any{"ca": "/ca.pem"}, + }, + }, + } + m := NewSandboxRemoteRunnersModule("rn", cfg) + if err := m.Init(nil); err != nil { + t.Fatalf("CA-only TLS should be accepted at Init: %v", err) + } + if _, ok := m.registry.Get("ca-only"); !ok { + t.Error("expected ca-only registered") + } +} + +// TestBuildTLSConfig_CertKeyMismatch_Error verifies buildTLSConfig rejects a +// lone cert or key (both-or-neither), rather than silently dropping client auth. +func TestBuildTLSConfig_CertKeyMismatch_Error(t *testing.T) { + if _, err := buildTLSConfig("/cert.pem", "", ""); err == nil { + t.Error("expected error for cert without key, got nil") + } + if _, err := buildTLSConfig("", "/key.pem", ""); err == nil { + t.Error("expected error for key without cert, got nil") + } +} + +// TestBuildTLSConfig_NeitherCertNorKey_OK verifies buildTLSConfig succeeds with +// no client cert (e.g. server-verification-only via CA-less config). +func TestBuildTLSConfig_NeitherCertNorKey_OK(t *testing.T) { + cfg, err := buildTLSConfig("", "", "") + if err != nil { + t.Fatalf("buildTLSConfig with no cert/key/ca: %v", err) + } + if cfg == nil { + t.Fatal("expected non-nil tls.Config") + } + if len(cfg.Certificates) != 0 { + t.Error("expected no client certificates") + } +}