diff --git a/cmd/wfctl/infra.go b/cmd/wfctl/infra.go index befa1e33..445ec8ee 100644 --- a/cmd/wfctl/infra.go +++ b/cmd/wfctl/infra.go @@ -90,6 +90,12 @@ func runInfra(args []string) error { return fmt.Errorf("audit-secrets exited with code %d", rc) } return nil + case "audit-keys": + return runInfraAuditKeysCmd(args[1:]) + case "prune": + return runInfraPruneCmd(args[1:]) + case "rotate-and-prune": + return runInfraRotateAndPruneCmd(args[1:]) default: return infraUsage() } @@ -114,6 +120,9 @@ Actions: security-check Scan plan.json for security policy violations cleanup Tag-based force-cleanup across providers (--tag NAME [--fix]) audit-secrets Report provider_credential anti-patterns in secrets.generate + audit-keys List cloud-side resources of --type via the provider's EnumeratorAll + prune Destructively delete cloud resources by --created-before / --exclude-access-key (two-key opt-in) + rotate-and-prune All-in-one: rotate canonical credential, then prune older keys with the new key as exclusion target Options: --config Config file (default: infra.yaml or config/infra.yaml) diff --git a/cmd/wfctl/infra_audit_keys.go b/cmd/wfctl/infra_audit_keys.go new file mode 100644 index 00000000..9af69ac6 --- /dev/null +++ b/cmd/wfctl/infra_audit_keys.go @@ -0,0 +1,148 @@ +package main + +import ( + "context" + "flag" + "fmt" + "io" + "os" + + "github.com/GoCodeAlone/workflow/interfaces" +) + +// auditKeysStdout / auditKeysStderr are seam variables tests override to +// capture the subcommand's structured output without redirecting os.Stdout +// globally. Mirrors the cleanupStdout / cleanupStderr pattern. +var ( + auditKeysStdout io.Writer = os.Stdout + auditKeysStderr io.Writer = os.Stderr +) + +// auditKeysLoadProviders is the seam used by runInfraAuditKeysCmd to obtain +// the IaCProvider instances declared in the config's iac.provider modules. +// The default implementation defers to defaultCleanupLoadProviders so +// audit-keys inherits the same env-resolution + plugin-discovery contract +// established by `wfctl infra cleanup` (and R-A10). Tests override this var +// to inject fake providers without spinning up real plugin subprocesses. +var auditKeysLoadProviders = defaultCleanupLoadProviders + +// runInfraAuditKeys lists every cloud-side resource of `--type ` via the +// provider's interfaces.EnumeratorAll. This is the read-only surface for +// drift correction before the destructive `wfctl infra prune` (Task 19). +// +// Signature note: this function takes interfaces.EnumeratorAll directly +// (not the broader IaCProvider) so unit tests can pass a minimal fake +// without implementing every IaCProvider method. The dispatcher in +// runInfraAuditKeysCmd performs the IaCProvider → EnumeratorAll +// type-assertion at the boundary; providers that don't implement the +// optional interface are surfaced as a structured error there, not here. +// +// Exit codes: +// +// - 0: enumeration succeeded (zero or more resources rendered) +// - 1: enumeration failed (provider error) +// - 2: argument parse error or missing required --type +// +// Output goes to w as a fixed-width table — `audit-keys` is intended for +// human + CI consumption (the prune subcommand consumes the same shape). +func runInfraAuditKeys(args []string, enumerator interfaces.EnumeratorAll, w io.Writer) int { + fs := flag.NewFlagSet("infra audit-keys", flag.ContinueOnError) + fs.SetOutput(w) + var resourceType string + fs.StringVar(&resourceType, "type", "", "Resource type to enumerate (e.g. infra.spaces_key)") + if err := fs.Parse(args); err != nil { + return 2 + } + if resourceType == "" { + fmt.Fprintln(w, "audit-keys: --type is required") + return 2 + } + + // EnumerateAll returns []*ResourceOutput (full metadata) per the + // workflow contract, so audit-keys can render without a second Read. + outs, err := enumerator.EnumerateAll(context.Background(), resourceType) + if err != nil { + fmt.Fprintf(w, "audit-keys: %v\n", err) + return 1 + } + + fmt.Fprintf(w, "Found %d %s resource(s):\n\n", len(outs), resourceType) + fmt.Fprintf(w, "%-30s %-30s %s\n", "NAME", "ACCESS_KEY", "CREATED_AT") + for _, o := range outs { + // Prefer typed fields — the EnumeratorAll contract guarantees + // ProviderID + Name. Outputs is for additional metadata + // (created_at). Fall back to Outputs[*] for backward compat with + // providers that populate both, but typed fields take priority so + // audit-keys renders correctly even for providers that follow the + // strict contract without redundant Outputs writes. + name := o.Name + if name == "" { + name, _ = o.Outputs["name"].(string) + } + ak := o.ProviderID + if ak == "" { + ak, _ = o.Outputs["access_key"].(string) + } + ca, _ := o.Outputs["created_at"].(string) // metadata; legitimately Outputs-only + fmt.Fprintf(w, "%-30s %-30s %s\n", name, ak, ca) + } + return 0 +} + +// runInfraAuditKeysCmd is the production entry point for `wfctl infra +// audit-keys`. It loads iac.provider modules from the config (honoring +// --config / --env), finds the first one that implements +// interfaces.EnumeratorAll for the requested --type, and dispatches to +// runInfraAuditKeys. +// +// Splitting the dispatcher from runInfraAuditKeys keeps the testable +// function pure (no config / plugin I/O) while still presenting a single +// CLI surface to operators. +// +// Args-passing contract: this dispatcher captures EVERY flag it parses +// (including --type) and synthesizes a clean inner-args slice with only +// the flags runInfraAuditKeys understands. Forwarding the raw args slice +// would error inside runInfraAuditKeys with "flag provided but not +// defined: -config" because its inner FlagSet only declares --type. +func runInfraAuditKeysCmd(args []string) error { + fs := flag.NewFlagSet("infra audit-keys", flag.ContinueOnError) + fs.SetOutput(auditKeysStderr) + var configFile, envName, resourceType string + fs.StringVar(&configFile, "config", "", "Config file (default: infra.yaml or config/infra.yaml)") + fs.StringVar(&configFile, "c", "", "Config file (short for --config)") + fs.StringVar(&envName, "env", "", "Environment name for config resolution") + fs.StringVar(&resourceType, "type", "", "Resource type to enumerate (e.g. infra.spaces_key)") + if err := fs.Parse(args); err != nil { + return err + } + + ctx := context.Background() + providers, closers, err := auditKeysLoadProviders(ctx, fs, configFile, envName) + if err != nil { + return fmt.Errorf("load providers: %w", err) + } + defer func() { + for _, c := range closers { + if c == nil { + continue + } + if cerr := c.Close(); cerr != nil { + fmt.Fprintf(auditKeysStderr, "warning: provider shutdown: %v\n", cerr) + } + } + }() + + // Synthesize a clean inner-args slice — only flags runInfraAuditKeys + // declares. resourceType may be empty; runInfraAuditKeys handles the + // "--type required" error itself with its own structured message. + inner := []string{"--type", resourceType} + for _, p := range providers { + if enum, ok := p.(interfaces.EnumeratorAll); ok { + if rc := runInfraAuditKeys(inner, enum, auditKeysStdout); rc != 0 { + return fmt.Errorf("audit-keys exited with code %d", rc) + } + return nil + } + } + return fmt.Errorf("audit-keys: no loaded provider implements EnumeratorAll") +} diff --git a/cmd/wfctl/infra_audit_keys_test.go b/cmd/wfctl/infra_audit_keys_test.go new file mode 100644 index 00000000..dc5ed1dd --- /dev/null +++ b/cmd/wfctl/infra_audit_keys_test.go @@ -0,0 +1,173 @@ +package main + +import ( + "bytes" + "context" + "flag" + "io" + "strings" + "testing" + + "github.com/GoCodeAlone/workflow/interfaces" +) + +// fakeProviderEnumeratorAll is a test double implementing +// interfaces.EnumeratorAll. It returns a fixed list of *ResourceOutput per +// the workflow contract — full metadata so the audit-keys CLI can render +// without re-reading from the cloud. +type fakeProviderEnumeratorAll struct { + keys []*interfaces.ResourceOutput + // lastType records the resourceType passed to EnumerateAll so tests can + // assert the CLI forwarded the --type flag correctly. + lastType string +} + +func (f *fakeProviderEnumeratorAll) EnumerateAll(_ context.Context, resourceType string) ([]*interfaces.ResourceOutput, error) { + f.lastType = resourceType + return f.keys, nil +} + +// fakeIaCProviderForAuditKeys is the minimal IaCProvider implementation +// required to exercise runInfraAuditKeysCmd's dispatcher path. It +// implements interfaces.EnumeratorAll (so the dispatcher's type-assert +// succeeds) plus stubs of every other IaCProvider method (returning nil/ +// zero values) so go's type system is satisfied. Real cloud calls are +// never reached because audit-keys only invokes EnumerateAll. +type fakeIaCProviderForAuditKeys struct { + keys []*interfaces.ResourceOutput +} + +func (f *fakeIaCProviderForAuditKeys) Name() string { return "fake-do" } +func (f *fakeIaCProviderForAuditKeys) Version() string { return "0.0.0-test" } +func (f *fakeIaCProviderForAuditKeys) Initialize(_ context.Context, _ map[string]any) error { + return nil +} +func (f *fakeIaCProviderForAuditKeys) Capabilities() []interfaces.IaCCapabilityDeclaration { + return nil +} +func (f *fakeIaCProviderForAuditKeys) Plan(_ context.Context, _ []interfaces.ResourceSpec, _ []interfaces.ResourceState) (*interfaces.IaCPlan, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) Apply(_ context.Context, _ *interfaces.IaCPlan) (*interfaces.ApplyResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) Destroy(_ context.Context, _ []interfaces.ResourceRef) (*interfaces.DestroyResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) Status(_ context.Context, _ []interfaces.ResourceRef) ([]interfaces.ResourceStatus, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) DetectDrift(_ context.Context, _ []interfaces.ResourceRef) ([]interfaces.DriftResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) Import(_ context.Context, _, _ string) (*interfaces.ResourceState, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) ResolveSizing(_ string, _ interfaces.Size, _ *interfaces.ResourceHints) (*interfaces.ProviderSizing, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) ResourceDriver(_ string) (interfaces.ResourceDriver, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) SupportedCanonicalKeys() []string { return nil } +func (f *fakeIaCProviderForAuditKeys) BootstrapStateBackend(_ context.Context, _ map[string]any) (*interfaces.BootstrapResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForAuditKeys) Close() error { return nil } +func (f *fakeIaCProviderForAuditKeys) EnumerateAll(_ context.Context, _ string) ([]*interfaces.ResourceOutput, error) { + return f.keys, nil +} + +// TestInfraAuditKeysCmd_AcceptsConfigFlag is the smoke-test sentinel for +// the args-passing contract documented on runInfraAuditKeysCmd: the +// dispatcher MUST synthesize a clean inner-args slice with only flags +// runInfraAuditKeys understands. Forwarding the raw args slice (which +// includes --config / --env) would error inside runInfraAuditKeys with +// "flag provided but not defined: -config". +// +// Regression guard for the bug where unit tests of runInfraAuditKeys +// passed only synthetic []string{"--type", ...} and never exercised the +// dispatcher's args-passing path — real CLI invocations would error. +func TestInfraAuditKeysCmd_AcceptsConfigFlag(t *testing.T) { + // Stub the provider loader so we don't need a real iac.provider in a + // real infra.yaml — any path is fine since the seam returns our fake. + origLoad := auditKeysLoadProviders + t.Cleanup(func() { auditKeysLoadProviders = origLoad }) + auditKeysLoadProviders = func(_ context.Context, _ *flag.FlagSet, _, _ string) ([]interfaces.IaCProvider, []io.Closer, error) { + return []interfaces.IaCProvider{&fakeIaCProviderForAuditKeys{ + keys: []*interfaces.ResourceOutput{ + {Name: "k", Type: "infra.spaces_key", ProviderID: "AK", Outputs: map[string]any{"name": "k", "access_key": "AK"}}, + }, + }}, nil, nil + } + + // Capture stdout via the seam so we can assert the CLI rendered output. + origStdout := auditKeysStdout + t.Cleanup(func() { auditKeysStdout = origStdout }) + var out bytes.Buffer + auditKeysStdout = &out + + // The fix-under-test: --config + --env are in args. Pre-fix behavior + // errored with "flag provided but not defined: -config" because the + // inner FlagSet only declared --type. + if err := runInfraAuditKeysCmd([]string{ + "--type", "infra.spaces_key", + "--config", "/tmp/some-infra.yaml", + "--env", "staging", + }); err != nil { + t.Fatalf("runInfraAuditKeysCmd failed; --config / --env should be accepted by the dispatcher: %v", err) + } + if !strings.Contains(out.String(), "AK") { + t.Errorf("expected dispatcher to reach EnumerateAll + render output; got: %s", out.String()) + } +} + +// TestInfraAuditKeys_ListsAll verifies that `wfctl infra audit-keys --type +// ` delegates to the provider's EnumeratorAll, then renders every +// returned key's identifying fields (Name, ProviderID/access_key) into the +// writer. This is the failing test for Task 16 of the spaces-key-iac-resource +// plan (PR5). Until Task 17 implements runInfraAuditKeys + the registration +// of `wfctl infra audit-keys`, this test fails with `undefined: +// runInfraAuditKeys`. +func TestInfraAuditKeys_ListsAll(t *testing.T) { + fakeProv := &fakeProviderEnumeratorAll{ + keys: []*interfaces.ResourceOutput{ + { + Name: "key-a", + Type: "infra.spaces_key", + ProviderID: "AK_A", + Outputs: map[string]any{ + "name": "key-a", + "access_key": "AK_A", + "created_at": "2026-05-01T00:00:00Z", + }, + }, + { + Name: "key-b", + Type: "infra.spaces_key", + ProviderID: "AK_B", + Outputs: map[string]any{ + "name": "key-b", + "access_key": "AK_B", + "created_at": "2026-05-02T00:00:00Z", + }, + }, + }, + } + + var out bytes.Buffer + exitCode := runInfraAuditKeys([]string{"--type", "infra.spaces_key"}, fakeProv, &out) + if exitCode != 0 { + t.Fatalf("expected zero exit; got %d\nout=%s", exitCode, out.String()) + } + if !strings.Contains(out.String(), "key-a") || !strings.Contains(out.String(), "key-b") { + t.Errorf("expected both keys in output; got: %s", out.String()) + } + if !strings.Contains(out.String(), "AK_A") || !strings.Contains(out.String(), "AK_B") { + t.Errorf("expected access_keys in output; got: %s", out.String()) + } + // CLI must have forwarded the --type flag to the enumerator. + if fakeProv.lastType != "infra.spaces_key" { + t.Errorf("EnumerateAll resourceType = %q, want %q", fakeProv.lastType, "infra.spaces_key") + } +} diff --git a/cmd/wfctl/infra_prune.go b/cmd/wfctl/infra_prune.go new file mode 100644 index 00000000..b1073f44 --- /dev/null +++ b/cmd/wfctl/infra_prune.go @@ -0,0 +1,397 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "time" + + "github.com/GoCodeAlone/workflow/interfaces" +) + +// pruneProvider is the narrow interface runInfraPrune depends on so unit +// tests can use a minimal fake without implementing the full IaCProvider +// surface. Production code wraps an interfaces.IaCProvider in +// pruneProviderAdapter (below) which bridges to the existing +// interfaces.EnumeratorAll + ResourceDriver.Delete primitives. +type pruneProvider interface { + EnumerateAll(ctx context.Context, resourceType string) ([]*interfaces.ResourceOutput, error) + DeleteResource(ctx context.Context, ref interfaces.ResourceRef) error +} + +// recoveryFile is the on-disk state written by `wfctl infra rotate-and-prune` +// (Task 21) and consumed by `wfctl infra prune --recovery-from-last-rotation` +// (Task 19). The shape is intentionally minimal — only the fields the prune +// filter needs (--created-before + --exclude-access-key derived from the new +// rotation's CreatedAt + AccessKey). +type recoveryFile struct { + Type string `json:"type"` + Name string `json:"name"` + AccessKey string `json:"access_key"` + CreatedAt string `json:"created_at"` +} + +// defaultStateDir returns the canonical wfctl state directory: +// $WFCTL_STATE_DIR if set, else $HOME/.wfctl. Both writers (rotate-and-prune, +// Task 21) and readers (prune --recovery-from-last-rotation) call this so +// paths agree across the lifecycle. +func defaultStateDir() (string, error) { + if dir := os.Getenv("WFCTL_STATE_DIR"); dir != "" { + return dir, nil + } + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("WFCTL_STATE_DIR unset and $HOME unavailable: %w", err) + } + return filepath.Join(home, ".wfctl"), nil +} + +// recoveryFilePath returns the canonical path to the rotation recovery file. +// Falls back to a CWD-relative .wfctl/last-rotation.json if defaultStateDir +// errors — readers will fail loudly on the missing file in that case. +func recoveryFilePath() string { + dir, err := defaultStateDir() + if err != nil { + dir = ".wfctl" + } + return filepath.Join(dir, "last-rotation.json") +} + +// readRecoveryFile loads the recovery file written by rotate-and-prune and +// returns its content. Surface-level error includes the resolved path so +// operators can locate / hand-edit if needed. +func readRecoveryFile() (*recoveryFile, error) { + p := recoveryFilePath() + data, err := os.ReadFile(p) //nolint:gosec // intentional state-dir path + if err != nil { + return nil, fmt.Errorf("read %s: %w", p, err) + } + var rec recoveryFile + if err := json.Unmarshal(data, &rec); err != nil { + return nil, fmt.Errorf("parse %s: %w", p, err) + } + return &rec, nil +} + +// runInfraPrune destructively prunes cloud resources matching a time + +// access_key discriminator. Three opt-ins are required to run: +// +// - `--confirm` flag (explicit consent on each invocation) +// - WFCTL_CONFIRM_PRUNE=1 environment variable (two-key authorization) +// - interactive y/N prompt (unless --non-interactive is set, e.g. in CI) +// +// And two filter args are mandatory (paranoia rail) — the command refuses +// to run without an explicit exclusion target so a typo can't accidentally +// nuke the active credential: +// +// - `--created-before `: only resources older than this are eligible +// - `--exclude-access-key `: this access_key is preserved no matter what +// +// `--recovery-from-last-rotation` short-circuits the two filter args by +// reading them from the recovery file written by `infra rotate-and-prune` +// (Task 21). On success the recovery file is deleted; on failure it's +// retained so the operator can re-invoke after diagnosing. +// +// Exit codes: +// +// - 0: prune succeeded (zero or more deletions; no failures) +// - 1: opt-in/filter validation failed, enumerate failed, or one or more +// deletes failed +// - 2: argument parse error or missing required --type +// +//nolint:cyclop // intentional validation gauntlet — splitting it moves opt-in checks further from --confirm parsing +func runInfraPrune(args []string, provider pruneProvider, w io.Writer) int { + fs := flag.NewFlagSet("infra prune", flag.ContinueOnError) + fs.SetOutput(w) + var resourceType, createdBefore, excludeAK, preserveNames string + var confirm, nonInteractive, recoveryFromLastRotation bool + fs.StringVar(&resourceType, "type", "", "Resource type (required, e.g. infra.spaces_key)") + fs.StringVar(&createdBefore, "created-before", "", "RFC3339 timestamp; only resources older than this are eligible") + fs.StringVar(&excludeAK, "exclude-access-key", "", "Access key to preserve (required: paranoia rail)") + // --preserve-names is the unambiguous semantic: names matching this regex + // are PRESERVED (skipped during delete), not "operated on". Renamed from + // --allowlist per code-reviewer to remove the operator-error-trap where + // "allowlist" reads as "list of resources allowed to be deleted". + fs.StringVar(&preserveNames, "preserve-names", "", "Regex of resource names to preserve (skip during delete; orthogonal to time filter)") + fs.BoolVar(&confirm, "confirm", false, "Required: explicit confirmation flag") + fs.BoolVar(&nonInteractive, "non-interactive", false, "Skip the y/N prompt (CI-friendly)") + fs.BoolVar(&recoveryFromLastRotation, "recovery-from-last-rotation", false, "Read recovery file for filter args (rotate-and-prune writes it)") + if err := fs.Parse(args); err != nil { + return 2 + } + if resourceType == "" { + fmt.Fprintln(w, "prune: --type is required") + return 2 + } + if !confirm { + fmt.Fprintln(w, "prune: --confirm flag is required (this command is destructive)") + return 1 + } + if os.Getenv("WFCTL_CONFIRM_PRUNE") != "1" { + fmt.Fprintln(w, "prune: WFCTL_CONFIRM_PRUNE=1 env var is required (two-key opt-in)") + return 1 + } + + if recoveryFromLastRotation { + rec, err := readRecoveryFile() + if err != nil { + fmt.Fprintf(w, "prune: read recovery file: %v\n", err) + return 1 + } + if rec.Type != resourceType { + fmt.Fprintf(w, "prune: --recovery-from-last-rotation type mismatch — recovery file is %q, --type is %q\n", rec.Type, resourceType) + return 1 + } + createdBefore = rec.CreatedAt + excludeAK = rec.AccessKey + } + + if createdBefore == "" || excludeAK == "" { + fmt.Fprintln(w, "prune: --created-before AND --exclude-access-key are both required (paranoia rail)") + return 1 + } + + cutoff, err := time.Parse(time.RFC3339, createdBefore) + if err != nil { + fmt.Fprintf(w, "prune: invalid --created-before timestamp: %v\n", err) + return 1 + } + + var preserveRe *regexp.Regexp + if preserveNames != "" { + re, reErr := regexp.Compile(preserveNames) + if reErr != nil { + fmt.Fprintf(w, "prune: invalid --preserve-names regex: %v\n", reErr) + return 1 + } + preserveRe = re + } + + ctx := context.Background() + outs, err := provider.EnumerateAll(ctx, resourceType) + if err != nil { + fmt.Fprintf(w, "prune: enumerate: %v\n", err) + return 1 + } + + var toDelete []*interfaces.ResourceOutput + for _, out := range outs { + // Use typed fields for the load-bearing identifiers — the provider + // contract guarantees ProviderID + Name on EnumerateAll results. + // Outputs is for additional metadata (created_at). Falling back to + // Outputs[*] for name / access_key keeps the older provider behavior + // working but the typed fields take priority — defensive against any + // future provider that follows the contract strictly and doesn't + // duplicate ProviderID into Outputs["access_key"]. Without this + // fallback-but-prefer-typed pattern, a strict-contract provider would + // silently delete the active credential because the excludeAK check + // would compare against an empty string. + ak := out.ProviderID + if ak == "" { + ak, _ = out.Outputs["access_key"].(string) + } + name := out.Name + if name == "" { + name, _ = out.Outputs["name"].(string) + } + ca, _ := out.Outputs["created_at"].(string) // metadata; legitimately Outputs-only + if ak == excludeAK { + continue + } + if preserveRe != nil && preserveRe.MatchString(name) { + continue + } + t, parseErr := time.Parse(time.RFC3339, ca) + if parseErr != nil || !t.Before(cutoff) { + continue + } + toDelete = append(toDelete, out) + } + + fmt.Fprintf(w, "Dry-run: %d resource(s) to prune:\n\n", len(toDelete)) + for _, o := range toDelete { + name := o.Name + if name == "" { + name, _ = o.Outputs["name"].(string) + } + ak := o.ProviderID + if ak == "" { + ak, _ = o.Outputs["access_key"].(string) + } + ca, _ := o.Outputs["created_at"].(string) + fmt.Fprintf(w, " - %s (access_key=%s, created=%s)\n", name, ak, ca) + } + + if len(toDelete) == 0 { + return 0 + } + + if !nonInteractive { + fmt.Fprintf(w, "\nProceed? (y/N): ") + var ans string + _, _ = fmt.Scanln(&ans) + if ans != "y" && ans != "Y" { + fmt.Fprintln(w, "Aborted.") + return 0 + } + } + + var failed int + for _, o := range toDelete { + ref := interfaces.ResourceRef{Type: o.Type, Name: o.Name, ProviderID: o.ProviderID} + if delErr := provider.DeleteResource(ctx, ref); delErr != nil { + fmt.Fprintf(w, "prune: delete %s: %v\n", o.Name, delErr) + failed++ + continue + } + fmt.Fprintf(w, " ✓ deleted %s\n", o.Name) + } + if failed > 0 { + // Only mention the recovery file when one actually exists — when + // --recovery-from-last-rotation was set OR rotate-and-prune wrote + // it before this invocation. Plain prune invocations don't have + // one; pointing operators at a non-existent path would mislead + // them into a wild goose chase. + if recoveryFromLastRotation { + fmt.Fprintf(w, "\n%d delete(s) failed; recovery file retained at %s\n", failed, recoveryFilePath()) + } else { + fmt.Fprintf(w, "\n%d delete(s) failed\n", failed) + } + return 1 + } + if recoveryFromLastRotation { + // Cleanup is best-effort but non-silent: if perms changed or the + // file is locked, the next --recovery-from-last-rotation invocation + // would re-read stale data, so warn loud enough that the operator + // can hand-clean. + if rmErr := os.Remove(recoveryFilePath()); rmErr != nil && !os.IsNotExist(rmErr) { + fmt.Fprintf(w, "warning: failed to clean up recovery file %s: %v\n", recoveryFilePath(), rmErr) + } + } + fmt.Fprintf(w, "\n%d resource(s) pruned successfully.\n", len(toDelete)) + return 0 +} + +// pruneProviderAdapter bridges interfaces.IaCProvider to the narrow +// pruneProvider interface that runInfraPrune consumes. The adapter does +// the EnumeratorAll type-assertion + ResourceDriver lookup at the +// boundary so runInfraPrune's body stays free of plugin-loading concerns +// (and unit tests don't need to fake the full IaCProvider surface). +type pruneProviderAdapter struct { + p interfaces.IaCProvider +} + +func (a *pruneProviderAdapter) EnumerateAll(ctx context.Context, resourceType string) ([]*interfaces.ResourceOutput, error) { + enum, ok := a.p.(interfaces.EnumeratorAll) + if !ok { + return nil, fmt.Errorf("provider %q does not implement EnumeratorAll", a.p.Name()) + } + return enum.EnumerateAll(ctx, resourceType) +} + +func (a *pruneProviderAdapter) DeleteResource(ctx context.Context, ref interfaces.ResourceRef) error { + drv, err := a.p.ResourceDriver(ref.Type) + if err != nil { + return fmt.Errorf("resolve driver for %s: %w", ref.Type, err) + } + return drv.Delete(ctx, ref) +} + +// pruneStdout / pruneStderr seam variables mirror cleanupStdout / +// cleanupStderr so prune-related tests don't race on global os.Stdout. +var ( + pruneStdout io.Writer = os.Stdout + pruneStderr io.Writer = os.Stderr +) + +// pruneLoadProviders is the seam tests override to inject fakes. +var pruneLoadProviders = defaultCleanupLoadProviders + +// runInfraPruneCmd is the production entry point for `wfctl infra prune`. +// Loads iac.provider modules from infra.yaml, finds the first one that +// implements interfaces.EnumeratorAll, wraps it in pruneProviderAdapter, +// and dispatches to runInfraPrune. +// +// Args-passing contract: this dispatcher captures EVERY flag it parses +// (including all of runInfraPrune's flags) and synthesizes a clean inner- +// args slice with only the flags runInfraPrune understands. Forwarding +// the raw args slice would error inside runInfraPrune with "flag provided +// but not defined: -config" because its inner FlagSet doesn't declare +// --config / --env (those belong to the dispatcher's provider-loading +// concern). +func runInfraPruneCmd(args []string) error { + fs := flag.NewFlagSet("infra prune", flag.ContinueOnError) + fs.SetOutput(pruneStderr) + var configFile, envName string + var resourceType, createdBefore, excludeAK, preserveNames string + var confirm, nonInteractive, recoveryFromLastRotation bool + fs.StringVar(&configFile, "config", "", "Config file (default: infra.yaml or config/infra.yaml)") + fs.StringVar(&configFile, "c", "", "Config file (short for --config)") + fs.StringVar(&envName, "env", "", "Environment name for config resolution") + fs.StringVar(&resourceType, "type", "", "Resource type") + fs.StringVar(&createdBefore, "created-before", "", "RFC3339 timestamp") + fs.StringVar(&excludeAK, "exclude-access-key", "", "Access key to preserve") + fs.StringVar(&preserveNames, "preserve-names", "", "Regex of resource names to preserve (skip during delete)") + fs.BoolVar(&confirm, "confirm", false, "Confirmation flag") + fs.BoolVar(&nonInteractive, "non-interactive", false, "Skip y/N prompt") + fs.BoolVar(&recoveryFromLastRotation, "recovery-from-last-rotation", false, "Read recovery file") + if err := fs.Parse(args); err != nil { + return err + } + + ctx := context.Background() + providers, closers, err := pruneLoadProviders(ctx, fs, configFile, envName) + if err != nil { + return fmt.Errorf("load providers: %w", err) + } + defer func() { + for _, c := range closers { + if c == nil { + continue + } + if cerr := c.Close(); cerr != nil { + fmt.Fprintf(pruneStderr, "warning: provider shutdown: %v\n", cerr) + } + } + }() + + // Synthesize a clean inner-args slice with only flags runInfraPrune + // declares. Empty strings / false bools are omitted so they don't + // override runInfraPrune's defaults (and so the inner validation + // surfaces the same "X is required" messages the user expects). + inner := []string{"--type", resourceType} + if createdBefore != "" { + inner = append(inner, "--created-before", createdBefore) + } + if excludeAK != "" { + inner = append(inner, "--exclude-access-key", excludeAK) + } + if preserveNames != "" { + inner = append(inner, "--preserve-names", preserveNames) + } + if confirm { + inner = append(inner, "--confirm") + } + if nonInteractive { + inner = append(inner, "--non-interactive") + } + if recoveryFromLastRotation { + inner = append(inner, "--recovery-from-last-rotation") + } + + for _, p := range providers { + if _, ok := p.(interfaces.EnumeratorAll); ok { + adapter := &pruneProviderAdapter{p: p} + if rc := runInfraPrune(inner, adapter, pruneStdout); rc != 0 { + return fmt.Errorf("prune exited with code %d", rc) + } + return nil + } + } + return fmt.Errorf("prune: no loaded provider implements EnumeratorAll") +} diff --git a/cmd/wfctl/infra_prune_test.go b/cmd/wfctl/infra_prune_test.go new file mode 100644 index 00000000..43f34ffb --- /dev/null +++ b/cmd/wfctl/infra_prune_test.go @@ -0,0 +1,271 @@ +package main + +import ( + "bytes" + "context" + "flag" + "io" + "strings" + "testing" + + "github.com/GoCodeAlone/workflow/interfaces" +) + +// fakeIaCProviderForPrune is the minimal IaCProvider needed to exercise +// runInfraPruneCmd's dispatcher path (smoke test for the args-passing +// fix). Implements interfaces.EnumeratorAll so the type-assert in the +// dispatcher succeeds; ResourceDriver returns a fake driver that no-ops +// Delete so the prune step succeeds without touching cloud APIs. +type fakeIaCProviderForPrune struct { + keys []*interfaces.ResourceOutput +} + +func (f *fakeIaCProviderForPrune) Name() string { return "fake-do" } +func (f *fakeIaCProviderForPrune) Version() string { return "0.0.0-test" } +func (f *fakeIaCProviderForPrune) Initialize(_ context.Context, _ map[string]any) error { return nil } +func (f *fakeIaCProviderForPrune) Capabilities() []interfaces.IaCCapabilityDeclaration { return nil } +func (f *fakeIaCProviderForPrune) Plan(_ context.Context, _ []interfaces.ResourceSpec, _ []interfaces.ResourceState) (*interfaces.IaCPlan, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) Apply(_ context.Context, _ *interfaces.IaCPlan) (*interfaces.ApplyResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) Destroy(_ context.Context, _ []interfaces.ResourceRef) (*interfaces.DestroyResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) Status(_ context.Context, _ []interfaces.ResourceRef) ([]interfaces.ResourceStatus, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) DetectDrift(_ context.Context, _ []interfaces.ResourceRef) ([]interfaces.DriftResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) Import(_ context.Context, _, _ string) (*interfaces.ResourceState, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) ResolveSizing(_ string, _ interfaces.Size, _ *interfaces.ResourceHints) (*interfaces.ProviderSizing, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) ResourceDriver(_ string) (interfaces.ResourceDriver, error) { + return &fakeNoopDriver{}, nil +} +func (f *fakeIaCProviderForPrune) SupportedCanonicalKeys() []string { return nil } +func (f *fakeIaCProviderForPrune) BootstrapStateBackend(_ context.Context, _ map[string]any) (*interfaces.BootstrapResult, error) { + return nil, nil +} +func (f *fakeIaCProviderForPrune) Close() error { return nil } +func (f *fakeIaCProviderForPrune) EnumerateAll(_ context.Context, _ string) ([]*interfaces.ResourceOutput, error) { + return f.keys, nil +} + +// fakeNoopDriver implements interfaces.ResourceDriver as no-ops so the +// prune-dispatcher smoke test can exercise the full path without touching +// cloud APIs. Delete records nothing — the test asserts on the dispatcher +// running cleanly, not on side effects. +type fakeNoopDriver struct{} + +func (d *fakeNoopDriver) Create(_ context.Context, _ interfaces.ResourceSpec) (*interfaces.ResourceOutput, error) { + return nil, nil +} +func (d *fakeNoopDriver) Read(_ context.Context, _ interfaces.ResourceRef) (*interfaces.ResourceOutput, error) { + return nil, nil +} +func (d *fakeNoopDriver) Update(_ context.Context, _ interfaces.ResourceRef, _ interfaces.ResourceSpec) (*interfaces.ResourceOutput, error) { + return nil, nil +} +func (d *fakeNoopDriver) Diff(_ context.Context, _ interfaces.ResourceSpec, _ *interfaces.ResourceOutput) (*interfaces.DiffResult, error) { + return &interfaces.DiffResult{}, nil +} +func (d *fakeNoopDriver) Delete(_ context.Context, _ interfaces.ResourceRef) error { return nil } +func (d *fakeNoopDriver) HealthCheck(_ context.Context, _ interfaces.ResourceRef) (*interfaces.HealthResult, error) { + return nil, nil +} +func (d *fakeNoopDriver) Scale(_ context.Context, _ interfaces.ResourceRef, _ int) (*interfaces.ResourceOutput, error) { + return nil, nil +} +func (d *fakeNoopDriver) SensitiveKeys() []string { return nil } + +// TestInfraPruneCmd_AcceptsConfigFlag is the smoke-test sentinel for the +// args-passing contract on runInfraPruneCmd: the dispatcher MUST +// synthesize a clean inner-args slice so runInfraPrune's narrow FlagSet +// doesn't error on --config / --env (which only the dispatcher itself +// understands). Regression guard for the same bug class team-lead caught +// in audit-keys (Copilot 3212662894). +func TestInfraPruneCmd_AcceptsConfigFlag(t *testing.T) { + t.Setenv("WFCTL_CONFIRM_PRUNE", "1") + + origLoad := pruneLoadProviders + t.Cleanup(func() { pruneLoadProviders = origLoad }) + pruneLoadProviders = func(_ context.Context, _ *flag.FlagSet, _, _ string) ([]interfaces.IaCProvider, []io.Closer, error) { + return []interfaces.IaCProvider{&fakeIaCProviderForPrune{ + // One eligible old key; --created-before will catch it; --exclude-access-key + // preserves "AK_NEW" (which isn't in this list, so all listed keys are eligible). + keys: []*interfaces.ResourceOutput{ + {Name: "k", Type: "infra.spaces_key", ProviderID: "AK_OLD", Outputs: map[string]any{ + "access_key": "AK_OLD", "created_at": "2026-05-01T00:00:00Z", "name": "k", + }}, + }, + }}, nil, nil + } + + origStdout := pruneStdout + t.Cleanup(func() { pruneStdout = origStdout }) + var out bytes.Buffer + pruneStdout = &out + + if err := runInfraPruneCmd([]string{ + "--type", "infra.spaces_key", + "--config", "/tmp/some-infra.yaml", + "--env", "staging", + "--created-before", "2026-05-08T00:00:00Z", + "--exclude-access-key", "AK_NEW", + "--confirm", + "--non-interactive", + }); err != nil { + t.Fatalf("runInfraPruneCmd failed; --config / --env should be accepted by the dispatcher: %v", err) + } +} + +// fakeProviderWithDelete is a test double implementing the narrow +// pruneProvider interface (EnumerateAll + DeleteResource). Tracks deleted +// resources by their ProviderID (the cloud-side access_key for spaces keys) +// so tests can assert exactly which resources the prune CLI removed. +type fakeProviderWithDelete struct { + keys []*interfaces.ResourceOutput + deleted []string // ProviderIDs of resources passed to DeleteResource + lastType string +} + +func (f *fakeProviderWithDelete) EnumerateAll(_ context.Context, resourceType string) ([]*interfaces.ResourceOutput, error) { + f.lastType = resourceType + return f.keys, nil +} + +func (f *fakeProviderWithDelete) DeleteResource(_ context.Context, ref interfaces.ResourceRef) error { + f.deleted = append(f.deleted, ref.ProviderID) + return nil +} + +// pruneContains is a local helper so the test file doesn't depend on a +// shared 'contains' that might collide with another package-level helper. +func pruneContains(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + return false +} + +// TestInfraPrune_RequiresTwoKeyOptIn locks in the two-factor opt-in for +// destructive prune: BOTH `--confirm` flag AND WFCTL_CONFIRM_PRUNE=1 +// environment variable must be present, otherwise the command must reject +// the request before touching the cloud. This is the safety guard that +// prevents `prune` from running by accident. +func TestInfraPrune_RequiresTwoKeyOptIn(t *testing.T) { + // Without --confirm flag → reject (env var not set either) + var out bytes.Buffer + code := runInfraPrune([]string{ + "--type", "infra.spaces_key", + "--created-before", "2026-05-08T00:00:00Z", + "--exclude-access-key", "AK_NEW", + }, nil, &out) + if code == 0 { + t.Errorf("expected non-zero exit without --confirm; got 0; out=%s", out.String()) + } + + // With --confirm but WFCTL_CONFIRM_PRUNE not set → still reject + out.Reset() + code = runInfraPrune([]string{ + "--type", "infra.spaces_key", + "--created-before", "2026-05-08T00:00:00Z", + "--exclude-access-key", "AK_NEW", + "--confirm", + }, nil, &out) + if code == 0 { + t.Errorf("expected non-zero exit without WFCTL_CONFIRM_PRUNE=1; got 0; out=%s", out.String()) + } +} + +// TestInfraPrune_RequiresExcludeAccessKey verifies the safety guard that +// prevents pruning every key in the account by accident: --exclude-access-key +// is mandatory so the operator MUST name the active credential they want to +// preserve. Without it, prune refuses even with both opt-ins. +func TestInfraPrune_RequiresExcludeAccessKey(t *testing.T) { + t.Setenv("WFCTL_CONFIRM_PRUNE", "1") + var out bytes.Buffer + code := runInfraPrune([]string{ + "--type", "infra.spaces_key", + "--created-before", "2026-05-08T00:00:00Z", + "--confirm", + "--non-interactive", + }, nil, &out) + if code == 0 { + t.Errorf("expected non-zero exit without --exclude-access-key; got 0") + } + if !strings.Contains(out.String(), "exclude-access-key") { + t.Errorf("error message must mention --exclude-access-key requirement; got: %s", out.String()) + } +} + +// TestInfraPrune_FiltersByTimeAndExcludesAccessKey is the happy-path +// regression sentinel for the prune filter: with both opt-ins satisfied +// (env + flag), --exclude-access-key naming the active credential, and +// --created-before set to "right now", prune must delete all OLD keys +// while preserving the active one. Tracks deletions by ProviderID +// (access_key) on the fake provider. +func TestInfraPrune_FiltersByTimeAndExcludesAccessKey(t *testing.T) { + t.Setenv("WFCTL_CONFIRM_PRUNE", "1") + fakeProv := &fakeProviderWithDelete{ + keys: []*interfaces.ResourceOutput{ + { + Name: "old-1", + Type: "infra.spaces_key", + ProviderID: "AK_OLD_1", + Outputs: map[string]any{ + "access_key": "AK_OLD_1", + "created_at": "2026-05-01T00:00:00Z", + "name": "old-1", + }, + }, + { + Name: "old-2", + Type: "infra.spaces_key", + ProviderID: "AK_OLD_2", + Outputs: map[string]any{ + "access_key": "AK_OLD_2", + "created_at": "2026-05-02T00:00:00Z", + "name": "old-2", + }, + }, + { + Name: "new", + Type: "infra.spaces_key", + ProviderID: "AK_NEW", + Outputs: map[string]any{ + "access_key": "AK_NEW", + "created_at": "2026-05-08T11:00:00Z", + "name": "new", + }, + }, + }, + } + var out bytes.Buffer + code := runInfraPrune([]string{ + "--type", "infra.spaces_key", + "--created-before", "2026-05-08T11:00:00Z", + "--exclude-access-key", "AK_NEW", + "--confirm", + "--non-interactive", + }, fakeProv, &out) + if code != 0 { + t.Fatalf("prune failed: code=%d, out=%s", code, out.String()) + } + // AK_OLD_1, AK_OLD_2 must be deleted (older than --created-before). + if !pruneContains(fakeProv.deleted, "AK_OLD_1") || !pruneContains(fakeProv.deleted, "AK_OLD_2") { + t.Errorf("expected AK_OLD_1 + AK_OLD_2 deleted; got %v", fakeProv.deleted) + } + // AK_NEW must NOT be deleted (excluded via --exclude-access-key). + if pruneContains(fakeProv.deleted, "AK_NEW") { + t.Errorf("AK_NEW must NOT be deleted (excluded); got %v", fakeProv.deleted) + } +} diff --git a/cmd/wfctl/infra_rotate_and_prune.go b/cmd/wfctl/infra_rotate_and_prune.go new file mode 100644 index 00000000..c7539fe9 --- /dev/null +++ b/cmd/wfctl/infra_rotate_and_prune.go @@ -0,0 +1,289 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "time" + + "github.com/GoCodeAlone/workflow/interfaces" +) + +// rotateAndPruneStdout / rotateAndPruneStderr seam variables mirror the +// auditKeysStdout / pruneStdout pattern so rotate-and-prune dispatcher +// tests don't race on global os.Stdout. +var ( + rotateAndPruneStdout io.Writer = os.Stdout + rotateAndPruneStderr io.Writer = os.Stderr +) + +// rotateAndPruneLoadProviders is the seam tests override to inject fakes. +// Defaults to defaultCleanupLoadProviders so rotate-and-prune inherits the +// same env-resolution + plugin-discovery contract as cleanup / audit-keys +// / prune. +var rotateAndPruneLoadProviders = defaultCleanupLoadProviders + +// runInfraRotateAndPruneCmd is the production entry point for `wfctl infra +// rotate-and-prune`. Loads iac.provider modules from infra.yaml, finds the +// first one that implements interfaces.EnumeratorAll, wraps it in +// pruneProviderAdapter, and dispatches to runInfraRotateAndPrune. +// +// runInfraRotateAndPrune already declares --config / --env (it needs them +// for parseSecretsConfig in Step 1 of the rotate flow), so the dispatcher +// forwards args verbatim — no synthesize-clean-inner-args dance required. +// We still pre-parse here to extract --config / --env for the provider +// loader, but we don't reformat the args slice. +func runInfraRotateAndPruneCmd(args []string) error { + fs := flag.NewFlagSet("infra rotate-and-prune (dispatch)", flag.ContinueOnError) + fs.SetOutput(rotateAndPruneStderr) + var configFile, envName string + // Declare every flag runInfraRotateAndPrune accepts so flag.Parse + // doesn't error here. Only --config / --env are captured (used by the + // provider loader); the rest are re-parsed inside runInfraRotateAndPrune + // against the same args slice (Go's flag package is idempotent). + fs.StringVar(&configFile, "config", "", "Config file (default: infra.yaml or config/infra.yaml)") + fs.StringVar(&configFile, "c", "", "Config file (short for --config)") + fs.StringVar(&envName, "env", "", "Environment name for config resolution") + _ = fs.String("type", "", "Resource type") + _ = fs.String("name", "", "Canonical credential name") + _ = fs.String("preserve-names", "", "Regex of names to preserve during prune") + _ = fs.Bool("confirm", false, "Confirmation flag") + _ = fs.Bool("non-interactive", false, "Skip y/N prompt") + if err := fs.Parse(args); err != nil { + return err + } + + ctx := context.Background() + providers, closers, err := rotateAndPruneLoadProviders(ctx, fs, configFile, envName) + if err != nil { + return fmt.Errorf("load providers: %w", err) + } + defer func() { + for _, c := range closers { + if c == nil { + continue + } + if cerr := c.Close(); cerr != nil { + fmt.Fprintf(rotateAndPruneStderr, "warning: provider shutdown: %v\n", cerr) + } + } + }() + + for _, p := range providers { + if _, ok := p.(interfaces.EnumeratorAll); ok { + adapter := &pruneProviderAdapter{p: p} + if rc := runInfraRotateAndPrune(args, adapter, rotateAndPruneStdout); rc != 0 { + return fmt.Errorf("rotate-and-prune exited with code %d", rc) + } + return nil + } + } + return fmt.Errorf("rotate-and-prune: no loaded provider implements EnumeratorAll") +} + +// recoveryRecord is persisted to ${WFCTL_STATE_DIR:-$HOME/.wfctl}/last-rotation.json +// after a successful rotate step, BEFORE the prune step. Used by the +// `wfctl infra prune --recovery-from-last-rotation` flow (Task 19) to recover +// from partial-failure scenarios without re-rotating (which would mint another +// key and worsen any leak). +// +// The JSON shape is a superset of `recoveryFile` (defined in infra_prune.go): +// adds Source + RotatedAt for forensics. The prune reader ignores extra fields +// — it only needs Type/Name/AccessKey/CreatedAt. +type recoveryRecord struct { + Type string `json:"type"` + Name string `json:"name"` + AccessKey string `json:"access_key"` + CreatedAt string `json:"created_at"` + Source string `json:"source,omitempty"` + RotatedAt time.Time `json:"rotated_at,omitempty"` +} + +// writeRecoveryRecord persists recovery JSON to defaultStateDir()/last-rotation.json +// with 0600 file perms (sensitive credential metadata) + 0700 parent dir. +// +// Writes to the same path that recoveryFilePath() in infra_prune.go reads +// from so prune --recovery-from-last-rotation finds it. Uses an atomic-ish +// write (os.WriteFile truncates and rewrites) — no rename-on-temp pattern +// needed since the file is per-host and per-user. +func writeRecoveryRecord(rec recoveryRecord) error { + dir, err := defaultStateDir() + if err != nil { + return fmt.Errorf("rotate-and-prune: resolve state dir: %w", err) + } + if err := os.MkdirAll(dir, 0700); err != nil { + return fmt.Errorf("rotate-and-prune: create state dir %s: %w", dir, err) + } + data, err := json.MarshalIndent(rec, "", " ") + if err != nil { + return fmt.Errorf("rotate-and-prune: marshal recovery record: %w", err) + } + path := filepath.Join(dir, "last-rotation.json") + if err := os.WriteFile(path, data, 0600); err != nil { //nolint:gosec // intentional 0600 + return fmt.Errorf("rotate-and-prune: write recovery file %s: %w", path, err) + } + return nil +} + +// runInfraRotateAndPrune is the all-in-one rotate+prune CLI: +// +// 1. ROTATE the canonical credential by reusing bootstrapSecrets's force-rotate +// path (per ADR 0020, returns []RotationResult in-process — no subprocess, +// no stderr parsing). +// 2. PERSIST a recovery record to $WFCTL_STATE_DIR/last-rotation.json with +// 0600 perms BEFORE the prune step. This is the safety net: if prune +// fails partway, an operator can recover via +// `wfctl infra prune --recovery-from-last-rotation` without re-rotating +// (which would worsen any leak by minting yet another live credential). +// 3. DELEGATE the actual prune to runInfraPrune, passing the new key's +// created_at as --created-before and access_key as --exclude-access-key +// so only OLDER keys for the same Type are eligible for deletion. On +// success runInfraPrune deletes the recovery file (it's the consumer); +// on failure the recovery file is retained. +// +// Same two-key opt-in as runInfraPrune: --confirm flag + WFCTL_CONFIRM_PRUNE=1 +// env var. Both required, otherwise the command refuses to run. +// +// `provider` is typed as pruneProvider (the narrow interface defined in +// infra_prune.go) so this CLI shares the unit-test fake surface with prune +// itself — keeps the fake-IaCProvider blast radius small. +// +// Exit codes: +// +// - 0: rotation + prune both succeeded +// - 1: opt-in/validation failed, rotate failed, recovery write failed, or +// prune failed (recovery file retained in the prune-failed case) +// - 2: argument parse error or missing required --type / --name +func runInfraRotateAndPrune(args []string, provider pruneProvider, w io.Writer) int { + fs := flag.NewFlagSet("infra rotate-and-prune", flag.ContinueOnError) + fs.SetOutput(w) + var resourceType, name, configFile, preserveNames string + var confirm, nonInteractive bool + fs.StringVar(&resourceType, "type", "", "Resource type (required, e.g. infra.spaces_key)") + fs.StringVar(&name, "name", "", "Canonical credential name to rotate (required)") + fs.StringVar(&configFile, "config", "infra.yaml", "Config file") + fs.StringVar(&configFile, "c", "infra.yaml", "Config file (short)") + // --env is accepted-and-ignored here so the dispatcher (runInfraRotateAndPruneCmd) + // can forward args verbatim including --env without the inner FlagSet + // erroring on unknown-flag. The dispatcher already uses --env to scope + // provider loading; secrets-config resolution happens via --config alone. + _ = fs.String("env", "", "Environment name (consumed by dispatcher; ignored here)") + fs.StringVar(&preserveNames, "preserve-names", "", "Regex of resource names to preserve during prune") + fs.BoolVar(&confirm, "confirm", false, "Required: explicit confirmation flag") + fs.BoolVar(&nonInteractive, "non-interactive", false, "Skip the prune y/N prompt") + if err := fs.Parse(args); err != nil { + return 2 + } + if resourceType == "" || name == "" { + fmt.Fprintln(w, "rotate-and-prune: --type and --name are required") + return 2 + } + if !confirm { + fmt.Fprintln(w, "rotate-and-prune: --confirm flag is required (rotation + prune is destructive)") + return 1 + } + if os.Getenv("WFCTL_CONFIRM_PRUNE") != "1" { + fmt.Fprintln(w, "rotate-and-prune: WFCTL_CONFIRM_PRUNE=1 env var is required (two-key opt-in)") + return 1 + } + + ctx := context.Background() + + // Step 1: rotate the canonical credential via bootstrapSecrets's + // force-rotate path. parseSecretsConfig + resolveSecretsProvider + + // resolveCredentialRevoker are the existing helpers from + // cmd/wfctl/infra_secrets.go and infra_bootstrap.go. + fmt.Fprintf(w, "Step 1: rotating credential %q (type %s)...\n", name, resourceType) + + cfg, err := parseSecretsConfig(configFile) + if err != nil { + fmt.Fprintf(w, "rotate-and-prune: load config %s: %v\n", configFile, err) + return 1 + } + if cfg == nil { + fmt.Fprintf(w, "rotate-and-prune: config %s has no secrets section\n", configFile) + return 1 + } + secretsProvider, err := resolveSecretsProvider(cfg) + if err != nil { + fmt.Fprintf(w, "rotate-and-prune: resolve secrets provider: %v\n", err) + return 1 + } + forceRotate := map[string]bool{name: true} + revoker, closer := resolveCredentialRevoker(ctx, configFile, cfg, forceRotate) + if closer != nil { + defer closer.Close() + } + + _, rotations, err := bootstrapSecrets(ctx, secretsProvider, cfg, forceRotate, revoker) + if err != nil { + fmt.Fprintf(w, "rotate-and-prune: rotate: %v\n", err) + return 1 + } + if len(rotations) != 1 { + fmt.Fprintf(w, "rotate-and-prune: expected 1 rotation result, got %d\n", len(rotations)) + return 1 + } + rotated := rotations[0] + fmt.Fprintf(w, " new access_key=%s, created_at=%s\n", rotated.AccessKey, rotated.CreatedAt) + + // Persist recovery record BEFORE the prune step. Treated as a sub-step + // of Step 1 (rotate) — operationally invisible from the user's vantage + // (no Fprintf header), only the resulting path is surfaced for ops use. + rec := recoveryRecord{ + Type: resourceType, + Name: name, + AccessKey: rotated.AccessKey, + CreatedAt: rotated.CreatedAt, + Source: rotated.Source, + RotatedAt: time.Now().UTC(), + } + if err := writeRecoveryRecord(rec); err != nil { + fmt.Fprintf(w, "rotate-and-prune: %v\n", err) + return 1 + } + fmt.Fprintf(w, " recovery file written to %s\n", recoveryFilePath()) + + // Step 2 (user-visible): delegate to prune with the new key as the + // exclusion target. Step numbering matches the two banners the user + // sees: Step 1 is the rotate (above), Step 2 is the prune (here). + // The recovery-write between them is a sub-step of Step 1. + fmt.Fprintf(w, "\nStep 2: pruning older %s resources...\n", resourceType) + pruneArgs := []string{ + "--type", resourceType, + "--created-before", rotated.CreatedAt, + "--exclude-access-key", rotated.AccessKey, + "--confirm", + } + if nonInteractive { + pruneArgs = append(pruneArgs, "--non-interactive") + } + if preserveNames != "" { + pruneArgs = append(pruneArgs, "--preserve-names", preserveNames) + } + code := runInfraPrune(pruneArgs, provider, w) + if code != 0 { + fmt.Fprintf(w, "\nrotate-and-prune: prune step failed (code=%d). Recovery file retained at %s.\n", code, recoveryFilePath()) + fmt.Fprintf(w, "Re-run prune with: wfctl infra prune --type %s --recovery-from-last-rotation --confirm\n", resourceType) + return code + } + + // Success: rotate-and-prune is the recovery file's writer, so it owns + // cleanup on its own happy path. (runInfraPrune only deletes the file + // when invoked with --recovery-from-last-rotation, which we don't pass + // because we already have the rotation result in-process.) + recPath := recoveryFilePath() + if err := os.Remove(recPath); err != nil && !os.IsNotExist(err) { + // Non-fatal: a concurrent operator may have removed it, or perms + // changed under us. Surface with an explicit cleanup hint so the + // operator knows the rotation+prune itself succeeded and only the + // stale state file needs hand-clearing. + fmt.Fprintf(w, "rotate-and-prune: warning: rotation+prune succeeded but failed to remove stale recovery file at %s: %v\n", recPath, err) + fmt.Fprintf(w, "rotate-and-prune: hint: this file is no longer needed; remove with `rm %s` once safe.\n", recPath) + } + return 0 +} diff --git a/cmd/wfctl/infra_rotate_and_prune_test.go b/cmd/wfctl/infra_rotate_and_prune_test.go new file mode 100644 index 00000000..ae456426 --- /dev/null +++ b/cmd/wfctl/infra_rotate_and_prune_test.go @@ -0,0 +1,225 @@ +package main + +import ( + "bytes" + "context" + "errors" + "io" + "os" + "path/filepath" + "testing" + + "github.com/GoCodeAlone/workflow/interfaces" + "github.com/GoCodeAlone/workflow/secrets" +) + +// fakeProviderEnumerableDriver is the test double for `runInfraRotateAndPrune`. +// It implements the EnumerateAll + DeleteResource surface that the rotate-and- +// prune CLI needs (same shape as the existing `pruneProvider` interface in +// infra_prune.go), plus a deleteErr hook so tests can force the prune step to +// fail and exercise the recovery-file-retained branch. +// +// outputs is the slice returned by EnumerateAll, deleted is the running list +// of ProviderIDs the CLI has DeleteResource'd, and deleteErr (if non-nil) is +// returned from every DeleteResource call so tests can simulate transient +// network or godo failures. +type fakeProviderEnumerableDriver struct { + outputs []*interfaces.ResourceOutput + deleted []string + deleteErr error +} + +func (f *fakeProviderEnumerableDriver) EnumerateAll(_ context.Context, _ string) ([]*interfaces.ResourceOutput, error) { + return f.outputs, nil +} + +func (f *fakeProviderEnumerableDriver) DeleteResource(_ context.Context, ref interfaces.ResourceRef) error { + if f.deleteErr != nil { + return f.deleteErr + } + f.deleted = append(f.deleted, ref.ProviderID) + return nil +} + +// rotateAndPruneContains is a local helper rather than relying on a shared +// `contains` to avoid cross-file collisions with sibling test helpers. +func rotateAndPruneContains(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + return false +} + +// writeMinimalRotationConfig writes a minimal infra.yaml fixture sufficient +// for runInfraRotateAndPrune to load without erroring before reaching the +// stubbed `bootstrapSecrets` hook. The config provides a `secrets:` block +// (so parseSecretsConfig returns non-nil) with provider=env so +// resolveSecretsProvider builds without external dependencies. Returns the +// fixture's full path for use as `--config` arg. +func writeMinimalRotationConfig(t *testing.T, tmpDir string) string { + t.Helper() + cfgPath := filepath.Join(tmpDir, "infra.yaml") + body := `secrets: + provider: env + config: + prefix: WFCTL_TEST_ + generate: + - key: test-key + type: provider_credential + source: digitalocean.spaces + name: test-key +` + if err := os.WriteFile(cfgPath, []byte(body), 0600); err != nil { + t.Fatalf("write fixture infra.yaml: %v", err) + } + return cfgPath +} + +// TestInfraRotateAndPrune_HappyPath verifies the all-in-one flow: +// +// 1. The rotate primitive (`bootstrapSecrets` package-level test hook — +// same pattern as `generateSecret`) returns a single RotationResult +// describing the newly-minted credential. +// 2. The CLI persists a recovery record at $WFCTL_STATE_DIR/last-rotation.json +// BEFORE the prune step, so a mid-prune failure can be recovered without +// re-rotating (which would leak yet another key). +// 3. The CLI prunes every key for the same Type/Name whose ProviderID is +// NOT the new AccessKey (AK_OLD must be deleted; AK_NEW must be preserved). +// 4. On full success, the recovery file is deleted (no leftover state). +// +// Until Task 21 lands runInfraRotateAndPrune in infra_rotate_and_prune.go, +// this test fails to compile with `undefined: runInfraRotateAndPrune` — the +// failing-side signal Task 20 is supposed to produce. +func TestInfraRotateAndPrune_HappyPath(t *testing.T) { + t.Setenv("WFCTL_CONFIRM_PRUNE", "1") + tmpDir := t.TempDir() + t.Setenv("WFCTL_STATE_DIR", tmpDir) + cfgPath := writeMinimalRotationConfig(t, tmpDir) + + // Stub bootstrapSecrets — the package-level test hook (defined in + // infra_bootstrap.go:507 as `var bootstrapSecrets = func(...)`). + origBoot := bootstrapSecrets + defer func() { bootstrapSecrets = origBoot }() + bootstrapSecrets = func(_ context.Context, _ secrets.Provider, _ *SecretsConfig, _ map[string]bool, _ ...interfaces.ProviderCredentialRevoker) (map[string]string, []RotationResult, error) { + return map[string]string{}, []RotationResult{ + {Key: "test-key", Source: "digitalocean.spaces", AccessKey: "AK_NEW", CreatedAt: "2026-05-08T11:00:00Z"}, + }, nil + } + + fakeProv := &fakeProviderEnumerableDriver{ + outputs: []*interfaces.ResourceOutput{ + { + Name: "test-key", + Type: "infra.spaces_key", + ProviderID: "AK_OLD", + Outputs: map[string]any{ + "access_key": "AK_OLD", + "created_at": "2026-05-01T00:00:00Z", + "name": "test-key", + }, + }, + { + Name: "test-key", + Type: "infra.spaces_key", + ProviderID: "AK_NEW", + Outputs: map[string]any{ + "access_key": "AK_NEW", + "created_at": "2026-05-08T11:00:00Z", + "name": "test-key", + }, + }, + }, + } + + var out bytes.Buffer + code := runInfraRotateAndPrune([]string{ + "--type", "infra.spaces_key", + "--name", "test-key", + "--config", cfgPath, + "--confirm", "--non-interactive", + }, fakeProv, &out) + if code != 0 { + t.Fatalf("rotate-and-prune failed: code=%d, out=%s", code, out.String()) + } + + // Recovery file is deleted on full success — the only durable evidence + // left behind is the new credential itself in the secrets store + the + // pruned-key state. + if _, err := os.Stat(filepath.Join(tmpDir, "last-rotation.json")); !os.IsNotExist(err) { + t.Errorf("recovery file should be deleted after successful prune; stat err = %v", err) + } + + // AK_OLD must be deleted (older than AK_NEW + matches Name); AK_NEW + // must be preserved (it's the freshly-rotated key). + if !rotateAndPruneContains(fakeProv.deleted, "AK_OLD") { + t.Errorf("AK_OLD must be deleted; deleted = %v", fakeProv.deleted) + } + if rotateAndPruneContains(fakeProv.deleted, "AK_NEW") { + t.Errorf("AK_NEW must be preserved (just rotated in); deleted = %v", fakeProv.deleted) + } +} + +// TestInfraRotateAndPrune_RecoveryFileWrittenWithCorrectPerms verifies the +// partial-failure semantics: when the prune step fails (here: simulated +// network error from DeleteResource), the recovery record at +// $WFCTL_STATE_DIR/last-rotation.json must: +// +// 1. Be retained (NOT deleted), so a follow-up `wfctl infra prune +// --recovery-from-last-rotation` invocation can complete the prune +// without re-rotating. +// 2. Have permissions 0600 (owner-readable only) — the file contains +// enough metadata (access_key + name) to reconstruct which key the +// prune-by-time filter would target. +// +// This test is the safety-net side of Task 21's design; it pins the +// invariant that a partial-failure rotation never silently loses the +// recovery information the operator needs to finish cleanup. +func TestInfraRotateAndPrune_RecoveryFileWrittenWithCorrectPerms(t *testing.T) { + t.Setenv("WFCTL_CONFIRM_PRUNE", "1") + tmpDir := t.TempDir() + t.Setenv("WFCTL_STATE_DIR", tmpDir) + cfgPath := writeMinimalRotationConfig(t, tmpDir) + + origBoot := bootstrapSecrets + defer func() { bootstrapSecrets = origBoot }() + bootstrapSecrets = func(_ context.Context, _ secrets.Provider, _ *SecretsConfig, _ map[string]bool, _ ...interfaces.ProviderCredentialRevoker) (map[string]string, []RotationResult, error) { + return map[string]string{}, []RotationResult{ + {Key: "test-key", Source: "digitalocean.spaces", AccessKey: "AK_NEW", CreatedAt: "2026-05-08T11:00:00Z"}, + }, nil + } + + fakeProv := &fakeProviderEnumerableDriver{ + deleteErr: errors.New("simulated network failure"), + outputs: []*interfaces.ResourceOutput{ + { + Name: "test-key", + Type: "infra.spaces_key", + ProviderID: "AK_OLD", + Outputs: map[string]any{ + "access_key": "AK_OLD", + "created_at": "2026-05-01T00:00:00Z", + "name": "test-key", + }, + }, + }, + } + + // Run; ignore exit code (we expect non-zero from the prune-failure path, + // but the contract under test is the recovery file). + _ = runInfraRotateAndPrune([]string{ + "--type", "infra.spaces_key", + "--name", "test-key", + "--config", cfgPath, + "--confirm", "--non-interactive", + }, fakeProv, io.Discard) + + info, err := os.Stat(filepath.Join(tmpDir, "last-rotation.json")) + if err != nil { + t.Fatalf("recovery file should be retained on failure; stat err = %v", err) + } + if perm := info.Mode().Perm(); perm != 0600 { + t.Errorf("recovery file perms must be 0600 (owner-only); got %o", perm) + } +} diff --git a/docs/WFCTL.md b/docs/WFCTL.md index 92f81c32..74575672 100644 --- a/docs/WFCTL.md +++ b/docs/WFCTL.md @@ -1220,6 +1220,10 @@ wfctl infra [options] [config.yaml] | `outputs` | Print resource outputs from state (yaml/json/env formats) | | `refresh-outputs` | Read live outputs from each provider and reconcile state (no cloud writes) | | `cleanup` | Tag-based force-cleanup across providers that implement `interfaces.Enumerator` | +| `audit-secrets` | Report `provider_credential` anti-patterns in `secrets.generate` | +| `audit-keys` | List cloud-side resources of `--type` via the provider's `interfaces.EnumeratorAll` | +| `prune` | Destructively delete cloud resources by `--created-before` / `--exclude-access-key` (two-key opt-in) | +| `rotate-and-prune` | All-in-one: rotate the canonical credential, then prune older keys with the new key as exclusion target | | Flag | Default | Description | |------|---------|-------------| @@ -1501,6 +1505,115 @@ wfctl infra plan -c infra.yaml --env staging -o plan.json wfctl infra align -c infra.yaml --env staging --plan plan.json --strict ``` +#### `infra audit-keys` + +List every cloud-side resource of `--type ` via the provider's +optional `interfaces.EnumeratorAll`. Renders Name, ProviderID +(access_key), and CreatedAt as a fixed-width table — the read-only +surface for drift correction before the destructive `infra prune`. + +``` +wfctl infra audit-keys --type [-c CONFIG] [--env ENV] +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--type` | _(required)_ | Resource type to enumerate (e.g. `infra.spaces_key`) | +| `--config`, `-c` | _(auto-detected)_ | Config file (searches `infra.yaml`, `config/infra.yaml`) | +| `--env` | `` | Environment name for config resolution | + +`audit-keys` requires the loaded `iac.provider` to implement the optional +`interfaces.EnumeratorAll` interface (per ADR 0016). Providers that don't +are surfaced as a structured error (`audit-keys: no loaded provider +implements EnumeratorAll`); pair with `audit-secrets` for the +config-side equivalent. + +```bash +wfctl infra audit-keys --type infra.spaces_key +wfctl infra audit-keys --type infra.spaces_key -c infra.yaml --env staging +``` + +#### `infra prune` + +Destructively delete cloud-side resources matching a time + access_key +discriminator. The command refuses to run unless **all three** opt-ins +are satisfied (`--confirm` flag + `WFCTL_CONFIRM_PRUNE=1` env var + +interactive y/N prompt unless `--non-interactive`), AND the +`--exclude-access-key` flag names the active credential to preserve +(paranoia rail — prevents a typo from nuking the live key). + +``` +wfctl infra prune --type --created-before --exclude-access-key --confirm [--non-interactive] [--preserve-names ] [--recovery-from-last-rotation] +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--type` | _(required)_ | Resource type (e.g. `infra.spaces_key`) | +| `--created-before` | _(required)_ | RFC3339 timestamp; only resources older than this are eligible | +| `--exclude-access-key` | _(required)_ | Access key to preserve (paranoia rail) | +| `--preserve-names` | `` | Regex of resource names to preserve (skip during delete; orthogonal to time filter) | +| `--confirm` | `false` | Required: explicit confirmation flag (paired with `WFCTL_CONFIRM_PRUNE=1` env var) | +| `--non-interactive` | `false` | Skip the y/N prompt (CI-friendly) | +| `--recovery-from-last-rotation` | `false` | Read filter args from `${WFCTL_STATE_DIR:-$HOME/.wfctl}/last-rotation.json` (written by `infra rotate-and-prune` for recovery from partial-failure rotations without re-rotating) | + +Exit codes: + +- `0`: prune succeeded (zero or more deletions; no failures) +- `1`: opt-in/filter validation failed, enumerate failed, or one or more deletes failed +- `2`: argument parse error or missing required `--type` + +On success when `--recovery-from-last-rotation` was used, the recovery +file is removed. On failure, it is retained so the operator can re-invoke +after diagnosing. + +```bash +WFCTL_CONFIRM_PRUNE=1 wfctl infra prune \ + --type infra.spaces_key \ + --created-before 2026-05-08T00:00:00Z \ + --exclude-access-key AK_ACTIVE \ + --confirm + +# Recovery flow after a partial-failure rotate-and-prune: +WFCTL_CONFIRM_PRUNE=1 wfctl infra prune \ + --type infra.spaces_key \ + --recovery-from-last-rotation \ + --confirm +``` + +#### `infra rotate-and-prune` + +All-in-one: rotate the canonical credential for `--name` (mints a new +key, revokes the old credential per ADR 0012), persist a recovery +record, then delegate to `infra prune` with the new access_key as the +exclusion target. On full success, the recovery file is removed (no +durable evidence beyond the new credential itself + pruned-key state). +On prune-step failure, the recovery file is retained at +`${WFCTL_STATE_DIR:-$HOME/.wfctl}/last-rotation.json` (perms `0600`) so +the operator can re-invoke `infra prune --recovery-from-last-rotation` +without re-rotating (which would leak yet another key). + +``` +wfctl infra rotate-and-prune --type --name --confirm [--non-interactive] [-c CONFIG] [--env ENV] [--preserve-names ] +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--type` | _(required)_ | Resource type (e.g. `infra.spaces_key`) | +| `--name` | _(required)_ | Canonical credential name to rotate (matches `secrets.generate[].name`) | +| `--config`, `-c` | _(auto-detected)_ | Config file | +| `--env` | `` | Environment name | +| `--preserve-names` | `` | Regex of resource names to preserve during the prune step (forwarded as `--preserve-names` to `infra prune`) | +| `--confirm` | `false` | Required: paired with `WFCTL_CONFIRM_PRUNE=1` env var | +| `--non-interactive` | `false` | Skip the y/N prompt (forwarded to the prune step) | + +```bash +WFCTL_CONFIRM_PRUNE=1 wfctl infra rotate-and-prune \ + --type infra.spaces_key \ + --name SPACES \ + --confirm \ + --non-interactive +``` + --- ### `docs generate`