diff --git a/cmd/wfctl/infra.go b/cmd/wfctl/infra.go index e4b70e5f..2d9de8f5 100644 --- a/cmd/wfctl/infra.go +++ b/cmd/wfctl/infra.go @@ -1273,6 +1273,8 @@ func runInfraApply(args []string) error { fs.BoolVar(&skipRefreshFlag, "skip-refresh", false, "Skip the WFCTL_REFRESH_OUTPUTS pre-step refresh even if the env var is set (does NOT cancel explicit --refresh-outputs)") var skipBootstrapFlag bool fs.BoolVar(&skipBootstrapFlag, "skip-bootstrap", false, "Skip auto-bootstrap before apply; use only when required secrets/state already exist") + var waitFlag bool + fs.BoolVar(&waitFlag, "wait", false, "Wait for deployable infra resources to become healthy before exiting") var allowReplaceFlag string fs.StringVar(&allowReplaceFlag, "allow-replace", "", "Comma-separated list of resource names whose protected: true status is overridden for this apply (replace/delete actions only)") @@ -1311,6 +1313,10 @@ func runInfraApply(args []string) error { applyAllowReplaceSet = parseAllowReplaceFlag(allowReplaceFlag) defer func() { applyAllowReplaceSet = nil }() + prevInfraApplyWait := currentInfraApplyWait + currentInfraApplyWait = waitFlag + defer func() { currentInfraApplyWait = prevInfraApplyWait }() + // Publish the --include flag value for the apply path's filter helpers // (including dry-run). Reset to "" at the top of every invocation so the // filter fails open (all-resources) on subsequent invocations that do not diff --git a/cmd/wfctl/infra_apply.go b/cmd/wfctl/infra_apply.go index b0ea009d..93992ed2 100644 --- a/cmd/wfctl/infra_apply.go +++ b/cmd/wfctl/infra_apply.go @@ -72,6 +72,11 @@ var applyV2ApplyPlanWithHooksFn = wfctlhelpers.ApplyPlanWithHooks // before dispatch. var applyAllowReplaceSet map[string]struct{} +// currentInfraApplyWait is the per-invocation --wait setting for +// `wfctl infra apply`. When set, apply waits for deployable infra resources to +// pass provider health checks before reporting success. +var currentInfraApplyWait bool + // parseAllowReplaceFlag turns a comma-separated --allow-replace= // flag value into a name-set. Empty input → nil (the canonical // "no override" value, indistinguishable from the flag never being @@ -428,6 +433,9 @@ func applyWithProviderAndStore(ctx context.Context, provider interfaces.IaCProvi } if len(plan.Actions) == 0 { fmt.Println(" No changes — infrastructure is up-to-date.") + if currentInfraApplyWait { + return waitForInfraHealth(ctx, provider, specs, current, nil, envName) + } return nil } @@ -517,9 +525,81 @@ func applyWithProviderAndStore(ctx context.Context, provider interfaces.IaCProvi return finalErr } } + if currentInfraApplyWait { + if err := waitForInfraHealth(ctx, provider, specs, current, result, envName); err != nil { + return err + } + } return nil } +type infraHealthTarget struct { + name string + ref interfaces.ResourceRef +} + +func waitForInfraHealth(ctx context.Context, provider interfaces.IaCProvider, specs []interfaces.ResourceSpec, current []interfaces.ResourceState, result *interfaces.ApplyResult, envName string) error { + targets, err := infraHealthTargets(specs, current, result) + if err != nil { + return err + } + for _, target := range targets { + driver, err := provider.ResourceDriver(target.ref.Type) + if err != nil { + return fmt.Errorf("health check %s/%s: resolve resource driver: %w", target.ref.Type, target.ref.Name, err) + } + if err := pollUntilHealthy(ctx, driver, target.ref, target.name, envName); err != nil { + return err + } + } + return nil +} + +func infraHealthTargets(specs []interfaces.ResourceSpec, current []interfaces.ResourceState, result *interfaces.ApplyResult) ([]infraHealthTarget, error) { + currentByName := make(map[string]interfaces.ResourceState, len(current)) + for i := range current { + state := current[i] + currentByName[state.Name] = state + } + appliedByName := map[string]interfaces.ResourceOutput{} + if result != nil { + for _, out := range result.Resources { + appliedByName[out.Name] = out + } + } + + targets := make([]infraHealthTarget, 0) + seen := map[string]struct{}{} + for _, spec := range specs { + if !shouldWaitForInfraHealth(spec) { + continue + } + if _, ok := seen[spec.Name]; ok { + continue + } + seen[spec.Name] = struct{}{} + + ref := interfaces.ResourceRef{Name: spec.Name, Type: spec.Type} + if out, ok := appliedByName[spec.Name]; ok { + ref.ProviderID = out.ProviderID + } + if ref.ProviderID == "" { + if state, ok := currentByName[spec.Name]; ok { + ref.ProviderID = state.ProviderID + } + } + if ref.ProviderID == "" { + return nil, fmt.Errorf("health check %s/%s: no ProviderID available after apply", spec.Type, spec.Name) + } + targets = append(targets, infraHealthTarget{name: spec.Name, ref: ref}) + } + return targets, nil +} + +func shouldWaitForInfraHealth(spec interfaces.ResourceSpec) bool { + return spec.Type == "infra.container_service" +} + func statePersistenceHooks( store infraStateStore, secretsProvider secrets.Provider, @@ -1577,5 +1657,22 @@ func applyPrecomputedPlanWithStore(ctx context.Context, plan interfaces.IaCPlan, return finalErr } } + if currentInfraApplyWait { + if err := waitForInfraHealth(ctx, provider, specsFromPlanActions(plan.Actions), nil, result, envName); err != nil { + return err + } + } return nil } + +func specsFromPlanActions(actions []interfaces.PlanAction) []interfaces.ResourceSpec { + specs := make([]interfaces.ResourceSpec, 0, len(actions)) + for i := range actions { + action := actions[i] + if action.Action == "delete" { + continue + } + specs = append(specs, action.Resource) + } + return specs +} diff --git a/cmd/wfctl/infra_apply_test.go b/cmd/wfctl/infra_apply_test.go index a50c0ef2..17286fc4 100644 --- a/cmd/wfctl/infra_apply_test.go +++ b/cmd/wfctl/infra_apply_test.go @@ -11,6 +11,7 @@ import ( "strings" "sync" "testing" + "time" "github.com/GoCodeAlone/workflow/iac/sensitive" "github.com/GoCodeAlone/workflow/iac/wfctlhelpers" @@ -325,6 +326,106 @@ func (p *noDriverApplyProvider) ResourceDriver(resourceType string) (interfaces. return nil, fmt.Errorf("no driver for %s", resourceType) } +type waitHealthDriver struct { + readDriver + results []interfaces.HealthResult + fallback *interfaces.HealthResult + refs []interfaces.ResourceRef +} + +func (d *waitHealthDriver) HealthCheck(_ context.Context, ref interfaces.ResourceRef) (*interfaces.HealthResult, error) { + d.refs = append(d.refs, ref) + if len(d.results) == 0 { + if d.fallback != nil { + return d.fallback, nil + } + return &interfaces.HealthResult{Healthy: true}, nil + } + next := d.results[0] + d.results = d.results[1:] + return &next, nil +} + +func TestWaitForInfraHealth_UsesAppliedContainerProviderID(t *testing.T) { + driver := &waitHealthDriver{} + provider := &readBackedProvider{driver: driver} + specs := []interfaces.ResourceSpec{ + {Name: "web", Type: "infra.container_service"}, + } + result := &interfaces.ApplyResult{ + Resources: []interfaces.ResourceOutput{ + {Name: "web", Type: "infra.container_service", ProviderID: "app-new"}, + }, + } + + if err := waitForInfraHealth(context.Background(), provider, specs, nil, result, "prod"); err != nil { + t.Fatalf("waitForInfraHealth: %v", err) + } + if len(driver.refs) != 1 { + t.Fatalf("HealthCheck calls = %d, want 1", len(driver.refs)) + } + if got := driver.refs[0].ProviderID; got != "app-new" { + t.Fatalf("HealthCheck ProviderID = %q, want app-new", got) + } +} + +func TestWaitForInfraHealth_NoChangesStillChecksCurrentContainer(t *testing.T) { + driver := &waitHealthDriver{} + provider := &readBackedProvider{driver: driver} + specs := []interfaces.ResourceSpec{ + {Name: "web", Type: "infra.container_service"}, + } + current := []interfaces.ResourceState{ + {Name: "web", Type: "infra.container_service", ProviderID: "app-current"}, + } + + if err := waitForInfraHealth(context.Background(), provider, specs, current, nil, "prod"); err != nil { + t.Fatalf("waitForInfraHealth: %v", err) + } + if len(driver.refs) != 1 { + t.Fatalf("HealthCheck calls = %d, want 1", len(driver.refs)) + } + if got := driver.refs[0].ProviderID; got != "app-current" { + t.Fatalf("HealthCheck ProviderID = %q, want app-current", got) + } +} + +func TestWaitForInfraHealth_UnhealthyContainerFailsApply(t *testing.T) { + origTimeout := healthPollDefaultTimeout + origInitial := healthPollInitialInterval + origBackoff := healthPollBackoffInterval + origProgress := healthPollProgressInterval + healthPollDefaultTimeout = time.Millisecond + healthPollInitialInterval = time.Millisecond + healthPollBackoffInterval = time.Millisecond + healthPollProgressInterval = time.Hour + t.Cleanup(func() { + healthPollDefaultTimeout = origTimeout + healthPollInitialInterval = origInitial + healthPollBackoffInterval = origBackoff + healthPollProgressInterval = origProgress + }) + + driver := &waitHealthDriver{ + fallback: &interfaces.HealthResult{Healthy: false, Message: "no deployment found"}, + } + provider := &readBackedProvider{driver: driver} + specs := []interfaces.ResourceSpec{ + {Name: "web", Type: "infra.container_service"}, + } + current := []interfaces.ResourceState{ + {Name: "web", Type: "infra.container_service", ProviderID: "app-current"}, + } + + err := waitForInfraHealth(context.Background(), provider, specs, current, nil, "prod") + if err == nil { + t.Fatal("expected waitForInfraHealth to fail for unhealthy container") + } + if !strings.Contains(err.Error(), "no deployment found") { + t.Fatalf("error = %q, want no deployment found", err.Error()) + } +} + // ── TestApplyInfraModules_DirectPath ─────────────────────────────────────────── // TestApplyInfraModules_DirectPath verifies that applyInfraModules: