diff --git a/cmd/ci-operator/main.go b/cmd/ci-operator/main.go index a688c01e..fcdb0a57 100644 --- a/cmd/ci-operator/main.go +++ b/cmd/ci-operator/main.go @@ -191,6 +191,7 @@ type options struct { extraInputHash stringSlice idleCleanupDuration time.Duration cleanupDuration time.Duration + graceDuration time.Duration inputHash string secrets []*coreapi.Secret @@ -207,6 +208,7 @@ func bindOptions(flag *flag.FlagSet) *options { opt := &options{ idleCleanupDuration: time.Duration(1 * time.Hour), cleanupDuration: time.Duration(12 * time.Hour), + graceDuration: time.Duration(coreapi.DefaultTerminationGracePeriodSeconds * time.Second), } // command specific options @@ -229,6 +231,7 @@ func bindOptions(flag *flag.FlagSet) *options { flag.StringVar(&opt.baseNamespace, "base-namespace", "stable", "Namespace to read builds from, defaults to stable.") flag.DurationVar(&opt.idleCleanupDuration, "delete-when-idle", opt.idleCleanupDuration, "If no pod is running for longer than this interval, delete the namespace. Set to zero to retain the contents. Requires the namespace TTL controller to be deployed.") flag.DurationVar(&opt.cleanupDuration, "delete-after", opt.cleanupDuration, "If namespace exists for longer than this interval, delete the namespace. Set to zero to retain the contents. Requires the namespace TTL controller to be deployed.") + flag.DurationVar(&opt.graceDuration, "grace-period", opt.graceDuration, "Interval to wait after requesting pod termination before killing the pod. ci-operator itself will wait an additional 30 seconds to try and receive the successful deletion response.") // actions to add to the graph flag.BoolVar(&opt.promote, "promote", false, "When all other targets complete, publish the set of images built by this job into the release configuration.") @@ -296,6 +299,7 @@ func (o *options) Complete() error { jobSpec.Refs = spec.Refs } jobSpec.BaseNamespace = o.baseNamespace + jobSpec.GracePeriod = &o.graceDuration o.jobSpec = jobSpec if o.dry && o.verbose { @@ -392,9 +396,10 @@ func (o *options) Run() error { if o.dry { os.Exit(0) } - log.Printf("error: Process interrupted with signal %s, exiting in 2s ...", s) + duration := o.graceDuration + 30*time.Second + log.Printf("error: Process interrupted with signal %s, exiting in %v ...", s, duration) cancel() - time.Sleep(2 * time.Second) + time.Sleep(duration) os.Exit(1) } diff --git a/pkg/api/job_spec.go b/pkg/api/job_spec.go index dafee768..c67d8a23 100644 --- a/pkg/api/job_spec.go +++ b/pkg/api/job_spec.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "strings" + "time" meta "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -28,6 +29,8 @@ type JobSpec struct { Namespace string BaseNamespace string + GracePeriod *time.Duration + // if set, any new artifacts will be a child of this object owner *meta.OwnerReference } diff --git a/pkg/steps/artifacts.go b/pkg/steps/artifacts.go index 2a2cc6dd..15e66e64 100644 --- a/pkg/steps/artifacts.go +++ b/pkg/steps/artifacts.go @@ -384,7 +384,7 @@ func (w *ArtifactWorker) downloadArtifacts(podName string, hasArtifacts bool) er } defer func() { - // signal to artifacts container to gracefully shut don + // signal to artifacts container to gracefully shut down err := removeFile(w.podClient, w.namespace, podName, "artifacts", []string{"/tmp/done"}) if err == nil { return diff --git a/pkg/steps/pod.go b/pkg/steps/pod.go index f7252f78..e98f5150 100644 --- a/pkg/steps/pod.go +++ b/pkg/steps/pod.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "log" + "math" "path/filepath" coreapi "k8s.io/api/core/v1" @@ -93,7 +94,12 @@ func (s *podStep) Run(ctx context.Context, dry bool) error { <-ctx.Done() notifier.Cancel() log.Printf("cleanup: Deleting %s pod %s", s.name, s.config.As) - if err := s.podClient.Pods(s.jobSpec.Namespace).Delete(s.config.As, nil); err != nil && !errors.IsNotFound(err) { + opts := &meta.DeleteOptions{} + if s.jobSpec.GracePeriod != nil { + sec := int64(math.Ceil(s.jobSpec.GracePeriod.Seconds())) + opts.GracePeriodSeconds = &sec + } + if err := s.podClient.Pods(s.jobSpec.Namespace).Delete(s.config.As, opts); err != nil && !errors.IsNotFound(err) { log.Printf("error: Could not delete %s pod: %v", s.name, err) } }()