
Commit c20b75e

stopping update run implementation
Signed-off-by: Britania Rodriguez Reyes <britaniar@microsoft.com>
1 parent 0552f2e commit c20b75e


6 files changed: +850, -27 lines


pkg/controllers/updaterun/controller_integration_test.go

Lines changed: 26 additions & 0 deletions
@@ -312,6 +312,26 @@ func generateWaitingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
 	}
 }
 
+func generateStoppingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
+	return &prometheusclientmodel.Metric{
+		Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing),
+			string(metav1.ConditionFalse), condition.UpdateRunStoppingReason),
+		Gauge: &prometheusclientmodel.Gauge{
+			Value: ptr.To(float64(time.Now().UnixNano()) / 1e9),
+		},
+	}
+}
+
+func generateStoppedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
+	return &prometheusclientmodel.Metric{
+		Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing),
+			string(metav1.ConditionFalse), condition.UpdateRunStoppedReason),
+		Gauge: &prometheusclientmodel.Gauge{
+			Value: ptr.To(float64(time.Now().UnixNano()) / 1e9),
+		},
+	}
+}
+
 func generateStuckMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
 	return &prometheusclientmodel.Metric{
 		Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing),
@@ -823,3 +843,9 @@ func generateFalseProgressingCondition(obj client.Object, condType any, reason s
 	falseCond.Reason = reason
 	return falseCond
 }
+
+func generateFalseConditionWithReason(obj client.Object, condType any, reason string) metav1.Condition {
+	falseCond := generateFalseCondition(obj, condType)
+	falseCond.Reason = reason
+	return falseCond
+}
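
The added metric and condition helpers mirror the existing waiting/stuck ones, just with the new stopping/stopped reasons. A hypothetical usage sketch of how a test might build the expected stopping state from them (not part of this diff; the variable names and assertion flow are assumptions):

// Hypothetical usage, assuming a *ClusterStagedUpdateRun named updateRun is in scope.
wantStoppingCond := generateFalseConditionWithReason(updateRun,
	placementv1beta1.StagedUpdateRunConditionProgressing, condition.UpdateRunStoppingReason)
wantStoppingMetric := generateStoppingMetric(updateRun)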

pkg/controllers/updaterun/execution.go

Lines changed: 22 additions & 21 deletions
@@ -18,7 +18,6 @@ package updaterun
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"reflect"
 	"strconv"
@@ -67,16 +66,7 @@ func (r *Reconciler) execute(
 	var updatingStageStatus *placementv1beta1.StageUpdatingStatus
 
 	// Set up defer function to handle errStagedUpdatedAborted.
-	defer func() {
-		if errors.Is(err, errStagedUpdatedAborted) {
-			if updatingStageStatus != nil {
-				markStageUpdatingFailed(updatingStageStatus, updateRun.GetGeneration(), err.Error())
-			} else {
-				// Handle deletion stage case.
-				markStageUpdatingFailed(updateRunStatus.DeletionStageStatus, updateRun.GetGeneration(), err.Error())
-			}
-		}
-	}()
+	defer checkIfErrorStagedUpdateAborted(err, updateRun, updatingStageStatus)
 
 	// Mark updateRun as progressing if it's not already marked as waiting or stuck.
 	// This avoids triggering an unnecessary in-memory transition from stuck (waiting) -> progressing -> stuck (waiting),
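
The new deferred call replaces the inline closure with a named helper, checkIfErrorStagedUpdateAborted, which is defined elsewhere in this commit and not shown in these hunks. A minimal sketch of what it presumably contains, carried over from the removed closure; the parameter types and the GetUpdateRunStatus() accessor used for the deletion-stage case are assumptions:

// Hypothetical sketch, not shown in this diff: mirrors the removed inline defer,
// marking the current stage (or the deletion stage) as failed when the run was aborted.
func checkIfErrorStagedUpdateAborted(err error, updateRun placementv1beta1.UpdateRunObj, updatingStageStatus *placementv1beta1.StageUpdatingStatus) {
	if errors.Is(err, errStagedUpdatedAborted) {
		if updatingStageStatus != nil {
			markStageUpdatingFailed(updatingStageStatus, updateRun.GetGeneration(), err.Error())
		} else {
			// Handle the deletion stage case; GetUpdateRunStatus() is an assumed accessor.
			markStageUpdatingFailed(updateRun.GetUpdateRunStatus().DeletionStageStatus, updateRun.GetGeneration(), err.Error())
		}
	}
}
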
@@ -166,13 +156,13 @@ func (r *Reconciler) executeUpdatingStage(
 	for i := 0; i < len(updatingStageStatus.Clusters) && clusterUpdatingCount < maxConcurrency; i++ {
 		clusterStatus := &updatingStageStatus.Clusters[i]
 		clusterUpdateSucceededCond := meta.FindStatusCondition(clusterStatus.Conditions, string(placementv1beta1.ClusterUpdatingConditionSucceeded))
-		if condition.IsConditionStatusTrue(clusterUpdateSucceededCond, updateRun.GetGeneration()) {
+		if clusterUpdateSucceededCond != nil && clusterUpdateSucceededCond.Status == metav1.ConditionTrue {
 			// The cluster has been updated successfully.
 			finishedClusterCount++
 			continue
 		}
 		clusterUpdatingCount++
-		if condition.IsConditionStatusFalse(clusterUpdateSucceededCond, updateRun.GetGeneration()) {
+		if clusterUpdateSucceededCond != nil && clusterUpdateSucceededCond.Status == metav1.ConditionFalse {
 			// The cluster is marked as failed to update, this cluster is counted as updating cluster since it's not finished to avoid processing more clusters than maxConcurrency in this round.
 			failedErr := fmt.Errorf("the cluster `%s` in the stage %s has failed", clusterStatus.ClusterName, updatingStageStatus.StageName)
 			klog.ErrorS(failedErr, "The cluster has failed to be updated", "updateRun", updateRunRef)
@@ -232,7 +222,8 @@
 			}
 		}
 		markClusterUpdatingStarted(clusterStatus, updateRun.GetGeneration())
-		if finishedClusterCount == 0 {
+		stageUpdatingProgressCond := meta.FindStatusCondition(updatingStageStatus.Conditions, string(placementv1beta1.StageUpdatingConditionProgressing))
+		if finishedClusterCount == 0 || (stageUpdatingProgressCond != nil && stageUpdatingProgressCond.Status != metav1.ConditionTrue) {
 			markStageUpdatingStarted(updatingStageStatus, updateRun.GetGeneration())
 		}
 		// Need to continue as we need to process at most maxConcurrency number of clusters in parallel.
@@ -444,8 +435,8 @@ func (r *Reconciler) handleStageApprovalTask(
 ) (bool, error) {
 	updateRunRef := klog.KObj(updateRun)
 
-	stageTaskApproved := condition.IsConditionStatusTrue(meta.FindStatusCondition(stageTaskStatus.Conditions, string(placementv1beta1.StageTaskConditionApprovalRequestApproved)), updateRun.GetGeneration())
-	if stageTaskApproved {
+	stageTaskApprovedCond := meta.FindStatusCondition(stageTaskStatus.Conditions, string(placementv1beta1.StageTaskConditionApprovalRequestApproved))
+	if stageTaskApprovedCond != nil && stageTaskApprovedCond.Status == metav1.ConditionTrue {
 		// The stageTask has been approved.
 		return true, nil
 	}
@@ -456,7 +447,10 @@
 	if err := r.Client.Create(ctx, approvalRequest); err != nil {
 		if apierrors.IsAlreadyExists(err) {
 			// The approval task already exists.
-			markStageTaskRequestCreated(stageTaskStatus, updateRun.GetGeneration())
+			approvalRequestCreatedCond := meta.FindStatusCondition(stageTaskStatus.Conditions, string(placementv1beta1.StageTaskConditionApprovalRequestCreated))
+			if approvalRequestCreatedCond == nil {
+				markStageTaskRequestCreated(stageTaskStatus, updateRun.GetGeneration())
+			}
 			if err = r.Client.Get(ctx, client.ObjectKeyFromObject(approvalRequest), approvalRequest); err != nil {
 				klog.ErrorS(err, "Failed to get the already existing approval request", "approvalRequest", requestRef, "stage", updatingStage.Name, "updateRun", updateRunRef)
 				return false, controller.NewAPIServerError(true, err)
@@ -468,8 +462,10 @@
 		return false, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error())
 	}
 	approvalRequestStatus := approvalRequest.GetApprovalRequestStatus()
-	approvalAccepted := condition.IsConditionStatusTrue(meta.FindStatusCondition(approvalRequestStatus.Conditions, string(placementv1beta1.ApprovalRequestConditionApprovalAccepted)), approvalRequest.GetGeneration())
-	approved := condition.IsConditionStatusTrue(meta.FindStatusCondition(approvalRequestStatus.Conditions, string(placementv1beta1.ApprovalRequestConditionApproved)), approvalRequest.GetGeneration())
+	approvalAcceptedCond := meta.FindStatusCondition(approvalRequestStatus.Conditions, string(placementv1beta1.ApprovalRequestConditionApprovalAccepted))
+	approvalAccepted := approvalAcceptedCond != nil && approvalAcceptedCond.Status == metav1.ConditionTrue
+	approvedCond := meta.FindStatusCondition(approvalRequestStatus.Conditions, string(placementv1beta1.ApprovalRequestConditionApproved))
+	approved := approvedCond != nil && approvedCond.Status == metav1.ConditionTrue
 	if !approvalAccepted && !approved {
 		klog.V(2).InfoS("The approval request has not been approved yet", "approvalRequestTask", requestRef, "stage", updatingStage.Name, "updateRun", updateRunRef)
 		return false, nil
@@ -565,8 +561,13 @@ func aggregateUpdateRunStatus(updateRun placementv1beta1.UpdateRunObj, stageName
 	if len(stuckClusterNames) > 0 {
 		markUpdateRunStuck(updateRun, stageName, strings.Join(stuckClusterNames, ", "))
 	} else {
-		// If there is no stuck cluster but some progress has been made, mark the update run as progressing.
-		markUpdateRunProgressing(updateRun)
+		if updateRun.GetUpdateRunSpec().State == placementv1beta1.StateStop {
+			// If the update run is in Stop state, mark it as stopping.
+			markUpdateRunStopping(updateRun)
+		} else {
+			// If there is no stuck cluster but some progress has been made, mark the update run as progressing.
+			markUpdateRunProgressing(updateRun)
+		}
 	}
 }
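
markUpdateRunStopping is also introduced elsewhere in this commit and does not appear in the hunks above. Judging from generateStoppingMetric in the integration test (Progressing condition set to False with condition.UpdateRunStoppingReason), it presumably looks roughly like the sketch below; the GetUpdateRunStatus() accessor and the use of meta.SetStatusCondition are assumptions:

// Hypothetical sketch, not shown in this diff: flips the Progressing condition to
// False with the stopping reason, mirroring the expected metric in the test.
func markUpdateRunStopping(updateRun placementv1beta1.UpdateRunObj) {
	meta.SetStatusCondition(&updateRun.GetUpdateRunStatus().Conditions, metav1.Condition{
		Type:               string(placementv1beta1.StagedUpdateRunConditionProgressing),
		Status:             metav1.ConditionFalse,
		ObservedGeneration: updateRun.GetGeneration(),
		Reason:             condition.UpdateRunStoppingReason,
	})
}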
