Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions internal/controllers/module_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,13 @@ func (mrh *moduleReconcilerHelper) moduleUpdateWorkerPodsStatus(ctx context.Cont
mod.Status.ModuleLoader.DesiredNumber = int32(len(nmcs))
mod.Status.ModuleLoader.AvailableNumber = int32(numAvailable)

micObj, err := mrh.micAPI.Get(ctx, mod.Name, mod.Namespace)
if err != nil {
logger.Info("Could not get MIC to update ImageRebuildTriggerGeneration status, skipping", "error", err)
} else {
mod.Status.ImageRebuildTriggerGeneration = micObj.Status.ImageRebuildTriggerGeneration
}
Comment on lines +550 to +555
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Handle MIC read failures explicitly to avoid stale Module status.

At Line [551], all MIC Get errors are ignored, so mod.Status.ImageRebuildTriggerGeneration can remain stale and diverge from MIC indefinitely. Prefer: clear on NotFound, but return error for transient failures so reconcile retries.

Proposed fix
 import (
 	"context"
 	"errors"
 	"fmt"
 	"reflect"
 	"strings"
@@
 	v1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@
 	micObj, err := mrh.micAPI.Get(ctx, mod.Name, mod.Namespace)
 	if err != nil {
-		logger.Info("Could not get MIC to update ImageRebuildTriggerGeneration status, skipping", "error", err)
+		if apierrors.IsNotFound(err) {
+			mod.Status.ImageRebuildTriggerGeneration = nil
+		} else {
+			return fmt.Errorf("failed to get MIC %s/%s: %w", mod.Namespace, mod.Name, err)
+		}
 	} else {
 		mod.Status.ImageRebuildTriggerGeneration = micObj.Status.ImageRebuildTriggerGeneration
 	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
micObj, err := mrh.micAPI.Get(ctx, mod.Name, mod.Namespace)
if err != nil {
logger.Info("Could not get MIC to update ImageRebuildTriggerGeneration status, skipping", "error", err)
} else {
mod.Status.ImageRebuildTriggerGeneration = micObj.Status.ImageRebuildTriggerGeneration
}
import (
"context"
"errors"
"fmt"
"reflect"
"strings"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
micObj, err := mrh.micAPI.Get(ctx, mod.Name, mod.Namespace)
if err != nil {
if apierrors.IsNotFound(err) {
mod.Status.ImageRebuildTriggerGeneration = nil
} else {
return fmt.Errorf("failed to get MIC %s/%s: %w", mod.Namespace, mod.Name, err)
}
} else {
mod.Status.ImageRebuildTriggerGeneration = micObj.Status.ImageRebuildTriggerGeneration
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controllers/module_reconciler.go` around lines 550 - 555, The
current branch ignores all errors from mrh.micAPI.Get so
mod.Status.ImageRebuildTriggerGeneration can stay stale; change the logic in the
reconcile where mrh.micAPI.Get is called so that: if Get returns a NotFound (use
the Kubernetes apierrors.IsNotFound check) clear/zero
mod.Status.ImageRebuildTriggerGeneration, if Get returns any other error return
that error so the reconcile will be retried, and only when Get succeeds copy
micObj.Status.ImageRebuildTriggerGeneration into
mod.Status.ImageRebuildTriggerGeneration; update any log messages (logger.Info)
to reflect the NotFound vs transient error handling.

Comment on lines +550 to +555
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we doing it inside of moduleUpdateWorkerPodsStatus? This is not related to the worker pods.

I would create a new helper method called moduleUpdatePullerPodsStatus or something similar.

I know this is what we did u/s and I am OK merging this PR as is but I think we should fix it u/s and then pull this change m/s as well.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, I think also we should rename this function since its not updating worker pod status, maybe something like updateModuleLoaderStatus.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I will comment on the u/s PR.


return mrh.client.Status().Patch(ctx, mod, client.MergeFrom(unmodifiedMod))
}

Expand Down
72 changes: 71 additions & 1 deletion internal/controllers/module_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,7 @@ var _ = Describe("moduleUpdateWorkerPodsStatus", func() {
mrh *moduleReconcilerHelper
helper *nmc.MockHelper
statusWriter *client.MockStatusWriter
mockMicAPI *mic.MockMIC
)

BeforeEach(func() {
Expand All @@ -1036,14 +1037,15 @@ var _ = Describe("moduleUpdateWorkerPodsStatus", func() {
clnt = client.NewMockClient(ctrl)
helper = nmc.NewMockHelper(ctrl)
statusWriter = client.NewMockStatusWriter(ctrl)
mockMicAPI = mic.NewMockMIC(ctrl)
mod = kmmv1beta1.Module{
ObjectMeta: metav1.ObjectMeta{
Name: "modName",
Namespace: "modNamespace",
},
}
mockNetworkPolicyAPI := networkpolicy.NewMockNetworkPolicy(ctrl)
mrh = &moduleReconcilerHelper{client: clnt, nmcHelper: helper, networkPolicyAPI: mockNetworkPolicyAPI}
mrh = &moduleReconcilerHelper{client: clnt, nmcHelper: helper, networkPolicyAPI: mockNetworkPolicyAPI, micAPI: mockMicAPI}
})

It("faled to get configured NMCs", func() {
Expand All @@ -1069,6 +1071,7 @@ var _ = Describe("moduleUpdateWorkerPodsStatus", func() {
},
),
helper.EXPECT().GetModuleSpecEntry(&nmc1, mod.Namespace, mod.Name).Return(nil, 0),
mockMicAPI.EXPECT().Get(ctx, mod.Name, mod.Namespace).Return(&kmmv1beta1.ModuleImagesConfig{}, nil),
clnt.EXPECT().Status().Return(statusWriter),
statusWriter.EXPECT().Patch(ctx, expectedMod, gomock.Any()),
)
Expand Down Expand Up @@ -1118,6 +1121,7 @@ var _ = Describe("moduleUpdateWorkerPodsStatus", func() {
} else {
helper.EXPECT().GetModuleStatusEntry(&nmc1, mod.Namespace, mod.Name).Return(nil)
}
mockMicAPI.EXPECT().Get(ctx, mod.Name, mod.Namespace).Return(&kmmv1beta1.ModuleImagesConfig{}, nil)
clnt.EXPECT().Status().Return(statusWriter)
statusWriter.EXPECT().Patch(ctx, expectedMod, gomock.Any())

Expand Down Expand Up @@ -1155,12 +1159,78 @@ var _ = Describe("moduleUpdateWorkerPodsStatus", func() {
),
helper.EXPECT().GetModuleSpecEntry(&nmc1, mod.Namespace, mod.Name).Return(&nmcModuleSpec, 0),
helper.EXPECT().GetModuleStatusEntry(&nmc1, mod.Namespace, mod.Name).Return(&nmcModuleStatus),
mockMicAPI.EXPECT().Get(ctx, mod.Name, mod.Namespace).Return(&kmmv1beta1.ModuleImagesConfig{}, nil),
clnt.EXPECT().Status().Return(statusWriter),
statusWriter.EXPECT().Patch(ctx, expectedMod, gomock.Any()),
)

err := mrh.moduleUpdateWorkerPodsStatus(ctx, &mod, targetedNodes)
Expect(err).NotTo(HaveOccurred())
})

It("should propagate MIC status.ImageRebuildTriggerGeneration to Module status", func() {
micTriggerValue := 3
micObj := &kmmv1beta1.ModuleImagesConfig{
Status: kmmv1beta1.ModuleImagesConfigStatus{
ImageRebuildTriggerGeneration: &micTriggerValue,
},
}

nmc1 := kmmv1beta1.NodeModulesConfig{
ObjectMeta: metav1.ObjectMeta{Name: "nmc1"},
}
targetedNodes := []v1.Node{{}, {}}
expectedMod := mod.DeepCopy()
expectedMod.Status.ModuleLoader.NodesMatchingSelectorNumber = int32(2)
expectedMod.Status.ModuleLoader.DesiredNumber = int32(1)
expectedMod.Status.ModuleLoader.AvailableNumber = int32(0)
expectedMod.Status.ImageRebuildTriggerGeneration = &micTriggerValue

gomock.InOrder(
clnt.EXPECT().List(ctx, gomock.Any(), gomock.Any()).DoAndReturn(
func(_ interface{}, list *kmmv1beta1.NodeModulesConfigList, _ ...interface{}) error {
list.Items = []kmmv1beta1.NodeModulesConfig{nmc1}
return nil
},
),
helper.EXPECT().GetModuleSpecEntry(&nmc1, mod.Namespace, mod.Name).Return(nil, 0),
mockMicAPI.EXPECT().Get(ctx, mod.Name, mod.Namespace).Return(micObj, nil),
clnt.EXPECT().Status().Return(statusWriter),
statusWriter.EXPECT().Patch(ctx, expectedMod, gomock.Any()),
)

err := mrh.moduleUpdateWorkerPodsStatus(ctx, &mod, targetedNodes)
Expect(err).NotTo(HaveOccurred())
Expect(mod.Status.ImageRebuildTriggerGeneration).NotTo(BeNil())
Expect(*mod.Status.ImageRebuildTriggerGeneration).To(Equal(micTriggerValue))
})

It("should not fail if MIC Get returns an error", func() {
nmc1 := kmmv1beta1.NodeModulesConfig{
ObjectMeta: metav1.ObjectMeta{Name: "nmc1"},
}
targetedNodes := []v1.Node{{}}
expectedMod := mod.DeepCopy()
expectedMod.Status.ModuleLoader.NodesMatchingSelectorNumber = int32(1)
expectedMod.Status.ModuleLoader.DesiredNumber = int32(1)
expectedMod.Status.ModuleLoader.AvailableNumber = int32(0)

gomock.InOrder(
clnt.EXPECT().List(ctx, gomock.Any(), gomock.Any()).DoAndReturn(
func(_ interface{}, list *kmmv1beta1.NodeModulesConfigList, _ ...interface{}) error {
list.Items = []kmmv1beta1.NodeModulesConfig{nmc1}
return nil
},
),
helper.EXPECT().GetModuleSpecEntry(&nmc1, mod.Namespace, mod.Name).Return(nil, 0),
mockMicAPI.EXPECT().Get(ctx, mod.Name, mod.Namespace).Return(nil, fmt.Errorf("mic not found")),
clnt.EXPECT().Status().Return(statusWriter),
statusWriter.EXPECT().Patch(ctx, expectedMod, gomock.Any()),
)

err := mrh.moduleUpdateWorkerPodsStatus(ctx, &mod, targetedNodes)
Expect(err).NotTo(HaveOccurred())
Expect(mod.Status.ImageRebuildTriggerGeneration).To(BeNil())
})

})
Expand Down
Loading