diff --git a/api/core/v1alpha2/vmbdacondition/condition.go b/api/core/v1alpha2/vmbdacondition/condition.go index 19d30e18cd..a77a61aff6 100644 --- a/api/core/v1alpha2/vmbdacondition/condition.go +++ b/api/core/v1alpha2/vmbdacondition/condition.go @@ -65,6 +65,10 @@ const ( Conflict AttachedReason = "Conflict" // DeviceNotAvailableOnNode indicates that the block device's PersistentVolume is not available on the node where the virtual machine is running. DeviceNotAvailableOnNode AttachedReason = "DeviceNotAvailableOnNode" + // HotPlugPodNotScheduled indicates that the hotplug pod cannot be scheduled on any node. + HotPlugPodNotScheduled AttachedReason = "HotPlugPodNotScheduled" + // FailedAttachVolume indicates that the hotplug pod failed to attach a volume. + FailedAttachVolume AttachedReason = "FailedAttachVolume" // CapacityAvailable signifies that the capacity not reached and attaching available. CapacityAvailable DiskAttachmentCapacityAvailableReason = "CapacityAvailable" diff --git a/api/core/v1alpha2/vmcondition/condition.go b/api/core/v1alpha2/vmcondition/condition.go index 6abfecbe0c..786e5e57f1 100644 --- a/api/core/v1alpha2/vmcondition/condition.go +++ b/api/core/v1alpha2/vmcondition/condition.go @@ -169,6 +169,7 @@ const ( ReasonVirtualMachineRunning RunningReason = "Running" ReasonInternalVirtualMachineError RunningReason = "InternalVirtualMachineError" ReasonPodNotStarted RunningReason = "PodNotStarted" + ReasonPodVolumeErrors RunningReason = "PodVolumeErrors" ReasonPodTerminating RunningReason = "PodTerminating" ReasonPodNotFound RunningReason = "PodNotFound" ReasonPodConditionMissing RunningReason = "PodConditionMissing" diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go b/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go index 347068a476..b908fde213 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "log/slog" + "slices" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -54,6 +55,11 @@ type LifeCycleHandler struct { recorder eventrecord.EventRecorderLogger } +type podVolumeErrorEvent struct { + Reason string + Message string +} + func (h *LifeCycleHandler) Handle(ctx context.Context, s state.VirtualMachineState) (reconcile.Result, error) { if s.VirtualMachine().IsEmpty() { return reconcile.Result{}, nil @@ -103,6 +109,18 @@ func (h *LifeCycleHandler) Handle(ctx context.Context, s state.VirtualMachineSta } log := logger.FromContext(ctx).With(logger.SlogHandler(nameLifeCycleHandler)) + // While the pod is not running, the VMI does not set the node and the method returns nil, so it is necessary to check if there are any issues with the pod + if pod == nil { + cb := conditions.NewConditionBuilder(vmcondition.TypeRunning).Generation(changed.GetGeneration()) + + if volumeErr := h.checkPodVolumeErrors(ctx, changed, log); volumeErr != nil { + cb.Status(metav1.ConditionFalse). + Reason(vmcondition.ReasonPodVolumeErrors). + Message(fmt.Sprintf("Error attaching block devices to virtual machine: %s: %s", volumeErr.Reason, volumeErr.Message)) + conditions.SetCondition(cb, &changed.Status.Conditions) + return reconcile.Result{}, nil + } + } h.syncRunning(ctx, changed, kvvm, kvvmi, pod, log) return reconcile.Result{}, nil @@ -125,8 +143,8 @@ func (h *LifeCycleHandler) syncRunning(ctx context.Context, vm *v1alpha2.Virtual if volumeError := h.checkPodVolumeErrors(ctx, vm, log); volumeError != nil { cb.Status(metav1.ConditionFalse). - Reason(vmcondition.ReasonPodNotStarted). - Message(volumeError.Error()) + Reason(vmcondition.ReasonPodVolumeErrors). + Message(fmt.Sprintf("Error attaching block devices to virtual machine: %s: %s", volumeError.Reason, volumeError.Message)) conditions.SetCondition(cb, &vm.Status.Conditions) return } @@ -138,7 +156,7 @@ func (h *LifeCycleHandler) syncRunning(ctx context.Context, vm *v1alpha2.Virtual if podScheduled.Message != "" { cb.Status(metav1.ConditionFalse). Reason(vmcondition.ReasonPodNotStarted). - Message(fmt.Sprintf("%s: %s", podScheduled.Reason, podScheduled.Message)) + Message(fmt.Sprintf("Could not schedule the virtual machine: %s: %s", podScheduled.Reason, podScheduled.Message)) conditions.SetCondition(cb, &vm.Status.Conditions) } @@ -210,11 +228,12 @@ func (h *LifeCycleHandler) syncRunning(ctx context.Context, vm *v1alpha2.Virtual } else { vm.Status.Node = "" } + cb.Reason(vmcondition.ReasonVirtualMachineNotRunning).Status(metav1.ConditionFalse) conditions.SetCondition(cb, &vm.Status.Conditions) } -func (h *LifeCycleHandler) checkPodVolumeErrors(ctx context.Context, vm *v1alpha2.VirtualMachine, log *slog.Logger) error { +func (h *LifeCycleHandler) checkPodVolumeErrors(ctx context.Context, vm *v1alpha2.VirtualMachine, log *slog.Logger) *podVolumeErrorEvent { var podList corev1.PodList err := h.client.List(ctx, &podList, &client.ListOptions{ Namespace: vm.Namespace, @@ -237,6 +256,9 @@ func (h *LifeCycleHandler) checkPodVolumeErrors(ctx context.Context, vm *v1alpha } func isContainerCreating(pod *corev1.Pod) bool { + if pod == nil { + return false + } if pod.Status.Phase != corev1.PodPending { return false } @@ -248,7 +270,7 @@ func isContainerCreating(pod *corev1.Pod) bool { return false } -func (h *LifeCycleHandler) getPodVolumeError(ctx context.Context, pod *corev1.Pod, log *slog.Logger) error { +func (h *LifeCycleHandler) getPodVolumeError(ctx context.Context, pod *corev1.Pod, log *slog.Logger) *podVolumeErrorEvent { if !isContainerCreating(pod) { return nil } @@ -266,9 +288,17 @@ func (h *LifeCycleHandler) getPodVolumeError(ctx context.Context, pod *corev1.Po return nil } - for _, e := range eventList.Items { - if e.Type == corev1.EventTypeWarning && (e.Reason == watcher.ReasonFailedAttachVolume || e.Reason == watcher.ReasonFailedMount) { - return fmt.Errorf("%s: %s", e.Reason, e.Message) + if len(eventList.Items) == 0 { + return nil + } + + last := slices.MaxFunc(eventList.Items, func(a, b corev1.Event) int { + return a.LastTimestamp.Compare(b.LastTimestamp.Time) + }) + if last.Reason == watcher.ReasonFailedAttachVolume || last.Reason == watcher.ReasonFailedMount { + return &podVolumeErrorEvent{ + Reason: last.Reason, + Message: last.Message, } } diff --git a/images/virtualization-artifact/pkg/controller/vmbda/internal/life_cycle.go b/images/virtualization-artifact/pkg/controller/vmbda/internal/life_cycle.go index d12c94f99d..c0f5e29fc2 100644 --- a/images/virtualization-artifact/pkg/controller/vmbda/internal/life_cycle.go +++ b/images/virtualization-artifact/pkg/controller/vmbda/internal/life_cycle.go @@ -22,6 +22,7 @@ import ( "fmt" "time" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" virtv1 "kubevirt.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -29,6 +30,7 @@ import ( "github.com/deckhouse/virtualization-controller/pkg/controller/conditions" "github.com/deckhouse/virtualization-controller/pkg/controller/service" intsvc "github.com/deckhouse/virtualization-controller/pkg/controller/vmbda/internal/service" + "github.com/deckhouse/virtualization-controller/pkg/controller/vmbda/internal/watcher" "github.com/deckhouse/virtualization-controller/pkg/logger" "github.com/deckhouse/virtualization/api/core/v1alpha2" "github.com/deckhouse/virtualization/api/core/v1alpha2/vmbdacondition" @@ -199,6 +201,11 @@ func (h LifeCycleHandler) Handle(ctx context.Context, vmbda *v1alpha2.VirtualMac if err != nil { if errors.Is(err, intsvc.ErrVolumeStatusNotReady) { vmbda.Status.Phase = v1alpha2.BlockDeviceAttachmentPhaseInProgress + + if handled, podErr := h.handleHotPlugPodIssues(ctx, ad, kvvmi, vmbda, cb); podErr != nil || handled { + return reconcile.Result{}, podErr + } + cb. Status(metav1.ConditionFalse). Reason(vmbdacondition.AttachmentRequestSent). @@ -300,3 +307,58 @@ func (h LifeCycleHandler) Handle(ctx context.Context, vmbda *v1alpha2.VirtualMac return reconcile.Result{}, err } } + +func (h LifeCycleHandler) handleHotPlugPodIssues( + ctx context.Context, + ad *intsvc.AttachmentDisk, + kvvmi *virtv1.VirtualMachineInstance, + vmbda *v1alpha2.VirtualMachineBlockDeviceAttachment, + cb *conditions.ConditionBuilder, +) (bool, error) { + hotPlugPod, err := h.attacher.GetHotPlugPod(ctx, ad, kvvmi) + if err != nil { + return false, err + } + if hotPlugPod == nil { + return false, nil + } + + for _, c := range hotPlugPod.Status.Conditions { + if c.Type == corev1.PodScheduled && c.Status == corev1.ConditionFalse && c.Message != "" { + vmbda.Status.Phase = v1alpha2.BlockDeviceAttachmentPhasePending + cb. + Status(metav1.ConditionFalse). + Reason(vmbdacondition.HotPlugPodNotScheduled). + Message(fmt.Sprintf("Error attaching block device to virtual machine: %s: %s", c.Reason, c.Message)) + return true, nil + } + } + + if isContainerCreating(hotPlugPod) { + lastEvent, err := h.attacher.GetLastPodEvent(ctx, hotPlugPod) + if err != nil { + return false, err + } + if lastEvent != nil && (lastEvent.Reason == watcher.ReasonFailedAttachVolume || lastEvent.Reason == watcher.ReasonFailedMount) { + cb. + Status(metav1.ConditionFalse). + Reason(vmbdacondition.FailedAttachVolume). + Message(fmt.Sprintf("Error attaching block device to virtual machine: %s: %s", lastEvent.Reason, lastEvent.Message)) + return true, nil + } + } + + return false, nil +} + +func isContainerCreating(pod *corev1.Pod) bool { + if pod.Status.Phase != corev1.PodPending { + return false + } + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" { + return true + } + } + return false +} diff --git a/images/virtualization-artifact/pkg/controller/vmbda/internal/service/attachment_service.go b/images/virtualization-artifact/pkg/controller/vmbda/internal/service/attachment_service.go index 6032079351..04159549f1 100644 --- a/images/virtualization-artifact/pkg/controller/vmbda/internal/service/attachment_service.go +++ b/images/virtualization-artifact/pkg/controller/vmbda/internal/service/attachment_service.go @@ -20,9 +20,11 @@ import ( "context" "errors" "fmt" + "slices" "strings" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/types" "k8s.io/component-helpers/scheduling/corev1/nodeaffinity" virtv1 "kubevirt.io/api/core/v1" @@ -314,6 +316,69 @@ func (s AttachmentService) IsPVAvailableOnVMNode(ctx context.Context, pvc *corev return true, nil } +func (s AttachmentService) GetHotPlugPod(ctx context.Context, ad *AttachmentDisk, kvvmi *virtv1.VirtualMachineInstance) (*corev1.Pod, error) { + if ad == nil || kvvmi == nil { + return nil, nil + } + + for _, vs := range kvvmi.Status.VolumeStatus { + if vs.HotplugVolume == nil || vs.Name != ad.GenerateName { + continue + } + if vs.HotplugVolume.AttachPodName == "" { + return nil, nil + } + + return object.FetchObject(ctx, types.NamespacedName{ + Namespace: kvvmi.Namespace, + Name: vs.HotplugVolume.AttachPodName, + }, s.client, &corev1.Pod{}) + } + return nil, nil +} + +func (s AttachmentService) GetHotPlugPodCondition(ctx context.Context, ad *AttachmentDisk, kvvmi *virtv1.VirtualMachineInstance, condType corev1.PodConditionType) (*corev1.PodCondition, error) { + pod, err := s.GetHotPlugPod(ctx, ad, kvvmi) + if err != nil || pod == nil { + return nil, err + } + + for i, c := range pod.Status.Conditions { + if c.Type == condType { + return &pod.Status.Conditions[i], nil + } + } + return nil, nil +} + +func (s AttachmentService) GetLastPodEvent(ctx context.Context, pod *corev1.Pod) (*corev1.Event, error) { + if pod == nil { + return nil, nil + } + + eventList := &corev1.EventList{} + err := s.client.List(ctx, eventList, &client.ListOptions{ + Namespace: pod.Namespace, + FieldSelector: fields.SelectorFromSet(fields.Set{ + "involvedObject.name": pod.Name, + "involvedObject.kind": "Pod", + }), + }) + if err != nil { + return nil, err + } + + if len(eventList.Items) == 0 { + return nil, nil + } + + last := slices.MaxFunc(eventList.Items, func(a, b corev1.Event) int { + return a.LastTimestamp.Compare(b.LastTimestamp.Time) + }) + + return &last, nil +} + func isSameBlockDeviceRefs(a, b v1alpha2.VMBDAObjectRef) bool { return a.Kind == b.Kind && a.Name == b.Name } diff --git a/images/virtualization-artifact/pkg/controller/vmbda/internal/watcher/hotplug_pod_watcher.go b/images/virtualization-artifact/pkg/controller/vmbda/internal/watcher/hotplug_pod_watcher.go new file mode 100644 index 0000000000..97cf18fe4b --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmbda/internal/watcher/hotplug_pod_watcher.go @@ -0,0 +1,114 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package watcher + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + virtv1 "kubevirt.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +func NewHotPlugPodWatcher(client client.Client) *HotPlugPodWatcher { + return &HotPlugPodWatcher{ + client: client, + } +} + +type HotPlugPodWatcher struct { + client client.Client +} + +func (w *HotPlugPodWatcher) Watch(mgr manager.Manager, ctr controller.Controller) error { + if err := ctr.Watch( + source.Kind( + mgr.GetCache(), + &corev1.Pod{}, + handler.TypedEnqueueRequestsFromMapFunc(w.enqueueVMBDAs), + predicate.TypedFuncs[*corev1.Pod]{ + CreateFunc: func(e event.TypedCreateEvent[*corev1.Pod]) bool { return true }, + DeleteFunc: func(e event.TypedDeleteEvent[*corev1.Pod]) bool { return true }, + UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Pod]) bool { + return e.ObjectOld.Status.Phase != e.ObjectNew.Status.Phase + }, + }, + ), + ); err != nil { + return fmt.Errorf("error setting watch on hot-plug Pod: %w", err) + } + return nil +} + +func (w *HotPlugPodWatcher) enqueueVMBDAs(ctx context.Context, pod *corev1.Pod) []reconcile.Request { + if pod == nil { + return nil + } + + ns := pod.Namespace + podName := pod.Name + + var kvvmiList virtv1.VirtualMachineInstanceList + if err := w.client.List(ctx, &kvvmiList, &client.ListOptions{Namespace: ns}); err != nil { + return nil + } + + for _, kvvmi := range kvvmiList.Items { + for _, vs := range kvvmi.Status.VolumeStatus { + if vs.HotplugVolume == nil || vs.HotplugVolume.AttachPodName != podName { + continue + } + + name, kind := kvbuilder.GetOriginalDiskName(vs.Name) + if kind == "" { + continue + } + + var vmbdas v1alpha2.VirtualMachineBlockDeviceAttachmentList + if err := w.client.List(ctx, &vmbdas, &client.ListOptions{Namespace: ns}); err != nil { + return nil + } + + var requests []reconcile.Request + for _, vmbda := range vmbdas.Items { + if vmbda.Spec.BlockDeviceRef.Name == name { + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: vmbda.Namespace, + Name: vmbda.Name, + }, + }) + } + } + return requests + } + } + + return nil +} diff --git a/images/virtualization-artifact/pkg/controller/vmbda/internal/watcher/volumeevent_watcher.go b/images/virtualization-artifact/pkg/controller/vmbda/internal/watcher/volumeevent_watcher.go new file mode 100644 index 0000000000..424669b4f6 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmbda/internal/watcher/volumeevent_watcher.go @@ -0,0 +1,128 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package watcher + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + virtv1 "kubevirt.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +const ( + ReasonFailedAttachVolume = "FailedAttachVolume" + ReasonFailedMount = "FailedMount" +) + +func NewVolumeEventWatcher(client client.Client) *VolumeEventWatcher { + return &VolumeEventWatcher{ + client: client, + } +} + +type VolumeEventWatcher struct { + client client.Client +} + +func (w *VolumeEventWatcher) Watch(mgr manager.Manager, ctr controller.Controller) error { + if err := ctr.Watch( + source.Kind( + mgr.GetCache(), + &corev1.Event{}, + handler.TypedEnqueueRequestsFromMapFunc(w.enqueueVMBDAs), + predicate.TypedFuncs[*corev1.Event]{ + CreateFunc: func(e event.TypedCreateEvent[*corev1.Event]) bool { + return e.Object.Type == corev1.EventTypeWarning && + (e.Object.Reason == ReasonFailedAttachVolume || e.Object.Reason == ReasonFailedMount) + }, + UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Event]) bool { + return false + }, + DeleteFunc: func(e event.TypedDeleteEvent[*corev1.Event]) bool { + return false + }, + }, + ), + ); err != nil { + return fmt.Errorf("error setting watch on Event: %w", err) + } + return nil +} + +func (w *VolumeEventWatcher) enqueueVMBDAs(ctx context.Context, e *corev1.Event) []reconcile.Request { + if e.InvolvedObject.Kind != "Pod" { + return nil + } + + if e.Reason != ReasonFailedAttachVolume && e.Reason != ReasonFailedMount { + return nil + } + + ns := e.InvolvedObject.Namespace + podName := e.InvolvedObject.Name + + var kvvmiList virtv1.VirtualMachineInstanceList + if err := w.client.List(ctx, &kvvmiList, &client.ListOptions{Namespace: ns}); err != nil { + return nil + } + + for _, kvvmi := range kvvmiList.Items { + for _, vs := range kvvmi.Status.VolumeStatus { + if vs.HotplugVolume == nil || vs.HotplugVolume.AttachPodName != podName { + continue + } + + name, kind := kvbuilder.GetOriginalDiskName(vs.Name) + if kind == "" { + continue + } + + var vmbdas v1alpha2.VirtualMachineBlockDeviceAttachmentList + if err := w.client.List(ctx, &vmbdas, &client.ListOptions{Namespace: ns}); err != nil { + return nil + } + + var requests []reconcile.Request + for _, vmbda := range vmbdas.Items { + if vmbda.Spec.BlockDeviceRef.Name == name { + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: vmbda.Namespace, + Name: vmbda.Name, + }, + }) + } + } + return requests + } + } + + return nil +} diff --git a/images/virtualization-artifact/pkg/controller/vmbda/vmbda_reconciler.go b/images/virtualization-artifact/pkg/controller/vmbda/vmbda_reconciler.go index 45e8a7b147..10b41e41d6 100644 --- a/images/virtualization-artifact/pkg/controller/vmbda/vmbda_reconciler.go +++ b/images/virtualization-artifact/pkg/controller/vmbda/vmbda_reconciler.go @@ -84,6 +84,7 @@ func (r *Reconciler) SetupController(_ context.Context, mgr manager.Manager, ctr watcher.NewClusterVirtualImageWatcher(mgr.GetClient()), watcher.NewVirtualImageWatcherr(mgr.GetClient()), watcher.NewKVVMIWatcher(mgr.GetClient()), + watcher.NewVolumeEventWatcher(mgr.GetClient()), } { err := w.Watch(mgr, ctr) if err != nil {