@@ -17,6 +17,7 @@ import (
1717 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1818 "k8s.io/apimachinery/pkg/labels"
1919 "k8s.io/apimachinery/pkg/types"
20+ "k8s.io/utils/ptr"
2021 "sigs.k8s.io/controller-runtime/pkg/client"
2122
2223 ocv1 "github.com/operator-framework/operator-controller/api/v1"
@@ -56,13 +57,16 @@ func TestClusterCatalogUnpacking(t *testing.T) {
5657 managerPod = managerPods .Items [0 ]
5758 }, time .Minute , time .Second )
5859
59- t .Log ("Waiting for acquired leader election" )
60- leaderCtx , leaderCancel := context .WithTimeout (ctx , 3 * time .Minute )
61- defer leaderCancel ()
62- leaderSubstrings := []string {"successfully acquired lease" }
63- leaderElected , err := watchPodLogsForSubstring (leaderCtx , & managerPod , leaderSubstrings ... )
64- require .NoError (t , err )
65- require .True (t , leaderElected )
60+ t .Logf ("Waiting for acquired leader election by checking lease" )
61+ // Instead of watching logs (which has timing issues in controller-runtime v0.23.1+),
62+ // directly check the lease object to confirm leader election occurred
63+ require .EventuallyWithT (t , func (ct * assert.CollectT ) {
64+ var lease coordinationv1.Lease
65+ err := c .Get (ctx , types.NamespacedName {Name : "catalogd-operator-lock" , Namespace : "olmv1-system" }, & lease )
66+ require .NoError (ct , err )
67+ require .NotNil (ct , lease .Spec .HolderIdentity , "lease should have a holder" )
68+ require .NotEmpty (ct , * lease .Spec .HolderIdentity , "lease holder identity should not be empty" )
69+ }, 3 * time .Minute , time .Second )
6670
6771 t .Log ("Reading logs to make sure that ClusterCatalog was reconciled by catalogdv1" )
6872 logCtx , cancel := context .WithTimeout (ctx , time .Minute )
@@ -110,16 +114,18 @@ func TestClusterExtensionAfterOLMUpgrade(t *testing.T) {
110114 t .Log ("Wait for operator-controller deployment to be ready" )
111115 managerPod := waitForDeployment (t , ctx , "operator-controller" )
112116
113- t .Log ("Wait for acquired leader election" )
117+ t .Log ("Wait for acquired leader election by checking lease" )
118+ // Instead of watching logs (which has timing issues in controller-runtime v0.23.1+),
119+ // directly check the lease object to confirm leader election occurred
114120 // Average case is under 1 minute but in the worst case: (previous leader crashed)
115121 // we could have LeaseDuration (137s) + RetryPeriod (26s) +/- 163s
116- leaderCtx , leaderCancel := context . WithTimeout ( ctx , 3 * time . Minute )
117- defer leaderCancel ()
118-
119- leaderSubstrings := [] string { "successfully acquired lease" }
120- leaderElected , err := watchPodLogsForSubstring ( leaderCtx , managerPod , leaderSubstrings ... )
121- require .NoError ( t , err )
122- require . True ( t , leaderElected )
122+ require . EventuallyWithT ( t , func ( ct * assert. CollectT ) {
123+ var lease coordinationv1. Lease
124+ err := c . Get ( ctx , types. NamespacedName { Name : "9c4404e7.operatorframework.io" , Namespace : "olmv1-system" }, & lease )
125+ require . NoError ( ct , err )
126+ require . NotNil ( ct , lease . Spec . HolderIdentity , "lease should have a holder" )
127+ require .NotEmpty ( ct , * lease . Spec . HolderIdentity , "lease holder identity should not be empty" )
128+ }, 3 * time . Minute , time . Second )
123129
124130 t .Log ("Reading logs to make sure that ClusterExtension was reconciled by operator-controller before we update it" )
125131 // Make sure that after we upgrade OLM itself we can still reconcile old objects without any changes
@@ -277,32 +283,46 @@ func waitForDeployment(t *testing.T, ctx context.Context, controlPlaneLabel stri
277283}
278284
279285func watchPodLogsForSubstring (ctx context.Context , pod * corev1.Pod , substrings ... string ) (bool , error ) {
280- podLogOpts := corev1.PodLogOptions {
281- Follow : true ,
282- Container : container ,
283- }
284-
285- req := kclientset .CoreV1 ().Pods (pod .Namespace ).GetLogs (pod .Name , & podLogOpts )
286- podLogs , err := req .Stream (ctx )
287- if err != nil {
288- return false , err
289- }
290- defer podLogs .Close ()
286+ // Use a polling approach to periodically check pod logs for the substrings
287+ // This handles controller-runtime v0.23.1+ where leader election happens faster
288+ // and the message may be emitted before we start watching with Follow
289+ ticker := time .NewTicker (2 * time .Second )
290+ defer ticker .Stop ()
291+
292+ for {
293+ select {
294+ case <- ctx .Done ():
295+ return false , ctx .Err ()
296+ case <- ticker .C :
297+ // Get recent logs without Follow to check if message exists
298+ logOpts := corev1.PodLogOptions {
299+ Container : container ,
300+ TailLines : ptr .To (int64 (1000 )),
301+ }
291302
292- scanner := bufio .NewScanner (podLogs )
293- for scanner .Scan () {
294- line := scanner .Text ()
303+ req := kclientset .CoreV1 ().Pods (pod .Namespace ).GetLogs (pod .Name , & logOpts )
304+ podLogs , err := req .Stream (context .Background ()) // Use Background to avoid context cancellation during read
305+ if err != nil {
306+ // Pod might not be ready yet, continue polling
307+ continue
308+ }
295309
296- foundCount := 0
297- for _ , substring := range substrings {
298- if strings .Contains (line , substring ) {
299- foundCount ++
310+ scanner := bufio .NewScanner (podLogs )
311+ for scanner .Scan () {
312+ line := scanner .Text ()
313+
314+ foundCount := 0
315+ for _ , substring := range substrings {
316+ if strings .Contains (line , substring ) {
317+ foundCount ++
318+ }
319+ }
320+ if foundCount == len (substrings ) {
321+ podLogs .Close ()
322+ return true , nil
323+ }
300324 }
301- }
302- if foundCount == len (substrings ) {
303- return true , nil
325+ podLogs .Close ()
304326 }
305327 }
306-
307- return false , scanner .Err ()
308328}
0 commit comments