44 "context"
55 "errors"
66 "fmt"
7+ "strings"
78 "sync"
89 "time"
910
@@ -118,7 +119,9 @@ func (s *StandardCapabilities) Start(ctx context.Context) error {
118119 CRESettings : s .creSettings ,
119120 TriggerEventStore : s .triggerEventStore ,
120121 }
121- if err = s .capabilitiesLoop .Service .Initialise (cctx , dependencies ); err != nil {
122+
123+ s .log .Infow ("StandardCapabilities calling Initialise on capability service" , "command" , s .command )
124+ if err = s .retryInitialiseUntilReady (cctx , dependencies ); err != nil {
122125 s .log .Errorf ("error initialising standard capabilities service: %v" , err )
123126 return
124127 }
@@ -136,6 +139,36 @@ func (s *StandardCapabilities) Start(ctx context.Context) error {
136139 })
137140}
138141
142+ // retryInitialiseUntilReady calls Initialise and retries on "empty local registry" or
143+ // "metadataRegistry information not available" so that capability init runs after the
144+ // registry syncer has pushed at least one non-empty local registry (startup race fix).
145+ const initRetryTimeout = 90 * time .Second
146+ const initRetryInterval = 3 * time .Second
147+
148+ func (s * StandardCapabilities ) retryInitialiseUntilReady (ctx context.Context , dependencies core.StandardCapabilitiesDependencies ) error {
149+ deadline := time .Now ().Add (initRetryTimeout )
150+ var lastErr error
151+ for attempt := 0 ; time .Now ().Before (deadline ); attempt ++ {
152+ lastErr = s .capabilitiesLoop .Service .Initialise (ctx , dependencies )
153+ if lastErr == nil {
154+ return nil
155+ }
156+ msg := lastErr .Error ()
157+ if ! strings .Contains (msg , "empty local registry" ) && ! strings .Contains (msg , "metadataRegistry information not available" ) {
158+ return lastErr
159+ }
160+ if attempt > 0 {
161+ s .log .Infow ("StandardCapabilities Initialise retry (waiting for registry sync)" , "command" , s .command , "attempt" , attempt + 1 , "err" , lastErr )
162+ }
163+ select {
164+ case <- ctx .Done ():
165+ return ctx .Err ()
166+ case <- time .After (initRetryInterval ):
167+ }
168+ }
169+ return fmt .Errorf ("initialise still failing after %v (registry never became ready): %w" , initRetryTimeout , lastErr )
170+ }
171+
139172// Ready is a non-blocking check for the service's ready state. Errors if not
140173// ready when called.
141174func (s * StandardCapabilities ) Ready () error {
0 commit comments