@@ -32,6 +32,7 @@ import (
3232
3333 "github.com/AliceO2Group/Control/common/logger/infologger"
3434 "github.com/AliceO2Group/Control/common/utils"
35+ "github.com/AliceO2Group/Control/core/environment"
3536 "github.com/AliceO2Group/Control/core/integration/odc/odcutils"
3637 odcpb "github.com/AliceO2Group/Control/core/integration/odc/protos"
3738 "github.com/sirupsen/logrus"
@@ -243,7 +244,7 @@ func handleReset(ctx context.Context, odcClient *RpcClient, arguments map[string
243244 return nil
244245}
245246
246- func handleCleanup (ctx context.Context , odcClient * RpcClient , arguments map [string ]string , envId string ) error {
247+ func handleCleanupLegacy (ctx context.Context , odcClient * RpcClient , arguments map [string ]string , envId string ) error {
247248 defer utils .TimeTrackFunction (time .Now (), log .WithPrefix ("odcclient" ))
248249 if envId == "" {
249250 return errors .New ("cannot proceed with empty environment id" )
@@ -277,6 +278,93 @@ func handleCleanup(ctx context.Context, odcClient *RpcClient, arguments map[stri
277278 return nil // We clobber the error because nothing can be done for a failed cleanup
278279}
279280
281+ func handleCleanup (ctx context.Context , odcClient * RpcClient , arguments map [string ]string , envId string ) error {
282+ defer utils .TimeTrackFunction (time .Now (), log .WithPrefix ("odcclient" ))
283+
284+ // First we query ODC for the full list of active partitions
285+ req := & odcpb.StatusRequest {}
286+
287+ var err error = nil
288+ var rep * odcpb.StatusReply
289+
290+ rep , err = odcClient .Status (ctx , req , grpc.EmptyCallOption {})
291+ if err != nil {
292+ return printGrpcError (err )
293+ }
294+
295+ if rep == nil || rep .GetStatus () == odcpb .ReplyStatus_UNKNOWN {
296+ // We got a nil response with nil error, this should never happen
297+ return errors .New ("nil response error" )
298+ }
299+
300+ if odcErr := rep .GetError (); odcErr != nil {
301+ return fmt .Errorf ("code %d from ODC: %s" , odcErr .GetCode (), odcErr .GetMsg ())
302+ }
303+ if replyStatus := rep .GetStatus (); replyStatus != odcpb .ReplyStatus_SUCCESS {
304+ return fmt .Errorf ("status %s from ODC" , replyStatus .String ())
305+ }
306+ log .WithFields (logrus.Fields {
307+ "odcMsg" : rep .GetMsg (),
308+ "odcStatus" : rep .GetStatus ().String (),
309+ "odcExectime" : rep .GetExectime (),
310+ }).
311+ Trace ("call to ODC complete" )
312+
313+ knownEnvs := environment .ManagerInstance ().Ids ()
314+ partitionsToClean := make (map [string ]struct {})
315+ for _ , odcPartition := range rep .GetPartitions () {
316+ isOrphan := true
317+ for _ , knownEnv := range knownEnvs {
318+ if odcPartition .Partitionid == knownEnv .String () { // found a matching env
319+ isOrphan = false
320+ break
321+ }
322+ }
323+ if isOrphan { // no env was found for the given ODC partition
324+ partitionsToClean [odcPartition .Partitionid ] = struct {}{}
325+ }
326+ }
327+
328+ // The present function can in principle be called with envId = "", if the cleanup is triggered from
329+ // outside of an active environment.
330+ // If an envId is passed, we append it to the list of partitions to clean up just in case, otherwise we
331+ // ignore it.
332+ if envId != "" {
333+ partitionsToClean [envId ] = struct {}{}
334+ }
335+
336+ // Then the actual cleanup calls begin, one partition at a time...
337+ for odcPartitionId , _ := range partitionsToClean {
338+ // This block tries to perform the regular teardown sequence.
339+ // Since Shutdown is supposed to work in any state, we don't bail on error.
340+ err := doReset (ctx , odcClient , arguments , odcPartitionId )
341+ if err != nil {
342+ log .WithError (printGrpcError (err )).
343+ WithField ("level" , infologger .IL_Devel ).
344+ WithField ("partition" , odcPartitionId ).
345+ Warn ("ODC Reset call failed" )
346+ }
347+
348+ err = doTerminate (ctx , odcClient , arguments , odcPartitionId )
349+ if err != nil {
350+ log .WithError (printGrpcError (err )).
351+ WithField ("level" , infologger .IL_Devel ).
352+ WithField ("partition" , odcPartitionId ).
353+ Warn ("ODC Terminate call failed" )
354+ }
355+
356+ err = doShutdown (ctx , odcClient , arguments , odcPartitionId )
357+ if err != nil {
358+ log .WithError (printGrpcError (err )).
359+ WithField ("level" , infologger .IL_Devel ).
360+ WithField ("partition" , odcPartitionId ).
361+ Warn ("ODC Shutdown call failed" )
362+ }
363+ }
364+
365+ return nil // We clobber the error because nothing can be done for a failed cleanup
366+ }
367+
280368func doReset (ctx context.Context , odcClient * RpcClient , arguments map [string ]string , envId string ) error {
281369 // RESET
282370 req := & odcpb.ResetRequest {
@@ -307,12 +395,12 @@ func doReset(ctx context.Context, odcClient *RpcClient, arguments map[string]str
307395 return fmt .Errorf ("status %s from ODC" , replyStatus .String ())
308396 }
309397 log .WithFields (logrus.Fields {
310- "odcMsg" : rep .Reply .Msg ,
311- "odcStatus" : rep .Reply .Status .String (),
312- "odcExectime" : rep .Reply .Exectime ,
313- "odcRunid" : rep .Reply .Partitionid ,
314- "odcSessionid" : rep .Reply .Sessionid ,
315- }).
398+ "odcMsg" : rep .Reply .Msg ,
399+ "odcStatus" : rep .Reply .Status .String (),
400+ "odcExectime" : rep .Reply .Exectime ,
401+ "odcRunid" : rep .Reply .Partitionid ,
402+ "odcSessionid" : rep .Reply .Sessionid ,
403+ }).
316404 Debug ("call to ODC complete" )
317405 return err
318406}
0 commit comments