Skip to content

Commit f9fbbd4

Browse files
miltalexteo
authored and committed
[executor] add timeout to OCC transition
1 parent 2d237ee commit f9fbbd4

1 file changed

Lines changed: 30 additions & 7 deletions

File tree

executor/executable/controllabletask.go

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ type ControllableTask struct {
4848
pendingFinalTaskStateCh chan mesos.TaskState
4949
}
5050

51+
// CommitResponse bundles the two values returned by cmd.Commit() so that they
// can be delivered together over a single channel from the goroutine that
// performs the commit.
type CommitResponse struct {
52+
// newState holds the first value returned by cmd.Commit().
newState string
53+
// transitionError holds the error returned by cmd.Commit().
transitionError error
54+
}
55+
5156
func (t *ControllableTask) Launch() error {
5257
t.pendingFinalTaskStateCh = make(chan mesos.TaskState, 1) // we use this to receive a pending status update if the task was killed
5358
taskCmd, err := prepareTaskCmd(t.tci)
@@ -387,17 +392,35 @@ func (t *ControllableTask) Kill() error {
387392
"targetList": cmd.TargetList,
388393
}).
389394
Debug("state DONE not reached, about to commit transition")
395+
396+
// Call cmd.Commit() asynchronously
397+
commitDone := make(chan *CommitResponse)
398+
go func() {
399+
var cr CommitResponse
400+
cr.newState, cr.transitionError = cmd.Commit()
401+
commitDone <- &cr
402+
}()
403+
404+
// Set a timeout because OCC is locking up, so killing is not possible. The following approach
405+
// helps us bypass the OCC transition, so we can kill the tasks when force-destroying the environment.
406+
// Currently this runs for every Kill message received (forced or not).
407+
// TODO: Find a better way to distinguish force from plain Kill message.
408+
var _cr *CommitResponse
409+
select {
410+
case _cr = <- commitDone:
411+
case <-time.After(45 * time.Second):
412+
log.Error("deadline exceeded")
413+
break
414+
}
390415

391-
newState, transitionError := cmd.Commit()
392-
393-
log.WithField("newState", newState).
394-
WithError(transitionError).
416+
log.WithField("newState", _cr.newState).
417+
WithError(_cr.transitionError).
395418
Debug("transition committed")
396-
if transitionError != nil || len(cmd.Event) == 0 {
397-
log.WithError(transitionError).Error("cannot gracefully end task")
419+
if _cr.transitionError != nil || len(cmd.Event) == 0 {
420+
log.WithError(_cr.transitionError).Error("cannot gracefully end task")
398421
break
399422
}
400-
reachedState = newState
423+
reachedState = _cr.newState
401424
}
402425

403426
log.Debug("end transition loop done")

0 commit comments

Comments
 (0)