Skip to content

Commit 0e4a278

Browse files
committed
[core] Ensure all ODC calls are bound by a context with timeout
1 parent a2599b2 commit 0e4a278

2 files changed

Lines changed: 95 additions & 22 deletions

File tree

core/integration/odc/handlers.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -550,12 +550,12 @@ func handleConfigure(ctx context.Context, odcClient *RpcClient, arguments map[st
550550
return fmt.Errorf("status %s from ODC", replyStatus.String())
551551
}
552552
log.WithFields(logrus.Fields{
553-
"odcMsg": configureResponse.Reply.Msg,
554-
"odcStatus": configureResponse.Reply.Status.String(),
555-
"odcExectime": configureResponse.Reply.Exectime,
556-
"odcRunid": configureResponse.Reply.Partitionid,
557-
"odcSessionid": configureResponse.Reply.Sessionid,
558-
}).
553+
"odcMsg": configureResponse.Reply.Msg,
554+
"odcStatus": configureResponse.Reply.Status.String(),
555+
"odcExectime": configureResponse.Reply.Exectime,
556+
"odcRunid": configureResponse.Reply.Partitionid,
557+
"odcSessionid": configureResponse.Reply.Sessionid,
558+
}).
559559
Debug("call to ODC complete")
560560
return err
561561
}

core/integration/odc/plugin.go

Lines changed: 89 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,14 @@ import (
4444
"google.golang.org/grpc"
4545
)
4646

47-
const ODC_DIAL_TIMEOUT = 2 * time.Second
47+
const(
48+
ODC_DIAL_TIMEOUT = 2 * time.Second
49+
ODC_GENERAL_OP_TIMEOUT = 5 * time.Second
50+
ODC_CONFIGURE_TIMEOUT = 60 * time.Second
51+
ODC_START_TIMEOUT = 15 * time.Second
52+
ODC_STOP_TIMEOUT = 15 * time.Second
53+
ODC_RESET_TIMEOUT = 30 * time.Second
54+
)
4855

4956

5057
type Plugin struct {
@@ -151,20 +158,28 @@ func (p *Plugin) ObjectStack(data interface{}) (stack map[string]interface{}) {
151158
ok := false
152159
topology, ok = varStack["odc_topology"]
153160
if !ok {
154-
log.Error("cannot acquire ODC topology")
161+
log.WithField("partition", envId).
162+
WithField("call", "Configure").
163+
Error("cannot acquire ODC topology")
155164
return
156165
}
157166
plugin, ok = varStack["odc_plugin"]
158167
if !ok {
159-
log.Error("cannot acquire ODC RMS plugin declaration")
168+
log.WithField("partition", envId).
169+
WithField("call", "Configure").
170+
Error("cannot acquire ODC RMS plugin declaration")
160171
return
161172
}
162173
resources, ok = varStack["odc_resources"]
163174
if !ok {
164-
log.Error("cannot acquire ODC resources declaration")
175+
log.WithField("partition", envId).
176+
WithField("call", "Configure").
177+
Error("cannot acquire ODC resources declaration")
165178
return
166179
}
167180

181+
timeout := acquireTimeout(ODC_CONFIGURE_TIMEOUT, varStack, "Configure", envId)
182+
168183
arguments := make(map[string]string)
169184
arguments["environment_id"] = envId
170185

@@ -175,66 +190,101 @@ func (p *Plugin) ObjectStack(data interface{}) (stack map[string]interface{}) {
175190
arguments[strings.TrimPrefix(k, "odc_")] = v
176191
}
177192
}
178-
179-
err := handleConfigure(context.Background(), p.odcClient, arguments, topology, plugin, resources, envId)
193+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
194+
defer cancel()
195+
err := handleConfigure(ctx, p.odcClient, arguments, topology, plugin, resources, envId)
180196
if err != nil {
181197
log.WithField("level", infologger.IL_Support).
182198
WithField("partition", envId).
199+
WithField("call", "Configure").
183200
WithError(err).Error("ODC error")
184-
log.WithField("partition", envId).Error("EPN Configure call failed")
201+
log.WithField("partition", envId).
202+
WithField("call", "Configure").
203+
Error("EPN Configure call failed")
185204
}
186205
return
187206
}
188207
stack["Start"] = func() (out string) { // must formally return string even when we return nothing
189208
rn, ok := varStack["run_number"]
190209
if !ok {
191-
log.Warn("cannot acquire run number for ODC")
210+
log.WithField("partition", envId).
211+
WithField("call", "Start").
212+
Warn("cannot acquire run number for ODC")
192213
}
193214

215+
timeout := acquireTimeout(ODC_START_TIMEOUT, varStack, "Start", envId)
216+
194217
arguments := make(map[string]string)
195218
arguments["run_number"] = rn
196219
arguments["runNumber"] = rn
197220

198-
err := handleStart(context.Background(), p.odcClient, arguments, envId)
221+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
222+
defer cancel()
223+
err := handleStart(ctx, p.odcClient, arguments, envId)
199224
if err != nil {
200225
log.WithError(err).
201226
WithField("level", infologger.IL_Support).
202227
WithField("partition", envId).
228+
WithField("call", "Start").
203229
Error("ODC error")
204-
log.WithField("partition", envId).Error("EPN Start call failed")
230+
log.WithField("partition", envId).
231+
WithField("call", "Start").
232+
Error("EPN Start call failed")
205233
}
206234
return
207235
}
208236
stack["Stop"] = func() (out string) {
209-
err := handleStop(context.Background(), p.odcClient, nil, envId)
237+
timeout := acquireTimeout(ODC_STOP_TIMEOUT, varStack, "Stop", envId)
238+
239+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
240+
defer cancel()
241+
err := handleStop(ctx, p.odcClient, nil, envId)
210242
if err != nil {
211243
log.WithError(err).
212244
WithField("level", infologger.IL_Support).
213245
WithField("partition", envId).
246+
WithField("call", "Stop").
214247
Error("ODC error")
215-
log.WithField("partition", envId).Error("EPN Stop call failed")
248+
log.WithField("partition", envId).
249+
WithField("call", "Stop").
250+
Error("EPN Stop call failed")
216251
}
217252
return
218253
}
219254
stack["Reset"] = func() (out string) {
220-
err := handleReset(context.Background(), p.odcClient, nil, envId)
255+
timeout := acquireTimeout(ODC_RESET_TIMEOUT, varStack, "Reset", envId)
256+
257+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
258+
defer cancel()
259+
err := handleReset(ctx, p.odcClient, nil, envId)
221260
if err != nil {
222261
log.WithError(err).
223262
WithField("level", infologger.IL_Support).
224263
WithField("partition", envId).
264+
WithField("call", "Reset").
225265
Error("ODC error")
226-
log.WithField("partition", envId).Error("EPN Reset call failed")
266+
log.WithField("partition", envId).
267+
WithField("call", "Reset").
268+
Error("EPN Reset call failed")
227269
}
228270
return
229271
}
230272
stack["EnsureCleanup"] = func() (out string) {
231-
err := handleCleanup(context.Background(), p.odcClient, nil, envId)
273+
timeout := acquireTimeout(ODC_GENERAL_OP_TIMEOUT, varStack, "EnsureCleanup", envId)
274+
275+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
276+
defer cancel()
277+
err := handleCleanup(ctx, p.odcClient, nil, envId)
232278
if err != nil {
233279
log.WithError(err).
234280
WithField("level", infologger.IL_Support).
235281
WithField("partition", envId).
282+
WithField("call", "EnsureCleanup").
283+
236284
Error("ODC error")
237-
log.WithField("partition", envId).Error("EPN Cleanup sequence failed")
285+
log.WithField("partition", envId).
286+
WithField("call", "EnsureCleanup").
287+
Error("EPN Cleanup sequence failed")
238288
}
239289
return
240290
}
@@ -245,3 +295,26 @@ func (p *Plugin) ObjectStack(data interface{}) (stack map[string]interface{}) {
245295
func (p *Plugin) Destroy() error {
246296
return p.odcClient.Close()
247297
}
298+
299+
func acquireTimeout(defaultTimeout time.Duration, varStack map[string]string, callName string, envId string) time.Duration {
300+
timeout := defaultTimeout
301+
timeoutStr, ok := varStack["__call_timeout"] // the Call interface ensures we'll find this key
302+
// see Call.Call in callable/call.go for details
303+
if ok {
304+
var err error
305+
timeout, err = time.ParseDuration(timeoutStr)
306+
if err != nil {
307+
timeout = defaultTimeout
308+
log.WithField("partition", envId).
309+
WithField("call", callName).
310+
WithField("default", timeout.String()).
311+
Warn("could not parse timeout declaration for hook call")
312+
}
313+
} else {
314+
log.WithField("partition", envId).
315+
WithField("call", callName).
316+
WithField("default", timeout.String()).
317+
Warn("could not get timeout declaration for hook call")
318+
}
319+
return timeout
320+
}

0 commit comments

Comments
 (0)