Skip to content

Commit 516082e

Browse files
Kmd notify improvements [1/n]: Quick KMD sleep optimization
- Add a KmdNotifyProperties struct to CapabilityTable that can be extended by upcoming KmdNotify-related optimizations.
- Add the Quick KMD sleep optimization, which is requested from the async events handler.
- The optimization checks taskCount in a busy loop with a much smaller delay than the basic version of the KMD Notify optimization.

Change-Id: I60c851c59895f0cf9de1e1f21e755a8b4c2fe900
1 parent 0290944 commit 516082e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+309
-102
lines changed

runtime/command_queue/command_queue.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,13 @@ bool CommandQueue::isCompleted(uint32_t taskCount) const {
151151
return tag >= taskCount;
152152
}
153153

154-
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait) {
154+
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
155155
WAIT_ENTER()
156156

157157
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait);
158158
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
159159

160-
device->getCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait);
160+
device->getCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep);
161161

162162
DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
163163
latestTaskCountWaited = taskCountToWait;

runtime/command_queue/command_queue.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
318318

319319
MOCKABLE_VIRTUAL bool isQueueBlocked();
320320

321-
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait);
321+
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
322322

323323
void flushWaitList(cl_uint numEventsInWaitList,
324324
const cl_event *eventWaitList,

runtime/command_queue/enqueue_common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
325325
commandStreamReceiver.overrideMediaVFEStateDirty(true);
326326

327327
if (devQueueHw->getSchedulerReturnInstance() > 0) {
328-
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp);
328+
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
329329

330330
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
331331
simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
@@ -404,9 +404,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
404404
if (blockQueue) {
405405
while (isQueueBlocked())
406406
;
407-
waitUntilComplete(taskCount, flushStamp->peekStamp());
407+
waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
408408
} else {
409-
waitUntilComplete(taskCount, flushStamp->peekStamp());
409+
waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
410410
for (auto sIt = surfacesForResidency, sE = surfacesForResidency + numSurfaceForResidency;
411411
sIt != sE; ++sIt) {
412412
(*sIt)->setCompletionStamp(completionStamp, nullptr, nullptr);

runtime/command_queue/finish.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ cl_int CommandQueueHw<GfxFamily>::finish(bool dcFlush) {
4141
auto flushStampToWaitFor = this->flushStamp->peekStamp();
4242

4343
// Stall until HW reaches CQ taskCount
44-
waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor);
44+
waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor, false);
4545

4646
commandStreamReceiver.waitForTaskCountAndCleanAllocationList(taskCountToWaitFor, TEMPORARY_ALLOCATION);
4747

runtime/command_stream/command_stream_receiver.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class CommandStreamReceiver {
111111

112112
void requestThreadArbitrationPolicy(uint32_t requiredPolicy) { this->requiredThreadArbitrationPolicy = requiredPolicy; }
113113

114-
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) = 0;
114+
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) = 0;
115115
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
116116

117117
// returns size of block that needs to be reserved at the beginning of each instruction heap for CommandStreamReceiver

runtime/command_stream/command_stream_receiver_hw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
6565
size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const;
6666
void programCoherency(LinearStream &csr, DispatchFlags &dispatchFlags);
6767

68-
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) override;
68+
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override;
6969
const HardwareInfo &peekHwInfo() const { return hwInfo; }
7070

7171
protected:

runtime/command_stream/command_stream_receiver_hw.inl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -558,14 +558,18 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
558558
}
559559

560560
template <typename GfxFamily>
561-
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait) {
562-
auto status = waitForCompletionWithTimeout(this->hwInfo.capabilityTable.enableKmdNotify && flushStampToWait != 0,
563-
this->hwInfo.capabilityTable.delayKmdNotifyMicroseconds,
564-
taskCountToWait);
561+
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
562+
const auto &kmdNotifyProperties = this->hwInfo.capabilityTable.kmdNotifyProperties;
563+
564+
const auto &kmdNotifyDelay = useQuickKmdSleep && kmdNotifyProperties.enableQuickKmdSleep ? kmdNotifyProperties.delayQuickKmdSleepMicroseconds
565+
: kmdNotifyProperties.delayKmdNotifyMicroseconds;
566+
567+
auto status = waitForCompletionWithTimeout(kmdNotifyProperties.enableKmdNotify && flushStampToWait != 0,
568+
kmdNotifyDelay, taskCountToWait);
565569
if (!status) {
566570
waitForFlushStamp(flushStampToWait);
567571
//now call blocking wait, this is to ensure that task count is reached
568-
waitForCompletionWithTimeout(false, this->hwInfo.capabilityTable.delayKmdNotifyMicroseconds, taskCountToWait);
572+
waitForCompletionWithTimeout(false, kmdNotifyDelay, taskCountToWait);
569573
}
570574

571575
UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);

runtime/event/async_events_handler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -87,7 +87,7 @@ void AsyncEventsHandler::asyncProcess() {
8787

8888
sleepCandidate = processList();
8989
if (sleepCandidate) {
90-
sleepCandidate->wait(true);
90+
sleepCandidate->wait(true, true);
9191
}
9292
std::this_thread::yield();
9393
}

runtime/event/event.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -288,14 +288,14 @@ bool Event::calcProfilingData() {
288288
return dataCalculated;
289289
}
290290

291-
inline bool Event::wait(bool blocking) {
291+
inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {
292292
while (this->taskCount == Event::eventNotReady) {
293293
if (blocking == false) {
294294
return false;
295295
}
296296
}
297297

298-
cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp());
298+
cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp(), useQuickKmdSleep);
299299
updateExecutionStatus();
300300

301301
DEBUG_BREAK_IF(this->taskLevel == Event::eventNotReady && this->executionStatus >= 0);
@@ -495,7 +495,7 @@ cl_int Event::waitForEvents(cl_uint numEvents,
495495
return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
496496
}
497497

498-
if (event->wait(false) == false) {
498+
if (event->wait(false, false) == false) {
499499
pendingEventsLeft->push_back(event);
500500
}
501501
}

runtime/event/event.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -225,7 +225,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
225225

226226
//returns true on success
227227
//if(blocking==false), will return with false instead of blocking while waiting for completion
228-
virtual bool wait(bool blocking);
228+
virtual bool wait(bool blocking, bool useQuickKmdSleep);
229229

230230
bool isUserEvent() const {
231231
return (CL_COMMAND_USER == cmdType);

0 commit comments

Comments
 (0)