Skip to content

Commit 57d35c8

Browse files
Add state compute mode tracking
Related-To: NEO-5019
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
1 parent 5c0789a commit 57d35c8

File tree

17 files changed

+652
-4
lines changed

17 files changed

+652
-4
lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ struct CommandList : _ze_command_list_handle_t {
323323
bool multiReturnPointCommandList = false;
324324
bool systolicModeSupport = false;
325325
bool pipelineSelectStateTracking = false;
326+
bool stateComputeModeTracking = false;
326327

327328
std::atomic<uint32_t> barrierCounter{0u};
328329
uint32_t latestFlushedBarrierCounter = 0u;

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,8 +2320,13 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
23202320
if (!containsAnyKernel) {
23212321
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
23222322
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
2323-
finalStreamState = requiredStreamState;
2324-
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
2323+
if (this->stateComputeModeTracking) {
2324+
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
2325+
finalStreamState = requiredStreamState;
2326+
} else {
2327+
finalStreamState = requiredStreamState;
2328+
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
2329+
}
23252330
containsAnyKernel = true;
23262331
}
23272332

level_zero/core/source/cmdlist/cmdlist_imp.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ namespace L0 {
2929
// Constructs the command list: forwards numIddsPerBlock to the command container and
// caches the L0HwHelper feature toggles in the matching member flags.
CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
3030
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
3131
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
32+
// The state compute mode tracking toggle is queried once, here at construction time.
stateComputeModeTracking = L0HwHelper::enableStateComputeModeTracking();
3233
}
3334

3435
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};

level_zero/core/source/cmdqueue/cmdqueue.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
4646

4747
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
4848
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
49+
stateComputeModeTracking = L0HwHelper::enableStateComputeModeTracking();
4950
}
5051

5152
ze_result_t CommandQueueImp::destroy() {

level_zero/core/source/cmdqueue/cmdqueue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
6969
bool internalUsage = false;
7070
bool multiReturnPointCommandList = false;
7171
bool pipelineSelectStateTracking = false;
72+
bool stateComputeModeTracking = false;
7273
};
7374

7475
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

level_zero/core/source/cmdqueue/cmdqueue_hw.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,15 @@ struct CommandQueueHw : public CommandQueueImp {
181181
const NEO::StreamProperties &cmdListRequired,
182182
const NEO::StreamProperties &cmdListFinal);
183183

184+
// Estimates the extra command-stream bytes one command list may need for a state compute
// mode command. csrStateCopy is modified: the list's required and then final state compute
// mode properties are applied to it, so it can be carried over to the next list.
inline size_t estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
185+
const NEO::StreamProperties &cmdListRequired,
186+
const NEO::StreamProperties &cmdListFinal);
187+
// Programs a state compute mode command into commandStream when applying the command
// list's required properties makes csrState dirty, then updates csrState with the list's
// final properties. Does nothing when state compute mode tracking is disabled.
inline void programRequiredStateComputeModeForCommandList(CommandList *commandList,
188+
NEO::LinearStream &commandStream,
189+
NEO::StreamProperties &csrState,
190+
const NEO::StreamProperties &cmdListRequired,
191+
const NEO::StreamProperties &cmdListFinal);
192+
184193
size_t alignedChildStreamPadding{};
185194
};
186195

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
182182
auto &finalStreamState = commandList->getFinalStreamState();
183183

184184
this->updateOneCmdListPreemptionModeAndCtxStatePreemption(ctx, commandList->getCommandListPreemptionMode(), child);
185+
185186
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
186187
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
188+
this->programRequiredStateComputeModeForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
187189

188190
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
189191
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
@@ -655,7 +657,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
655657
linearStreamSizeEstimate += estimateFrontEndCmdSize(ctx.frontEndStateDirty);
656658
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
657659

658-
if (this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
660+
if (this->stateComputeModeTracking || this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
659661
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
660662
auto streamPropertiesCopy = csr->getStreamProperties();
661663
bool gpgpuEnabledCopy = csr->getPreambleSetFlag();
@@ -667,6 +669,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
667669
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirtyCopy, ctx.engineInstanced, cmdList,
668670
streamPropertiesCopy, requiredStreamState, finalStreamState);
669671
linearStreamSizeEstimate += estimatePipelineSelectCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState, gpgpuEnabledCopy);
672+
linearStreamSizeEstimate += estimateScmCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState);
670673
}
671674
}
672675

@@ -1174,6 +1177,54 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(CommandList
11741177
csrState.pipelineSelect.setProperties(cmdListFinal.pipelineSelect);
11751178
}
11761179

1180+
// Estimates how many bytes of command stream a single command list may require for a
// state compute mode (SCM) command.
//
// csrStateCopy    - working copy of the stream properties; it is mutated by this call.
// cmdListRequired - properties the command list requires before it runs.
// cmdListFinal    - properties in effect after the command list has run.
//
// Returns 0 when state compute mode tracking is disabled; otherwise the size of one SCM
// command if applying cmdListRequired marks the copy dirty, or 0 if it does not.
template <GFXCORE_FAMILY gfxCoreFamily>
1181+
size_t CommandQueueHw<gfxCoreFamily>::estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
1182+
const NEO::StreamProperties &cmdListRequired,
1183+
const NEO::StreamProperties &cmdListFinal) {
1184+
if (!this->stateComputeModeTracking) {
1185+
return 0;
1186+
}
1187+
1188+
size_t estimatedSize = 0;
1189+
1190+
bool isRcs = this->getCsr()->isRcs();
1191+
size_t singleScmCmdSize = NEO::EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(device->getHwInfo(), false, isRcs);
1192+
1193+
// Apply the required state first; a dirty result means a command would have to be emitted.
csrStateCopy.stateComputeMode.setProperties(cmdListRequired.stateComputeMode);
1194+
if (csrStateCopy.stateComputeMode.isDirty()) {
1195+
estimatedSize += singleScmCmdSize;
1196+
}
1197+
// Leave the copy holding the list's final state so a following estimate starts from it.
csrStateCopy.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
1198+
1199+
return estimatedSize;
1200+
}
1201+
1202+
// Emits a state compute mode command for one command list when its required properties
// differ from what csrState currently holds.
//
// commandList     - list about to be executed; queried for systolic mode support.
// commandStream   - stream that receives the compute mode command, if one is needed.
// csrState        - tracked stream properties; updated to cmdListRequired and then to
//                   cmdListFinal by this call.
// cmdListRequired - properties the command list requires before it runs.
// cmdListFinal    - properties in effect after the command list has run.
//
// Returns immediately, leaving csrState untouched, when state compute mode tracking is
// disabled.
template <GFXCORE_FAMILY gfxCoreFamily>
1203+
void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandList(CommandList *commandList,
1204+
NEO::LinearStream &commandStream,
1205+
NEO::StreamProperties &csrState,
1206+
const NEO::StreamProperties &cmdListRequired,
1207+
const NEO::StreamProperties &cmdListFinal) {
1208+
if (!this->stateComputeModeTracking) {
1209+
return;
1210+
}
1211+
1212+
csrState.stateComputeMode.setProperties(cmdListRequired.stateComputeMode);
1213+
1214+
if (csrState.stateComputeMode.isDirty()) {
1215+
// NOTE(review): PipelineSelectArgs field order is not visible here; the first entry is taken
// from the tracked systolic mode and the last from the command list's systolic support.
// Confirm the meaning of the two literal `false` entries against the struct definition.
NEO::PipelineSelectArgs pipelineSelectArgs = {
1216+
!!csrState.pipelineSelect.systolicMode.value,
1217+
false,
1218+
false,
1219+
commandList->getSystolicModeSupport()};
1220+
1221+
bool isRcs = this->getCsr()->isRcs();
1222+
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(commandStream, csrState.stateComputeMode, pipelineSelectArgs,
1223+
false, device->getHwInfo(), isRcs, nullptr);
1224+
}
1225+
// Record the list's final state so the next command list is compared against it.
csrState.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
1226+
}
1227+
11771228
template <GFXCORE_FAMILY gfxCoreFamily>
11781229
bool CommandQueueHw<gfxCoreFamily>::isCleanLeftoverMemoryRequired() {
11791230
return false;

level_zero/core/source/hw_helpers/l0_hw_helper.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,12 @@ bool L0HwHelper::enablePipelineSelectStateTracking() {
3131
return defaultValue;
3232
}
3333

34+
// Reports whether state compute mode tracking is enabled.
// The EnableStateComputeModeTracking debug flag overrides the built-in default whenever it
// is set to anything other than -1; any non-zero value enables tracking.
bool L0HwHelper::enableStateComputeModeTracking() {
35+
constexpr bool defaultValue = false;
36+
// -1 is treated as "flag not set"; fall through to the default in that case.
if (NEO::DebugManager.flags.EnableStateComputeModeTracking.get() != -1) {
37+
return !!NEO::DebugManager.flags.EnableStateComputeModeTracking.get();
38+
}
39+
return defaultValue;
40+
}
41+
3442
} // namespace L0

level_zero/core/source/hw_helpers/l0_hw_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class L0HwHelper {
3232
static L0HwHelper &get(GFXCORE_FAMILY gfxCore);
3333
static bool enableMultiReturnPointCommandList();
3434
static bool enablePipelineSelectStateTracking();
35+
static bool enableStateComputeModeTracking();
3536
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
3637
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
3738

level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,5 +126,10 @@ void CmdListPipelineSelectStateFixture::setUp() {
126126
ModuleMutableCommandListFixture::setUp();
127127
}
128128

129+
// Test fixture setup: turns on the EnableStateComputeModeTracking debug flag and then runs
// the base ModuleMutableCommandListFixture setup.
void CmdListStateComputeModeStateFixture::setUp() {
130+
// Set before the base setup runs; presumably so objects created there read the enabled
// flag in their constructors — verify against ModuleMutableCommandListFixture::setUp.
DebugManager.flags.EnableStateComputeModeTracking.set(1);
131+
ModuleMutableCommandListFixture::setUp();
132+
}
133+
129134
} // namespace ult
130135
} // namespace L0

0 commit comments

Comments
 (0)