Skip to content

Commit a0c044e

Browse files
pwilma authored and Compute-Runtime-Automation committed
Extend batch buffer flattening in AubCSR to BatchedDispatch mode
- batch buffer flattening in batched mode
- added MI_USER_INTERRUPT command
- added GUC Work Queue Item

Change-Id: I35142da34b30d3006bb4ffc1521db7f6ebe68ebc
1 parent 3115757 commit a0c044e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1189
-248
lines changed

runtime/command_queue/command_queue_hw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class CommandQueueHw : public CommandQueue {
6565
}
6666

6767
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
68-
device->getCommandStreamReceiver().overrideDispatchPolicy(CommandStreamReceiver::BatchedDispatch);
68+
device->getCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
6969
}
7070
}
7171

runtime/command_queue/enqueue_common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
259259
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
260260
for (auto &dispatchInfo : multiDispatchInfo) {
261261
for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) {
262-
commandStreamReceiver.setPatchInfoData(patchInfoData);
262+
commandStreamReceiver.getFlatBatchBufferHelper().setPatchInfoData(patchInfoData);
263263
}
264264
}
265265
}

runtime/command_stream/aub_command_stream_receiver_hw.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,10 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
6363

6464
MemoryManager *createMemoryManager(bool enable64kbPages) override {
6565
this->memoryManager = new OsAgnosticMemoryManager(enable64kbPages);
66+
this->flatBatchBufferHelper->setMemoryManager(this->memoryManager);
6667
return this->memoryManager;
6768
}
6869

69-
bool setPatchInfoData(PatchInfoData &data) override;
70-
71-
std::vector<PatchInfoData> patchInfoCollection;
72-
7370
static const AubMemDump::LrcaHelper &getCsTraits(EngineType engineType);
7471

7572
struct EngineInfo {
@@ -91,7 +88,8 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverHw<GfxFamily> {
9188
// remap CPU VA -> GGTT VA
9289
AddressMapper gttRemap;
9390

94-
MOCKABLE_VIRTUAL void *flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer);
9591
MOCKABLE_VIRTUAL bool addPatchInfoComments();
92+
void addGUCStartMessage(uint64_t batchBufferAddress, EngineType engineType);
93+
uint32_t getGUCWorkQueueItemHeader(EngineType engineType);
9694
};
9795
} // namespace OCLRT

runtime/command_stream/aub_command_stream_receiver_hw.inl

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ AUBCommandStreamReceiverHw<GfxFamily>::AUBCommandStreamReceiverHw(const Hardware
3737
: BaseClass(hwInfoIn),
3838
stream(std::unique_ptr<AUBCommandStreamReceiver::AubFileStream>(new AUBCommandStreamReceiver::AubFileStream())),
3939
standalone(standalone) {
40-
this->dispatchMode = CommandStreamReceiver::DispatchMode::BatchedDispatch;
40+
this->dispatchMode = DispatchMode::BatchedDispatch;
4141
if (DebugManager.flags.CsrDispatchMode.get()) {
42-
this->dispatchMode = (CommandStreamReceiver::DispatchMode)DebugManager.flags.CsrDispatchMode.get();
42+
this->dispatchMode = (DispatchMode)DebugManager.flags.CsrDispatchMode.get();
4343
}
4444
for (auto &engineInfo : engineInfoTable) {
4545
engineInfo.pLRCA = nullptr;
@@ -221,8 +221,8 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
221221
auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset;
222222

223223
std::unique_ptr<void, std::function<void(void *)>> flatBatchBuffer(nullptr, [&](void *ptr) { this->getMemoryManager()->alignedFreeWrapper(ptr); });
224-
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get() && (this->dispatchMode == CommandStreamReceiver::DispatchMode::ImmediateDispatch)) {
225-
flatBatchBuffer.reset(flattenBatchBuffer(batchBuffer, sizeBatchBuffer));
224+
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
225+
flatBatchBuffer.reset(this->flatBatchBufferHelper->flattenBatchBuffer(batchBuffer, sizeBatchBuffer, this->dispatchMode));
226226
if (flatBatchBuffer.get() != nullptr) {
227227
pBatchBuffer = flatBatchBuffer.get();
228228
}
@@ -248,7 +248,7 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
248248
}
249249

250250
if (this->standalone) {
251-
if (this->dispatchMode == CommandStreamReceiver::DispatchMode::ImmediateDispatch) {
251+
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
252252
if (!DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
253253
makeResident(*batchBuffer.commandBufferAllocation);
254254
}
@@ -259,6 +259,7 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
259259
processResidency(allocationsForResidency);
260260
}
261261
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
262+
addGUCStartMessage(static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(pBatchBuffer)), engineType);
262263
addPatchInfoComments();
263264
}
264265

@@ -387,32 +388,13 @@ FlushStamp AUBCommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer
387388
return 0;
388389
}
389390

390-
template <typename GfxFamily>
391-
void *AUBCommandStreamReceiverHw<GfxFamily>::flattenBatchBuffer(BatchBuffer &batchBuffer, size_t &sizeBatchBuffer) {
392-
void *flatBatchBuffer = nullptr;
393-
394-
if (batchBuffer.chainedBatchBuffer) {
395-
batchBuffer.chainedBatchBuffer->setTypeAubNonWritable();
396-
auto sizeMainBatchBuffer = batchBuffer.chainedBatchBufferStartOffset - batchBuffer.startOffset;
397-
auto flatBatchBufferSize = alignUp(sizeMainBatchBuffer + batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), MemoryConstants::pageSize);
398-
flatBatchBuffer = this->getMemoryManager()->alignedMallocWrapper(flatBatchBufferSize, MemoryConstants::pageSize);
399-
UNRECOVERABLE_IF(flatBatchBuffer == nullptr);
400-
// Copy FLB
401-
memcpy_s(flatBatchBuffer, sizeMainBatchBuffer, ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset), sizeMainBatchBuffer);
402-
// Copy SLB
403-
memcpy_s(ptrOffset(flatBatchBuffer, sizeMainBatchBuffer), batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize(), batchBuffer.chainedBatchBuffer->getUnderlyingBuffer(), batchBuffer.chainedBatchBuffer->getUnderlyingBufferSize());
404-
sizeBatchBuffer = flatBatchBufferSize;
405-
}
406-
return flatBatchBuffer;
407-
}
408-
409391
template <typename GfxFamily>
410392
bool AUBCommandStreamReceiverHw<GfxFamily>::addPatchInfoComments() {
411393
std::map<uint64_t, uint64_t> allocationsMap;
412394

413395
std::ostringstream str;
414396
str << "PatchInfoData" << std::endl;
415-
for (auto &patchInfoData : this->patchInfoCollection) {
397+
for (auto &patchInfoData : this->flatBatchBufferHelper->getPatchInfoCollection()) {
416398
str << std::hex << patchInfoData.sourceAllocation << ";";
417399
str << std::hex << patchInfoData.sourceAllocationOffset << ";";
418400
str << std::hex << patchInfoData.sourceType << ";";
@@ -432,7 +414,7 @@ bool AUBCommandStreamReceiverHw<GfxFamily>::addPatchInfoComments() {
432414
}
433415
}
434416
bool result = stream->addComment(str.str().c_str());
435-
this->patchInfoCollection.clear();
417+
this->flatBatchBufferHelper->getPatchInfoCollection().clear();
436418
if (!result) {
437419
return false;
438420
}
@@ -547,8 +529,41 @@ void AUBCommandStreamReceiverHw<GfxFamily>::addContextToken() {
547529
}
548530

549531
template <typename GfxFamily>
550-
bool AUBCommandStreamReceiverHw<GfxFamily>::setPatchInfoData(PatchInfoData &data) {
551-
patchInfoCollection.push_back(data);
552-
return true;
532+
void AUBCommandStreamReceiverHw<GfxFamily>::addGUCStartMessage(uint64_t batchBufferAddress, EngineType engineType) {
533+
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
534+
535+
auto bufferSize = sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START);
536+
537+
std::unique_ptr<void, std::function<void(void *)>> buffer(this->getMemoryManager()->alignedMallocWrapper(bufferSize, MemoryConstants::pageSize), [&](void *ptr) { this->getMemoryManager()->alignedFreeWrapper(ptr); });
538+
LinearStream linearStream(buffer.get(), bufferSize);
539+
540+
uint32_t *header = static_cast<uint32_t *>(linearStream.getSpace(sizeof(uint32_t)));
541+
*header = getGUCWorkQueueItemHeader(engineType);
542+
543+
MI_BATCH_BUFFER_START *miBatchBufferStart = linearStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
544+
DEBUG_BREAK_IF(bufferSize != linearStream.getUsed());
545+
miBatchBufferStart->init();
546+
miBatchBufferStart->setBatchBufferStartAddressGraphicsaddress472(AUB::ptrToPPGTT(buffer.get()));
547+
miBatchBufferStart->setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
548+
549+
auto physBufferAddres = ppgtt.map(reinterpret_cast<uintptr_t>(buffer.get()), bufferSize);
550+
AUB::reserveAddressPPGTT(*stream, reinterpret_cast<uintptr_t>(buffer.get()), bufferSize, physBufferAddres);
551+
552+
AUB::addMemoryWrite(
553+
*stream,
554+
physBufferAddres,
555+
buffer.get(),
556+
bufferSize,
557+
AubMemDump::AddressSpaceValues::TraceNonlocal);
558+
559+
PatchInfoData patchInfoData(batchBufferAddress, 0u, PatchInfoAllocationType::Default, reinterpret_cast<uintptr_t>(buffer.get()), sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START) - sizeof(uint64_t), PatchInfoAllocationType::GUCStartMessage);
560+
this->flatBatchBufferHelper->setPatchInfoData(patchInfoData);
553561
}
562+
563+
template <typename GfxFamily>
564+
uint32_t AUBCommandStreamReceiverHw<GfxFamily>::getGUCWorkQueueItemHeader(EngineType engineType) {
565+
uint32_t GUCWorkQueueItemHeader = 0x00030001;
566+
return GUCWorkQueueItemHeader;
567+
}
568+
554569
} // namespace OCLRT

runtime/command_stream/command_stream_receiver.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ MemoryManager *CommandStreamReceiver::getMemoryManager() {
127127

128128
void CommandStreamReceiver::setMemoryManager(MemoryManager *mm) {
129129
memoryManager = mm;
130+
if (flatBatchBufferHelper) {
131+
flatBatchBufferHelper->setMemoryManager(mm);
132+
}
130133
}
131134

132135
LinearStream &CommandStreamReceiver::getCS(size_t minRequiredSize) {

runtime/command_stream/command_stream_receiver.h

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "runtime/helpers/completion_stamp.h"
2828
#include "runtime/helpers/aligned_memory.h"
2929
#include "runtime/helpers/address_patch.h"
30+
#include "runtime/helpers/flat_batch_buffer_helper.h"
3031
#include "runtime/command_stream/csr_definitions.h"
3132
#include <cstddef>
3233
#include <cstdint>
@@ -40,16 +41,16 @@ class MemoryManager;
4041
class OSInterface;
4142
class GraphicsAllocation;
4243

44+
enum class DispatchMode {
45+
DeviceDefault = 0, //default for given device
46+
ImmediateDispatch, //everything is submitted to the HW immediately
47+
AdaptiveDispatch, //dispatching is handled to async thread, which combines batch buffers basing on load (not implemented)
48+
BatchedDispatchWithCounter, //dispatching is batched, after n commands there is implicit flush (not implemented)
49+
BatchedDispatch // dispatching is batched, explicit clFlush is required
50+
};
51+
4352
class CommandStreamReceiver {
4453
public:
45-
enum DispatchMode {
46-
DeviceDefault = 0, //default for given device
47-
ImmediateDispatch, //everything is submitted to the HW immediately
48-
AdaptiveDispatch, //dispatching is handled to async thread, which combines batch buffers basing on load (not implemented)
49-
BatchedDispatchWithCounter, //dispatching is batched, after n commands there is implicit flush (not implemented)
50-
BatchedDispatch // dispatching is batched, explicit clFlush is required
51-
};
52-
5354
enum class SamplerCacheFlushState {
5455
samplerCacheFlushNotRequired,
5556
samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image
@@ -102,7 +103,7 @@ class CommandStreamReceiver {
102103

103104
uint32_t peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; }
104105

105-
void overrideDispatchPolicy(CommandStreamReceiver::DispatchMode overrideValue) { this->dispatchMode = overrideValue; }
106+
void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; }
106107

107108
virtual void overrideMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
108109

@@ -122,8 +123,8 @@ class CommandStreamReceiver {
122123

123124
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
124125

125-
// Collect patch info data
126-
virtual bool setPatchInfoData(PatchInfoData &data) { return false; }
126+
FlatBatchBufferHelper &getFlatBatchBufferHelper() { return *flatBatchBufferHelper.get(); }
127+
void overwriteFlatBatchBufferHelper(FlatBatchBufferHelper *newHelper) { flatBatchBufferHelper.reset(newHelper); }
127128

128129
protected:
129130
void setDisableL3Cache(bool val) {
@@ -167,11 +168,12 @@ class CommandStreamReceiver {
167168
std::unique_ptr<OSInterface> osInterface;
168169
std::unique_ptr<SubmissionAggregator> submissionAggregator;
169170

170-
DispatchMode dispatchMode = ImmediateDispatch;
171+
DispatchMode dispatchMode = DispatchMode::ImmediateDispatch;
171172
bool disableL3Cache = false;
172173
uint32_t requiredScratchSize = 0;
173174
uint64_t totalMemoryUsed = 0u;
174175
SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
176+
std::unique_ptr<FlatBatchBufferHelper> flatBatchBufferHelper;
175177
};
176178

177179
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(const HardwareInfo &hwInfoIn, bool withAubDump);

runtime/command_stream/command_stream_receiver_hw.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
5555
int getRequiredPipeControlSize() const;
5656

5757
static void addBatchBufferEnd(LinearStream &commandStream, void **patchLocation);
58-
static void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress);
58+
void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress);
5959
static void alignToCacheLine(LinearStream &commandStream);
6060

6161
size_t getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags);
@@ -71,13 +71,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
7171
const HardwareInfo &peekHwInfo() const { return hwInfo; }
7272

7373
void collectStateBaseAddresPatchInfo(
74-
uint64_t baseAddress,
74+
uint64_t commandBufferAddress,
7575
uint64_t commandOffset,
7676
const LinearStream &dsh,
7777
const LinearStream &ioh,
7878
const LinearStream &ssh,
79-
uint64_t generalStateBase,
80-
uint64_t internalHeapBaseAddress);
79+
uint64_t generalStateBase);
8180

8281
void resetKmdNotifyHelper(KmdNotifyHelper *newHelper);
8382

0 commit comments

Comments
 (0)