Skip to content

Commit 9bdf014

Browse files
[20/n] Internal 4GB allocator.
- Switch to the internal heap for kernel ISA allocations. - Remove IH from various functions. - Remove IHState from CSR, since IH is never dirty. - ISA is no longer copied on enqueue calls. Change-Id: I0099cf2a9ebab6192ea03a74dd35f7da963fd5a5
1 parent 9f07de3 commit 9bdf014

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+124
-318
lines changed

runtime/command_queue/dispatch_walker.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -579,8 +579,6 @@ void dispatchWalker(
579579
auto offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendIndirectState(
580580
*commandStream,
581581
*dsh,
582-
*ish,
583-
cmdQInstructionHeapReservedBlockSize,
584582
*ioh,
585583
*ssh,
586584
kernel,
@@ -750,8 +748,6 @@ void dispatchScheduler(
750748
auto offsetCrossThreadData = KernelCommandsHelper<GfxFamily>::sendIndirectState(
751749
*commandStream,
752750
*dsh,
753-
*ish,
754-
0,
755751
*ioh,
756752
*ssh,
757753
scheduler,

runtime/command_queue/enqueue_common.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
568568
commandStream,
569569
commandStreamStart,
570570
*dsh,
571-
getIndirectHeap(IndirectHeap::INSTRUCTION),
572571
*ioh,
573572
getIndirectHeap(IndirectHeap::SURFACE_STATE),
574573
taskLevel,

runtime/command_stream/command_stream_receiver.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@ class CommandStreamReceiver {
6060
virtual FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) = 0;
6161

6262
virtual CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
63-
const LinearStream &dsh, const LinearStream &ih,
64-
const LinearStream &ioh, const LinearStream &ssh,
63+
const LinearStream &dsh, const LinearStream &ioh, const LinearStream &ssh,
6564
uint32_t taskLevel, DispatchFlags &dispatchFlags) = 0;
6665

6766
virtual void flushBatchedSubmissions() = 0;

runtime/command_stream/command_stream_receiver_hw.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
4646
FlushStamp flush(BatchBuffer &batchBuffer, EngineType engineType, ResidencyContainer *allocationsForResidency) override;
4747

4848
CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
49-
const LinearStream &dsh, const LinearStream &ih,
50-
const LinearStream &ioh, const LinearStream &ssh,
49+
const LinearStream &dsh, const LinearStream &ioh, const LinearStream &ssh,
5150
uint32_t taskLevel, DispatchFlags &dispatchFlags) override;
5251

5352
void flushBatchedSubmissions() override;
@@ -72,10 +71,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
7271
uint64_t baseAddress,
7372
uint64_t commandOffset,
7473
const LinearStream &dsh,
75-
const LinearStream &ih,
7674
const LinearStream &ioh,
7775
const LinearStream &ssh,
78-
uint64_t generalStateBase);
76+
uint64_t generalStateBase,
77+
uint64_t internalHeapBaseAddress);
7978

8079
protected:
8180
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
@@ -96,7 +95,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
9695
static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate);
9796

9897
HeapDirtyState dshState;
99-
HeapDirtyState ihState;
10098
HeapDirtyState iohState;
10199
HeapDirtyState sshState;
102100

runtime/command_stream/command_stream_receiver_hw.inl

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
109109
LinearStream &commandStreamTask,
110110
size_t commandStreamStartTask,
111111
const LinearStream &dsh,
112-
const LinearStream &ih,
113112
const LinearStream &ioh,
114113
const LinearStream &ssh,
115114
uint32_t taskLevel,
@@ -229,11 +228,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
229228
programVFEState(commandStreamCSR, dispatchFlags);
230229

231230
bool dshDirty = dshState.updateAndCheck(&dsh);
232-
bool ihDirty = ihState.updateAndCheck(&ih);
233231
bool iohDirty = iohState.updateAndCheck(&ioh);
234232
bool sshDirty = sshState.updateAndCheck(&ssh);
235233

236-
auto isStateBaseAddressDirty = dshDirty || ihDirty || iohDirty || sshDirty || stateBaseAddressDirty;
234+
auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty;
237235

238236
auto requiredL3Index = CacheSettings::l3CacheOn;
239237
if (this->disableL3Cache) {
@@ -265,16 +263,16 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
265263
StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
266264
commandStreamCSR,
267265
dsh,
268-
ih,
269266
ioh,
270267
ssh,
271268
newGSHbase,
272-
requiredL3Index);
269+
requiredL3Index,
270+
memoryManager->getInternalHeapBaseAddress());
273271

274272
latestSentStatelessMocsConfig = requiredL3Index;
275273

276274
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
277-
collectStateBaseAddresPatchInfo(commandStream.getGpuBase(), stateBaseAddressCmdOffset, dsh, ih, ioh, ssh, newGSHbase);
275+
collectStateBaseAddresPatchInfo(commandStream.getGpuBase(), stateBaseAddressCmdOffset, dsh, ioh, ssh, newGSHbase, memoryManager->getInternalHeapBaseAddress());
278276
}
279277
}
280278

@@ -299,12 +297,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
299297
}
300298

301299
auto dshAllocation = dsh.getGraphicsAllocation();
302-
auto ihAllocation = ih.getGraphicsAllocation();
303300
auto iohAllocation = ioh.getGraphicsAllocation();
304301
auto sshAllocation = ssh.getGraphicsAllocation();
305302

306303
this->makeResident(*dshAllocation);
307-
this->makeResident(*ihAllocation);
308304
this->makeResident(*iohAllocation);
309305
this->makeResident(*sshAllocation);
310306

@@ -650,18 +646,18 @@ void CommandStreamReceiverHw<GfxFamily>::collectStateBaseAddresPatchInfo(
650646
uint64_t baseAddress,
651647
uint64_t commandOffset,
652648
const LinearStream &dsh,
653-
const LinearStream &ih,
654649
const LinearStream &ioh,
655650
const LinearStream &ssh,
656-
uint64_t generalStateBase) {
651+
uint64_t generalStateBase,
652+
uint64_t internalHeapOffset) {
657653

658654
typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
659655

660656
PatchInfoData dynamicStatePatchInfo = {dsh.getGpuBase(), 0u, PatchInfoAllocationType::DynamicStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
661657
PatchInfoData generalStatePatchInfo = {generalStateBase, 0u, PatchInfoAllocationType::GeneralStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
662658
PatchInfoData surfaceStatePatchInfo = {ssh.getGpuBase(), 0u, PatchInfoAllocationType::SurfaceStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
663659
PatchInfoData indirectObjectPatchInfo = {ioh.getGpuBase(), 0u, PatchInfoAllocationType::IndirectObjectHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
664-
PatchInfoData instructionPatchInfo = {ih.getGpuBase(), 0u, PatchInfoAllocationType::InstructionHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INSTRUCTIONBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
660+
PatchInfoData instructionPatchInfo = {internalHeapOffset, 0u, PatchInfoAllocationType::InstructionHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INSTRUCTIONBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default};
665661

666662
setPatchInfoData(dynamicStatePatchInfo);
667663
setPatchInfoData(generalStatePatchInfo);

runtime/device_queue/device_queue_hw.inl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,11 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &instructionHeap,
325325
for (uint32_t i = 0; i < blockCount; i++) {
326326
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
327327

328+
auto blockAllocation = pBlockInfo->getGraphicsAllocation();
329+
DEBUG_BREAK_IF(!blockAllocation);
330+
331+
auto gpuAddress = blockAllocation ? blockAllocation->getGpuAddressToPatch() : 0llu;
332+
328333
auto bindingTableCount = pBlockInfo->patchInfo.bindingTableState->Count;
329334
maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount);
330335

@@ -336,17 +341,14 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &instructionHeap,
336341

337342
// Determine SIMD size
338343
uint32_t simd = pBlockInfo->getMaxSimdSize();
339-
// Copy the kernel over to the ISH
340-
uint64_t kernelStartOffset = (uint64_t)KernelCommandsHelper<GfxFamily>::copyKernelBinary(instructionHeap, *pBlockInfo);
341-
342344
DEBUG_BREAK_IF(pBlockInfo->patchInfo.interfaceDescriptorData == nullptr);
343345

344346
uint32_t idOffset = pBlockInfo->patchInfo.interfaceDescriptorData->Offset;
345347
const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast<const INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset));
346348

347349
pIDDestination[blockIndex + i] = *pBlockID;
348-
pIDDestination[blockIndex + i].setKernelStartPointerHigh(kernelStartOffset >> 32);
349-
pIDDestination[blockIndex + i].setKernelStartPointer((uint32_t)kernelStartOffset);
350+
pIDDestination[blockIndex + i].setKernelStartPointerHigh(gpuAddress >> 32);
351+
pIDDestination[blockIndex + i].setKernelStartPointer((uint32_t)gpuAddress);
350352
pIDDestination[blockIndex + i].setBarrierEnable(pBlockInfo->patchInfo.executionEnvironment->HasBarriers > 0);
351353
pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
352354

runtime/helpers/kernel_commands.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,6 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
4545

4646
static uint32_t computeSlmValues(uint32_t valueIn);
4747

48-
static size_t copyKernelBinary(
49-
IndirectHeap &indirectHeap,
50-
const KernelInfo &kernelInfo);
51-
5248
static size_t sendInterfaceDescriptorData(
5349
const IndirectHeap &indirectHeap,
5450
uint64_t offsetInterfaceDescriptor,
@@ -96,8 +92,6 @@ struct KernelCommandsHelper : public PerThreadDataHelper {
9692
static size_t sendIndirectState(
9793
LinearStream &commandStream,
9894
IndirectHeap &dsh,
99-
IndirectHeap &ih,
100-
size_t ihReservedBlockSize,
10195
IndirectHeap &ioh,
10296
IndirectHeap &ssh,
10397
Kernel &kernel,

runtime/helpers/kernel_commands.inl

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -144,24 +144,6 @@ size_t KernelCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(
144144
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); });
145145
}
146146

147-
template <typename GfxFamily>
148-
size_t KernelCommandsHelper<GfxFamily>::copyKernelBinary(
149-
IndirectHeap &indirectHeap,
150-
const KernelInfo &kernelInfo) {
151-
const auto alignKernelBinary = 64 * sizeof(uint8_t);
152-
indirectHeap.align(alignKernelBinary);
153-
154-
auto kernelStartOffset = indirectHeap.getUsed();
155-
156-
auto pKernelHeap = kernelInfo.heapInfo.pKernelHeap;
157-
auto kernelHeapSize = kernelInfo.heapInfo.pKernelHeader->KernelHeapSize;
158-
159-
auto pKernelDataDst = indirectHeap.getSpace(kernelHeapSize);
160-
memcpy_s(pKernelDataDst, kernelHeapSize, pKernelHeap, kernelHeapSize);
161-
162-
return kernelStartOffset;
163-
}
164-
165147
template <typename GfxFamily>
166148
size_t KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
167149
const IndirectHeap &indirectHeap,
@@ -331,8 +313,6 @@ template <typename GfxFamily>
331313
size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
332314
LinearStream &commandStream,
333315
IndirectHeap &dsh,
334-
IndirectHeap &ih,
335-
size_t ihReservedBlockSize,
336316
IndirectHeap &ioh,
337317
IndirectHeap &ssh,
338318
Kernel &kernel,
@@ -349,9 +329,14 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
349329
DEBUG_BREAK_IF(simd != 8 && simd != 16 && simd != 32);
350330

351331
// Kernel ISA is no longer copied here; the start offset is the GPU address of the kernel's own graphics allocation
352-
auto kernelStartOffset = copyKernelBinary(ih, kernel.getKernelInfo());
353-
332+
auto kernelStartOffset = 0llu;
354333
const auto &kernelInfo = kernel.getKernelInfo();
334+
auto kernelAllocation = kernelInfo.getGraphicsAllocation();
335+
DEBUG_BREAK_IF(!kernelAllocation);
336+
if (kernelAllocation) {
337+
kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
338+
}
339+
355340
const auto &patchInfo = kernelInfo.patchInfo;
356341

357342
auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, kernel);
@@ -416,7 +401,7 @@ size_t KernelCommandsHelper<GfxFamily>::sendIndirectState(
416401
KernelCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
417402
dsh,
418403
offsetInterfaceDescriptor,
419-
kernelStartOffset + ihReservedBlockSize,
404+
kernelStartOffset,
420405
kernel.getCrossThreadDataSize(),
421406
sizePerThreadData,
422407
dstBindingTablePointer,

runtime/helpers/state_base_address.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ struct StateBaseAddressHelper {
3232
static void programStateBaseAddress(
3333
LinearStream &commandStream,
3434
const LinearStream &dsh,
35-
const LinearStream &ih,
3635
const LinearStream &ioh,
3736
const LinearStream &ssh,
3837
uint64_t generalStateBase,
39-
uint32_t statelessMocsIndex);
38+
uint32_t statelessMocsIndex,
39+
uint64_t internalHeapBase);
4040
};
4141
}

runtime/helpers/state_base_address.inl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ template <typename GfxFamily>
3131
void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
3232
LinearStream &commandStream,
3333
const LinearStream &dsh,
34-
const LinearStream &ih,
3534
const LinearStream &ioh,
3635
const LinearStream &ssh,
3736
uint64_t generalStateBase,
38-
uint32_t statelessMocsIndex) {
37+
uint32_t statelessMocsIndex,
38+
uint64_t internalHeapBase) {
3939
typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
4040

4141
auto pCmd = (STATE_BASE_ADDRESS *)commandStream.getSpace(sizeof(STATE_BASE_ADDRESS));
@@ -52,7 +52,7 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
5252
pCmd->setGeneralStateBaseAddress(generalStateBase);
5353
pCmd->setSurfaceStateBaseAddress(reinterpret_cast<uintptr_t>(ssh.getCpuBase()));
5454
pCmd->setIndirectObjectBaseAddress(reinterpret_cast<uintptr_t>(ioh.getCpuBase()));
55-
pCmd->setInstructionBaseAddress(reinterpret_cast<uintptr_t>(ih.getCpuBase()));
55+
pCmd->setInstructionBaseAddress(internalHeapBase);
5656

5757
pCmd->setDynamicStateBufferSizeModifyEnable(true);
5858
pCmd->setGeneralStateBufferSizeModifyEnable(true);
@@ -62,7 +62,8 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
6262
pCmd->setDynamicStateBufferSize(static_cast<uint32_t>((dsh.getMaxAvailableSpace() + MemoryConstants::pageMask) / MemoryConstants::pageSize));
6363
pCmd->setGeneralStateBufferSize(static_cast<uint32_t>(-1));
6464
pCmd->setIndirectObjectBufferSize(static_cast<uint32_t>((ioh.getMaxAvailableSpace() + MemoryConstants::pageMask) / MemoryConstants::pageSize));
65-
pCmd->setInstructionBufferSize(static_cast<uint32_t>((ih.getMaxAvailableSpace() + +MemoryConstants::pageMask) / MemoryConstants::pageSize));
65+
66+
pCmd->setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
6667

6768
//set cache settings
6869
pCmd->setStatelessDataPortAccessMemoryObjectControlState(Gmm::getMOCS(statelessMocsIndex));

0 commit comments

Comments
 (0)