Skip to content

Commit afceaa6

Browse files
Use system fence only when using system allocations or system scope event
Related-To: NEO-6959
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
1 parent 9fa1c9d commit afceaa6

File tree

14 files changed

+1001
-245
lines changed

14 files changed

+1001
-245
lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ struct CmdListKernelLaunchParams {
3333
bool isCooperative = false;
3434
bool isKernelSplitOperation = false;
3535
bool isBuiltInKernel = false;
36+
bool isDestinationAllocationInSystemMemory = false;
3637
};
3738

3839
struct CommandList : _ze_command_list_handle_t {

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,8 +630,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
630630
ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY,
631631
pSrcRegion->depth / groupSizeZ};
632632

633+
auto dstAllocationType = allocationStruct.alloc->getAllocationType();
633634
CmdListKernelLaunchParams launchParams = {};
634635
launchParams.isBuiltInKernel = true;
636+
launchParams.isDestinationAllocationInSystemMemory =
637+
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
638+
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
635639
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
636640
hEvent, numWaitEvents, phWaitEvents, launchParams);
637641

@@ -862,9 +866,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
862866
uint32_t groups = static_cast<uint32_t>((size + ((static_cast<uint64_t>(groupSizeX) * elementSize) - 1)) / (static_cast<uint64_t>(groupSizeX) * elementSize));
863867
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
864868

869+
auto dstAllocationType = dstPtrAlloc->getAllocationType();
865870
CmdListKernelLaunchParams launchParams = {};
866871
launchParams.isKernelSplitOperation = true;
867872
launchParams.isBuiltInKernel = true;
873+
launchParams.isDestinationAllocationInSystemMemory =
874+
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
875+
(dstAllocationType == NEO::AllocationType::SVM_CPU) ||
876+
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
877+
868878
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, launchParams);
869879
}
870880

@@ -1298,8 +1308,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
12981308
builtinFunction->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
12991309
builtinFunction->setArgumentValue(5, sizeof(dstPitches), &dstPitches);
13001310

1311+
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
13011312
CmdListKernelLaunchParams launchParams = {};
13021313
launchParams.isBuiltInKernel = true;
1314+
launchParams.isDestinationAllocationInSystemMemory =
1315+
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
1316+
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
13031317
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,
13041318
phWaitEvents, launchParams);
13051319
}
@@ -1354,8 +1368,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
13541368
builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
13551369
builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch);
13561370

1371+
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
13571372
CmdListKernelLaunchParams launchParams = {};
13581373
launchParams.isBuiltInKernel = true;
1374+
launchParams.isDestinationAllocationInSystemMemory =
1375+
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
1376+
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
13591377
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(),
13601378
&dispatchFuncArgs, hSignalEvent,
13611379
numWaitEvents,
@@ -1429,6 +1447,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
14291447
CmdListKernelLaunchParams launchParams = {};
14301448
launchParams.isKernelSplitOperation = true;
14311449
launchParams.isBuiltInKernel = true;
1450+
launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush;
14321451

14331452
if (patternSize == 1) {
14341453
Kernel *builtinFunction = nullptr;
@@ -2129,8 +2148,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
21292148
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
21302149
builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
21312150

2151+
auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType();
21322152
CmdListKernelLaunchParams launchParams = {};
21332153
launchParams.isBuiltInKernel = true;
2154+
launchParams.isDestinationAllocationInSystemMemory =
2155+
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
2156+
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
21342157
auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,
21352158
phWaitEvents, launchParams);
21362159
if (appendResult != ZE_RESULT_SUCCESS) {

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
131131
this->containsStatelessUncachedResource, // requiresUncachedMocs
132132
false, // useGlobalAtomics
133133
internalUsage, // isInternal
134-
launchParams.isCooperative // isCooperative
134+
launchParams.isCooperative, // isCooperative
135+
false, // isHostScopeSignalEvent
136+
false // isKernelUsingSystemAllocation
135137
};
136138

137139
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
157157
uint64_t eventAddress = 0;
158158
bool isTimestampEvent = false;
159159
bool l3FlushEnable = false;
160+
bool isHostSignalScopeEvent = false;
160161
if (hEvent) {
161162
auto event = Event::fromHandle(hEvent);
162163
eventAlloc = &event->getAllocation(this->device);
@@ -166,6 +167,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
166167
l3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(flushRequired, hwInfo);
167168
isTimestampEvent = event->isUsingContextEndOffset();
168169
eventAddress = event->getPacketAddress(this->device);
170+
isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
171+
}
172+
173+
bool isKernelUsingSystemAllocation = false;
174+
if (!launchParams.isBuiltInKernel) {
175+
auto &kernelAllocations = kernel->getResidencyContainer();
176+
for (auto &allocation : kernelAllocations) {
177+
if (allocation == nullptr) {
178+
continue;
179+
}
180+
if (allocation->getAllocationType() == NEO::AllocationType::BUFFER_HOST_MEMORY) {
181+
isKernelUsingSystemAllocation = true;
182+
}
183+
}
184+
} else {
185+
isKernelUsingSystemAllocation = launchParams.isDestinationAllocationInSystemMemory;
169186
}
170187

171188
if (kernel->hasIndirectAllocationsAllowed()) {
@@ -176,6 +193,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
176193
}
177194
if (unifiedMemoryControls.indirectHostAllocationsAllowed) {
178195
this->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
196+
isKernelUsingSystemAllocation = true;
179197
}
180198
if (unifiedMemoryControls.indirectSharedAllocationsAllowed) {
181199
this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
@@ -227,7 +245,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
227245
this->containsStatelessUncachedResource, // requiresUncachedMocs
228246
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics
229247
internalUsage, // isInternal
230-
launchParams.isCooperative // isCooperative
248+
launchParams.isCooperative, // isCooperative
249+
isHostSignalScopeEvent, // isHostScopeSignalEvent
250+
isKernelUsingSystemAllocation // isKernelUsingSystemAllocation
231251
};
232252
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
233253
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

level_zero/core/test/unit_tests/mocks/mock_cmdlist.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,5 +458,53 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
458458
uint32_t executeCommandListImmediateWithFlushTaskCalledCount = 0;
459459
};
460460

461+
struct CmdListHelper {
462+
NEO::GraphicsAllocation *isaAllocation = nullptr;
463+
NEO::ResidencyContainer residencyContainer;
464+
ze_group_count_t threadGroupDimensions;
465+
const uint32_t *groupSize = nullptr;
466+
uint32_t useOnlyGlobalTimestamp = std::numeric_limits<uint32_t>::max();
467+
bool isBuiltin = false;
468+
bool isDstInSystem = false;
469+
};
470+
471+
template <GFXCORE_FAMILY gfxCoreFamily>
472+
class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
473+
474+
public:
475+
CmdListHelper cmdListHelper;
476+
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
477+
const ze_group_count_t *pThreadGroupDimensions,
478+
ze_event_handle_t hEvent,
479+
uint32_t numWaitEvents,
480+
ze_event_handle_t *phWaitEvents,
481+
const CmdListKernelLaunchParams &launchParams) override {
482+
483+
const auto kernel = Kernel::fromHandle(hKernel);
484+
cmdListHelper.isaAllocation = kernel->getIsaAllocation();
485+
cmdListHelper.residencyContainer = kernel->getResidencyContainer();
486+
cmdListHelper.groupSize = kernel->getGroupSize();
487+
cmdListHelper.threadGroupDimensions = *pThreadGroupDimensions;
488+
489+
auto kernelName = kernel->getImmutableData()->getDescriptor().kernelMetadata.kernelName;
490+
NEO::ArgDescriptor arg;
491+
if (kernelName == "QueryKernelTimestamps") {
492+
arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[2u];
493+
} else if (kernelName == "QueryKernelTimestampsWithOffsets") {
494+
arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[3u];
495+
} else {
496+
return ZE_RESULT_SUCCESS;
497+
}
498+
auto crossThreadData = kernel->getCrossThreadData();
499+
auto element = arg.as<NEO::ArgDescValue>().elements[0];
500+
auto pDst = ptrOffset(crossThreadData, element.offset);
501+
cmdListHelper.useOnlyGlobalTimestamp = *(uint32_t *)(pDst);
502+
cmdListHelper.isBuiltin = launchParams.isBuiltInKernel;
503+
cmdListHelper.isDstInSystem = launchParams.isDestinationAllocationInSystemMemory;
504+
505+
return ZE_RESULT_SUCCESS;
506+
}
507+
};
508+
461509
} // namespace ult
462510
} // namespace L0

0 commit comments

Comments
 (0)