Skip to content

Commit e07f9f0

Browse files
Add kernel algorithm to check any argument is using system memory
Related-To: NEO-6959
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
1 parent 5a3a39a commit e07f9f0

19 files changed

+794
-289
lines changed

opencl/source/api/api.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4898,37 +4898,37 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
48984898
const void *argValue) {
48994899
TRACING_ENTER(ClSetKernelArgSvmPointer, &kernel, &argIndex, &argValue);
49004900

4901-
MultiDeviceKernel *pMultiDeviceKernel = nullptr;
4901+
MultiDeviceKernel *multiDeviceKernel = nullptr;
49024902

4903-
auto retVal = validateObjects(withCastToInternal(kernel, &pMultiDeviceKernel));
4903+
auto retVal = validateObjects(withCastToInternal(kernel, &multiDeviceKernel));
49044904
API_ENTER(&retVal);
49054905

49064906
if (CL_SUCCESS != retVal) {
49074907
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
49084908
return retVal;
49094909
}
49104910

4911-
if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) {
4911+
if (argIndex >= multiDeviceKernel->getKernelArgsNumber()) {
49124912
retVal = CL_INVALID_ARG_INDEX;
49134913
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
49144914
return retVal;
49154915
}
49164916

4917-
const auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager();
4917+
const auto svmManager = multiDeviceKernel->getContext().getSVMAllocsManager();
49184918

49194919
if (argValue != nullptr) {
4920-
if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
4921-
pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
4920+
if (multiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
4921+
multiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
49224922
bool reuseFromCache = false;
49234923
const auto allocationsCounter = svmManager->allocationsCounter.load();
49244924
if (allocationsCounter > 0) {
4925-
if (allocationsCounter == pMultiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) {
4925+
if (allocationsCounter == multiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) {
49264926
reuseFromCache = true;
49274927
} else {
49284928
const auto svmData = svmManager->getSVMAlloc(argValue);
4929-
if (svmData && pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) {
4929+
if (svmData && multiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) {
49304930
reuseFromCache = true;
4931-
pMultiDeviceKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocationsCounter);
4931+
multiDeviceKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocationsCounter);
49324932
}
49334933
}
49344934
if (reuseFromCache) {
@@ -4938,15 +4938,15 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
49384938
}
49394939
}
49404940
} else {
4941-
if (pMultiDeviceKernel->getKernelArguments()[argIndex].isSetToNullptr) {
4941+
if (multiDeviceKernel->getKernelArguments()[argIndex].isSetToNullptr) {
49424942
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
49434943
return CL_SUCCESS;
49444944
}
49454945
}
49464946

49474947
DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue);
49484948

4949-
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
4949+
for (const auto &pDevice : multiDeviceKernel->getDevices()) {
49504950
const HardwareInfo &hwInfo = pDevice->getHardwareInfo();
49514951
if (!hwInfo.capabilityTable.ftrSvm) {
49524952
retVal = CL_INVALID_OPERATION;
@@ -4955,8 +4955,8 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
49554955
}
49564956
}
49574957

4958-
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
4959-
auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex());
4958+
for (const auto &pDevice : multiDeviceKernel->getDevices()) {
4959+
auto pKernel = multiDeviceKernel->getKernel(pDevice->getRootDeviceIndex());
49604960
cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo()
49614961
.kernelDescriptor.payloadMappings.explicitArgs[argIndex]
49624962
.getTraits()
@@ -4969,25 +4969,25 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
49694969
}
49704970
}
49714971

4972-
MultiGraphicsAllocation *pSvmAllocs = nullptr;
4972+
MultiGraphicsAllocation *svmAllocs = nullptr;
49734973
uint32_t allocId = 0u;
49744974
if (argValue != nullptr) {
49754975
auto svmData = svmManager->getSVMAlloc(argValue);
49764976
if (svmData == nullptr) {
4977-
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
4977+
for (const auto &pDevice : multiDeviceKernel->getDevices()) {
49784978
if (!pDevice->areSharedSystemAllocationsAllowed()) {
49794979
retVal = CL_INVALID_ARG_VALUE;
49804980
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
49814981
return retVal;
49824982
}
49834983
}
49844984
} else {
4985-
pSvmAllocs = &svmData->gpuAllocations;
4985+
svmAllocs = &svmData->gpuAllocations;
49864986
allocId = svmData->getAllocId();
49874987
}
49884988
}
49894989

4990-
retVal = pMultiDeviceKernel->setArgSvmAlloc(argIndex, const_cast<void *>(argValue), pSvmAllocs, allocId);
4990+
retVal = multiDeviceKernel->setArgSvmAlloc(argIndex, const_cast<void *>(argValue), svmAllocs, allocId);
49914991
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
49924992
return retVal;
49934993
}

opencl/source/command_queue/enqueue_common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
447447
dispatchWalkerArgs.timestampPacketDependencies = &timestampPacketDependencies;
448448
dispatchWalkerArgs.currentTimestampPacketNodes = timestampPacketContainer.get();
449449
dispatchWalkerArgs.commandType = commandType;
450+
dispatchWalkerArgs.event = event;
450451

451452
HardwareInterface<GfxFamily>::dispatchWalker(
452453
*this,

opencl/source/command_queue/hardware_interface.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ namespace NEO {
1616

1717
class CommandQueue;
1818
class DispatchInfo;
19+
class Event;
1920
class IndirectHeap;
2021
class Kernel;
2122
class LinearStream;
@@ -37,6 +38,7 @@ struct HardwareInterfaceWalkerArgs {
3738
const Vec3<size_t> *numberOfWorkgroups = nullptr;
3839
const Vec3<size_t> *startOfWorkgroups = nullptr;
3940
KernelOperation *blockedCommandsData = nullptr;
41+
Event *event = nullptr;
4042
size_t currentDispatchIndex = 0;
4143
size_t offsetInterfaceDescriptorTable = 0;
4244
PreemptionMode preemptionMode = PreemptionMode::Initial;

opencl/source/kernel/kernel.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ cl_int Kernel::cloneKernel(Kernel *pSourceKernel) {
362362
break;
363363
case SVM_OBJ:
364364
setArgSvm(i, pSourceKernel->getKernelArgInfo(i).size, const_cast<void *>(pSourceKernel->getKernelArgInfo(i).value),
365-
pSourceKernel->getKernelArgInfo(i).pSvmAlloc, pSourceKernel->getKernelArgInfo(i).svmFlags);
365+
pSourceKernel->getKernelArgInfo(i).svmAllocation, pSourceKernel->getKernelArgInfo(i).svmFlags);
366366
break;
367367
case SVM_ALLOC_OBJ:
368368
setArgSvmAlloc(i, const_cast<void *>(pSourceKernel->getKernelArgInfo(i).value),
@@ -881,8 +881,10 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
881881
patchedArgumentsNum++;
882882
kernelArguments[argIndex].isPatched = true;
883883
}
884+
if (svmPtr != nullptr) {
885+
this->anyKernelArgumentUsingSystemMemory |= true;
886+
}
884887
addAllocationToCacheFlushVector(argIndex, svmAlloc);
885-
886888
return CL_SUCCESS;
887889
}
888890

@@ -894,6 +896,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
894896
auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless);
895897
patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, reinterpret_cast<uintptr_t>(svmPtr));
896898

899+
auto &kernelArgInfo = kernelArguments[argIndex];
900+
897901
bool disableL3 = false;
898902
bool forceNonAuxMode = false;
899903
bool isAuxTranslationKernel = (AuxTranslationDirection::None != auxTranslationDirection);
@@ -910,7 +914,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
910914
forceNonAuxMode = true;
911915
}
912916

913-
bool argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable;
917+
bool argWasUncacheable = kernelArgInfo.isStatelessUncacheable;
914918
bool argIsUncacheable = svmAlloc ? svmAlloc->isUncacheable() : false;
915919
statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0);
916920

@@ -929,15 +933,21 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
929933
}
930934

931935
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
932-
kernelArguments[argIndex].allocId = allocId;
933-
kernelArguments[argIndex].allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u;
934-
kernelArguments[argIndex].isSetToNullptr = nullptr == svmPtr;
935-
if (!kernelArguments[argIndex].isPatched) {
936+
kernelArgInfo.allocId = allocId;
937+
kernelArgInfo.allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u;
938+
kernelArgInfo.isSetToNullptr = nullptr == svmPtr;
939+
if (!kernelArgInfo.isPatched) {
936940
patchedArgumentsNum++;
937-
kernelArguments[argIndex].isPatched = true;
941+
kernelArgInfo.isPatched = true;
942+
}
943+
if (!kernelArgInfo.isSetToNullptr) {
944+
if (svmAlloc != nullptr) {
945+
this->anyKernelArgumentUsingSystemMemory |= graphicsAllocationTypeUseSystemMemory(svmAlloc->getAllocationType());
946+
} else {
947+
this->anyKernelArgumentUsingSystemMemory |= true;
948+
}
938949
}
939950
addAllocationToCacheFlushVector(argIndex, svmAlloc);
940-
941951
return CL_SUCCESS;
942952
}
943953

@@ -948,7 +958,7 @@ void Kernel::storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argO
948958
kernelArguments[argIndex].object = argObject;
949959
kernelArguments[argIndex].value = argValue;
950960
kernelArguments[argIndex].size = argSize;
951-
kernelArguments[argIndex].pSvmAlloc = argSvmAlloc;
961+
kernelArguments[argIndex].svmAllocation = argSvmAlloc;
952962
kernelArguments[argIndex].svmFlags = argSvmFlags;
953963
}
954964

@@ -1391,8 +1401,12 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
13911401
storeKernelArg(argIndex, BUFFER_OBJ, clMemObj, argVal, argSize);
13921402

13931403
auto buffer = castToObject<Buffer>(clMemObj);
1394-
if (!buffer)
1404+
if (!buffer) {
13951405
return CL_INVALID_MEM_OBJECT;
1406+
}
1407+
1408+
auto gfxAllocationType = buffer->getGraphicsAllocation(rootDeviceIndex)->getAllocationType();
1409+
this->anyKernelArgumentUsingSystemMemory |= graphicsAllocationTypeUseSystemMemory(gfxAllocationType);
13961410

13971411
if (buffer->peekSharingHandler()) {
13981412
usingSharedObjArgs = true;
@@ -1449,7 +1463,6 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
14491463
}
14501464

14511465
addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush);
1452-
14531466
return CL_SUCCESS;
14541467
} else {
14551468
storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize);
@@ -2237,4 +2250,11 @@ int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
22372250
return CL_SUCCESS;
22382251
}
22392252

2253+
bool Kernel::graphicsAllocationTypeUseSystemMemory(AllocationType type) {
2254+
return (type == AllocationType::BUFFER_HOST_MEMORY) ||
2255+
(type == AllocationType::EXTERNAL_HOST_PTR) ||
2256+
(type == AllocationType::SVM_CPU) ||
2257+
(type == AllocationType::SVM_ZERO_COPY);
2258+
}
2259+
22402260
} // namespace NEO

0 commit comments

Comments
 (0)