Skip to content

Commit 83c81b6

Browse files
OCL: Fix error for too big dimensions
- cases with null lws should only fail when computed lws sizes result in too big number of workgroups Related-To: NEO-6976 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com> Source: f77f470
1 parent 6540a6d commit 83c81b6

File tree

3 files changed

+74
-7
lines changed

3 files changed

+74
-7
lines changed

opencl/source/command_queue/enqueue_common.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,21 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCou
9494
}
9595
}
9696

97+
if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
98+
if (!multiDispatchInfo.empty()) {
99+
for (auto &dispatchInfo : multiDispatchInfo) {
100+
auto nwgs = dispatchInfo.getNumberOfWorkgroups();
101+
102+
for (auto i = 0u; i < workDim; i++) {
103+
uint64_t dimension = static_cast<uint64_t>(nwgs[i]);
104+
if (dimension > std::numeric_limits<uint32_t>::max()) {
105+
return CL_INVALID_GLOBAL_WORK_SIZE;
106+
}
107+
}
108+
}
109+
}
110+
}
111+
97112
if (AuxTranslationMode::Builtin == auxTranslationMode) {
98113
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
99114
}

opencl/source/command_queue/enqueue_kernel.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
131131
return CL_INVALID_WORK_GROUP_SIZE;
132132
}
133133

134-
for (auto i = 0u; i < workDim; i++) {
135-
uint64_t dimension = static_cast<uint64_t>(region[i]) / workGroupSize[i];
136-
if (dimension > std::numeric_limits<uint32_t>::max()) {
137-
return CL_INVALID_GLOBAL_WORK_SIZE;
138-
}
139-
}
140-
141134
return enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(
142135
surfaces,
143136
false,

opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,65 @@ TEST_F(EnqueueKernelTest, givenEnqueueCommandWithWorkDimsResultingInMoreThan32Bi
15341534
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, status);
15351535
}
15361536

1537+
TEST_F(EnqueueKernelTest, givenEnqueueCommandWithNullLwsAndWorkDimsResultingInMoreThan32BitMaxGroupsWhenEnqueueNDRangeKernelIsCalledThenInvalidGlobalSizeIsReturned) {
1538+
1539+
if (sizeof(size_t) < 8) {
1540+
GTEST_SKIP();
1541+
}
1542+
1543+
auto maxWgSize = static_cast<uint32_t>(pClDevice->getDevice().getDeviceInfo().maxWorkGroupSize);
1544+
1545+
size_t max32Bit = std::numeric_limits<uint32_t>::max();
1546+
size_t globalWorkSize[3] = {(max32Bit + 1) * maxWgSize, 3, 4};
1547+
MockKernelWithInternals mockKernel(*pClDevice);
1548+
auto testedWorkDim = 3;
1549+
1550+
auto status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
1551+
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, status);
1552+
1553+
globalWorkSize[0] = (max32Bit + 1) * maxWgSize + 3;
1554+
status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
1555+
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, status);
1556+
1557+
globalWorkSize[0] = 4;
1558+
globalWorkSize[1] = (max32Bit + 1) * maxWgSize;
1559+
1560+
status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
1561+
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, status);
1562+
1563+
globalWorkSize[1] = 4;
1564+
globalWorkSize[2] = (max32Bit + 1) * maxWgSize * 2 + 3;
1565+
1566+
status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
1567+
EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, status);
1568+
}
1569+
1570+
TEST_F(EnqueueKernelTest, givenEnqueueCommandWithNullLwsAndWorkDimsResultingInLessThan32BitMaxGroupsWhenEnqueueNDRangeKernelIsCalledThenSuccessIsReturned) {
1571+
1572+
if (sizeof(size_t) < 8) {
1573+
GTEST_SKIP();
1574+
}
1575+
1576+
size_t max32Bit = std::numeric_limits<uint32_t>::max();
1577+
size_t globalWorkSize[3] = {(max32Bit + 1) * 4, 1, 1};
1578+
MockKernelWithInternals mockKernel(*pClDevice);
1579+
auto testedWorkDim = 3;
1580+
1581+
auto status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
1582+
EXPECT_EQ(CL_SUCCESS, status);
1583+
1584+
globalWorkSize[0] = 1;
1585+
globalWorkSize[1] = (max32Bit + 1) * 4;
1586+
status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
1587+
EXPECT_EQ(CL_SUCCESS, status);
1588+
1589+
globalWorkSize[1] = 1;
1590+
globalWorkSize[2] = (max32Bit + 1) * 4;
1591+
1592+
status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
1593+
EXPECT_EQ(CL_SUCCESS, status);
1594+
}
1595+
15371596
HWTEST_F(EnqueueKernelTest, givenVMEKernelWhenEnqueueKernelThenDispatchFlagsHaveMediaSamplerRequired) {
15381597
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
15391598
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

0 commit comments

Comments
 (0)