Skip to content

Commit 5336e87

Browse files
Remove the 256 work-item limit on local workgroup size (maximum is now capped at 1024).
Change-Id: I30accf0dbf8086a10fb96d29924a7a2c4ef15eb0
1 parent ef02827 commit 5336e87

File tree

4 files changed

+33
-27
lines changed

4 files changed

+33
-27
lines changed

runtime/device/device_caps.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,13 +267,10 @@ void Device::initializeCaps() {
267267
? (systemInfo.EUCount / systemInfo.SubSliceCount)
268268
: systemInfo.EuCountPerPoolMin;
269269
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
270-
auto maxWkgSize = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 1024u : 256u;
271270
auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed;
272271

273272
maxWS = Math::prevPowerOfTwo(uint32_t(maxWS));
274-
deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), maxWkgSize);
275-
276-
DEBUG_BREAK_IF(!DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() && deviceInfo.maxWorkGroupSize > 256);
273+
deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), 1024u);
277274

278275
// calculate a maximum number of subgroups in a workgroup (for the required SIMD size)
279276
deviceInfo.maxNumOfSubGroups = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize / simdSizeUsed);

unit_tests/api/cl_get_kernel_sub_group_info_tests.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture<HelloWorldFixtureFactory> {
1919
maxWorkDim = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkItemDimensions);
2020
ASSERT_EQ(3u, maxWorkDim);
2121
maxWorkGroupSize = static_cast<size_t>(pDevice->getDeviceInfo().maxWorkGroupSize);
22-
ASSERT_EQ(256u, maxWorkGroupSize);
22+
ASSERT_GE(1024u, maxWorkGroupSize);
2323
largestCompiledSIMDSize = static_cast<size_t>(pKernel->getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize);
2424
ASSERT_EQ(32u, largestCompiledSIMDSize);
2525

unit_tests/context/driver_diagnostics_enqueue_tests.cpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -731,23 +731,17 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS
731731
}
732732

733733
TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
734-
735-
size_t preferredWorkGroupSize[3];
736734
size_t localWorkGroupSize[3];
737735
int badSizeDimension;
738736
uint32_t workDim = globalWorkGroupSize[1] == 1 ? 1 : globalWorkGroupSize[2] == 1 ? 2 : 3;
739-
auto maxWorkGroupSize = static_cast<uint32_t>(pPlatform->getDevice(0)->getDeviceInfo().maxWorkGroupSize);
740-
uint32_t simdSize = 32;
741-
if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
742-
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, simdSize, 0u, IGFX_GEN9_CORE, 32u, 0u, false, false);
743-
computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, workDim);
744-
} else if (DebugManager.flags.EnableComputeWorkSizeSquared.get() && workDim == 2) {
745-
computeWorkgroupSizeSquared(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, simdSize, workDim);
746-
} else
747-
computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, simdSize);
748-
for (auto i = 0; i < 3; i++) {
749-
localWorkGroupSize[i] = preferredWorkGroupSize[i];
750-
}
737+
738+
DispatchInfo dispatchInfo(kernel, workDim, Vec3<size_t>(globalWorkGroupSize), Vec3<size_t>(0u, 0u, 0u), Vec3<size_t>(0u, 0u, 0u));
739+
740+
auto computedLocalWorkgroupSize = computeWorkgroupSize(dispatchInfo);
741+
742+
localWorkGroupSize[0] = computedLocalWorkgroupSize.x;
743+
localWorkGroupSize[1] = computedLocalWorkgroupSize.y;
744+
localWorkGroupSize[2] = computedLocalWorkgroupSize.z;
751745

752746
badSizeDimension = GetParam();
753747
if (localWorkGroupSize[badSizeDimension] > 1) {
@@ -761,7 +755,7 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn
761755

762756
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BAD_LOCAL_WORKGROUP_SIZE],
763757
localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2], kernel->getKernelInfo().name.c_str(),
764-
preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]);
758+
computedLocalWorkgroupSize.x, computedLocalWorkgroupSize.y, computedLocalWorkgroupSize.z);
765759
EXPECT_TRUE(containsHint(expectedHint, userData));
766760
}
767761

unit_tests/device/device_caps_tests.cpp

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ TEST(DeviceGetCaps, givenDeviceThatDoesHaveFp64WhenDbgFlagDisablesFp64ThenDontRe
662662
EXPECT_NE(notExpectedSingleFp, actualSingleFp);
663663
}
664664

665-
TEST(Device_GetCaps, givenOclVersionLessThan21WhenCapsAreCreatedThenDeviceReportsNoSupportedIlVersions) {
665+
TEST(DeviceGetCaps, givenOclVersionLessThan21WhenCapsAreCreatedThenDeviceReportsNoSupportedIlVersions) {
666666
DebugManagerStateRestore dbgRestorer;
667667
{
668668
DebugManager.flags.ForceOCLVersion.set(12);
@@ -673,7 +673,7 @@ TEST(Device_GetCaps, givenOclVersionLessThan21WhenCapsAreCreatedThenDeviceReport
673673
}
674674
}
675675

676-
TEST(Device_GetCaps, givenOclVersion21WhenCapsAreCreatedThenDeviceReportsSpirvAsSupportedIl) {
676+
TEST(DeviceGetCaps, givenOclVersion21WhenCapsAreCreatedThenDeviceReportsSpirvAsSupportedIl) {
677677
DebugManagerStateRestore dbgRestorer;
678678
{
679679
DebugManager.flags.ForceOCLVersion.set(21);
@@ -684,7 +684,7 @@ TEST(Device_GetCaps, givenOclVersion21WhenCapsAreCreatedThenDeviceReportsSpirvAs
684684
}
685685
}
686686

687-
TEST(Device_GetCaps, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreEuCountPerPoolMin) {
687+
TEST(DeviceGetCaps, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreEuCountPerPoolMin) {
688688
GT_SYSTEM_INFO mySysInfo = *platformDevices[0]->pSysInfo;
689689
FeatureTable mySkuTable = *platformDevices[0]->pSkuTable;
690690
HardwareInfo myHwInfo = {platformDevices[0]->pPlatform, &mySkuTable, platformDevices[0]->pWaTable,
@@ -698,12 +698,12 @@ TEST(Device_GetCaps, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnore
698698

699699
auto expectedMaxWGS = (mySysInfo.EUCount / mySysInfo.SubSliceCount) *
700700
(mySysInfo.ThreadCount / mySysInfo.EUCount) * 8;
701-
expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 256u);
701+
expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u);
702702

703703
EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize);
704704
}
705705

706-
TEST(Device_GetCaps, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
706+
TEST(DeviceGetCaps, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
707707
GT_SYSTEM_INFO mySysInfo = *platformDevices[0]->pSysInfo;
708708
FeatureTable mySkuTable = *platformDevices[0]->pSkuTable;
709709
HardwareInfo myHwInfo = {platformDevices[0]->pPlatform, &mySkuTable, platformDevices[0]->pWaTable,
@@ -716,12 +716,12 @@ TEST(Device_GetCaps, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgn
716716
auto device = std::unique_ptr<Device>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
717717

718718
auto expectedMaxWGS = mySysInfo.EuCountPerPoolMin * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8;
719-
expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 256u);
719+
expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u);
720720

721721
EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize);
722722
}
723723

724-
TEST(Device_GetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCapsAreCreatedThen1024WorkgroupSizeIsReturned) {
724+
TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCapsAreCreatedThen1024WorkgroupSizeIsReturned) {
725725
DebugManagerStateRestore dbgRestorer;
726726
DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.set(true);
727727

@@ -739,6 +739,21 @@ TEST(Device_GetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCa
739739
EXPECT_EQ(device->getDeviceInfo().maxWorkGroupSize / 32, device->getDeviceInfo().maxNumOfSubGroups);
740740
}
741741

742+
TEST(DeviceGetCaps, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) {
743+
GT_SYSTEM_INFO mySysInfo = *platformDevices[0]->pSysInfo;
744+
FeatureTable mySkuTable = *platformDevices[0]->pSkuTable;
745+
HardwareInfo myHwInfo = {platformDevices[0]->pPlatform, &mySkuTable, platformDevices[0]->pWaTable,
746+
&mySysInfo, platformDevices[0]->capabilityTable};
747+
748+
mySysInfo.EUCount = 32;
749+
mySysInfo.SubSliceCount = 2;
750+
mySysInfo.ThreadCount = 32 * 8; // 128 threads per subslice, in simd 8 gives 1024
751+
auto device = std::unique_ptr<Device>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
752+
753+
EXPECT_EQ(1024u, device->getDeviceInfo().maxWorkGroupSize);
754+
EXPECT_EQ(device->getDeviceInfo().maxWorkGroupSize / 8, device->getDeviceInfo().maxNumOfSubGroups);
755+
}
756+
742757
class DriverInfoMock : public DriverInfo {
743758
public:
744759
DriverInfoMock(){};

0 commit comments

Comments
 (0)