Skip to content

Commit 3cab63f

Browse files
performance: do not enable v2 algorithm in certain scenarios
- do not enable when SubSliceCount != MaxSubSlicesSupported
Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
Related-To: NEO-6989
Source: a129c29
1 parent 280f379 commit 3cab63f

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(InterfaceDescri
4343
if (DebugManager.flags.AdjustThreadGroupDispatchSize.get() != -1) {
4444
adjustTGDispatchSize = !!DebugManager.flags.AdjustThreadGroupDispatchSize.get();
4545
}
46-
auto algorithmVersion = 2u;
46+
// apply v2 algorithm only for parts where MaxSubSlicesSupported is equal to SubSliceCount
47+
auto algorithmVersion = hwInfo.gtSystemInfo.MaxSubSlicesSupported == hwInfo.gtSystemInfo.SubSliceCount ? 2 : 1;
4748
if (DebugManager.flags.ForceThreadGroupDispatchSizeAlgorithm.get() != -1) {
4849
algorithmVersion = DebugManager.flags.ForceThreadGroupDispatchSizeAlgorithm.get();
4950
}

shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,31 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenVariousDispatchParamtersWhenAl
819819
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize());
820820
}
821821

822+
XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenDualSubSliceCountNotEqualToMaxSubsliceCounteWhenTgDispatchSizeIsSelectedThenAlgorithmV1IsUsed) {
823+
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
824+
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
825+
WALKER_TYPE walkerCmd{};
826+
const auto &productHelper = pDevice->getProductHelper();
827+
auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
828+
mutableHwInfo->gtSystemInfo.MaxSubSlicesSupported = 64u;
829+
mutableHwInfo->gtSystemInfo.SubSliceCount = 32u;
830+
mutableHwInfo->gtSystemInfo.ThreadCount = 2048u;
831+
auto hwInfo = pDevice->getHardwareInfo();
832+
833+
hwInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, hwInfo);
834+
uint32_t numGrf = GrfConfig::DefaultGrfNumber;
835+
836+
INTERFACE_DESCRIPTOR_DATA iddArg = FamilyType::cmdInitInterfaceDescriptorData;
837+
838+
iddArg.setNumberOfThreadsInGpgpuThreadGroup(1u);
839+
numGrf = GrfConfig::DefaultGrfNumber;
840+
walkerCmd.setThreadGroupIdXDimension(256);
841+
walkerCmd.setThreadGroupIdYDimension(1);
842+
walkerCmd.setThreadGroupIdZDimension(1);
843+
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, 256u, numGrf, walkerCmd);
844+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize());
845+
}
846+
822847
XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupAndDebugFlagDisabledWhenCallingAdjustInterfaceDescriptorDataThenThreadGroupDispatchSizeIsDefault) {
823848
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
824849
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;

0 commit comments

Comments
 (0)