Skip to content

Commit c53c09d

Browse files
Limit the local work sizes for which the 4x4 local IDs layout is applied.
Change-Id: Id9a84d6a7d4530344771f48fd278cff9ab2dd927
1 parent 5e90661 commit c53c09d

File tree

3 files changed

+136
-75
lines changed

3 files changed

+136
-75
lines changed

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#!groovy
22
neoDependenciesRev='790647-1082'
33
strategy='EQUAL'
4-
allowedCD=283
4+
allowedCD=296

runtime/command_queue/local_id_gen.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,12 @@ void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3>
7272
}
7373

7474
bool isCompatibleWith4x4Layout(const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, uint16_t simd) {
75-
uint8_t rowWidth = simd == 32u ? 32u : 16u;
76-
uint8_t xDelta = simd == 8u ? 2u : 4u;
77-
uint8_t yDelta = simd == 8u || localWorkgroupSize.at(1) == 4u ? 4u : rowWidth / xDelta;
75+
//limit support to 8x4x1 and 8x8x1 LWS
7876
return dimensionsOrder.at(0) == 0 &&
7977
dimensionsOrder.at(1) == 1 &&
80-
(localWorkgroupSize.at(0) & (xDelta - 1)) == 0 &&
81-
(localWorkgroupSize.at(1) & (yDelta - 1)) == 0;
78+
localWorkgroupSize[2] == 1 &&
79+
localWorkgroupSize[0] == 8 &&
80+
(localWorkgroupSize[1] == 4 || localWorkgroupSize[1] == 8);
8281
}
8382

8483
inline void generateLocalIDsWith4x4Layout(void *b, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t simd) {

unit_tests/command_queue/local_id_tests.cpp

Lines changed: 131 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,14 @@ TEST_P(LocalIDFixture, sizeCalculationLocalIDs) {
298298

299299
using LocalIds4x4LayoutTest = ::testing::TestWithParam<uint8_t>;
300300

301-
TEST(LocalIds4x4LayoutTest, given4x4x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) {
302-
std::array<uint16_t, 3> localWorkSize{{4u, 4u, 1u}};
301+
TEST(LocalIds4x4LayoutTest, given8x4x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) {
302+
std::array<uint16_t, 3> localWorkSize{{8u, 4u, 1u}};
303+
std::array<uint8_t, 3> dimensionsOrder = {{0u, 1u, 2u}};
304+
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, 16));
305+
}
306+
307+
TEST(LocalIds4x4LayoutTest, given8x8x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) {
308+
std::array<uint16_t, 3> localWorkSize{{8u, 8u, 1u}};
303309
std::array<uint8_t, 3> dimensionsOrder = {{0u, 1u, 2u}};
304310
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, 16));
305311
}
@@ -366,32 +372,34 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS4x4x2WhenGenerateLocalIdsWithKernelWithOnl
366372
auto alignedMemory = allocateAlignedMemory(size, 32);
367373
auto buffer = reinterpret_cast<uint16_t *>(alignedMemory.get());
368374
memset(buffer, 0xff, size);
369-
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd));
370-
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
375+
if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) {
376+
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd));
377+
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
371378

372-
auto numRows = elemsInBuffer / rowWidth;
373-
auto numGrfs = numRows / 3u;
379+
auto numRows = elemsInBuffer / rowWidth;
380+
auto numGrfs = numRows / 3u;
374381

375-
for (auto i = 0u; i < numGrfs; i++) {
382+
for (auto i = 0u; i < numGrfs; i++) {
376383

377-
// validate X row
378-
uint16_t baseX = buffer[i * 3 * rowWidth];
379-
uint16_t currentX = baseX;
380-
for (int j = 1; j < simd; j++) {
381-
currentX = baseX + ((currentX + 1) & (xDelta - 1));
382-
EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX);
383-
}
384+
// validate X row
385+
uint16_t baseX = buffer[i * 3 * rowWidth];
386+
uint16_t currentX = baseX;
387+
for (int j = 1; j < simd; j++) {
388+
currentX = baseX + ((currentX + 1) & (xDelta - 1));
389+
EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX);
390+
}
384391

385-
// validate Y row
386-
for (int j = 0; j < simd; j++) {
387-
uint16_t expectedY = ((j / xDelta) & 0b11);
388-
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
389-
}
392+
// validate Y row
393+
for (int j = 0; j < simd; j++) {
394+
uint16_t expectedY = ((j / xDelta) & 0b11);
395+
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
396+
}
390397

391-
// validate Z row
392-
for (int j = 0; j < simd; j++) {
393-
uint16_t expectedZ = 2 * i / numGrfs + j / (simd / zDelta); //early grow Z
394-
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
398+
// validate Z row
399+
for (int j = 0; j < simd; j++) {
400+
uint16_t expectedZ = 2 * i / numGrfs + j / (simd / zDelta); //early grow Z
401+
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
402+
}
395403
}
396404
}
397405
}
@@ -410,36 +418,88 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS8x4x2WhenGenerateLocalIdsWithKernelWithOnl
410418
auto alignedMemory = allocateAlignedMemory(size, 32);
411419
auto buffer = reinterpret_cast<uint16_t *>(alignedMemory.get());
412420
memset(buffer, 0xff, size);
413-
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd));
414-
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
415-
416-
auto numRows = elemsInBuffer / rowWidth;
417-
auto numGrfs = numRows / 3u;
421+
if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) {
422+
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd));
423+
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
424+
425+
auto numRows = elemsInBuffer / rowWidth;
426+
auto numGrfs = numRows / 3u;
427+
428+
for (auto i = 0u; i < numGrfs; i++) {
429+
430+
// validate X row
431+
uint16_t baseX = buffer[i * 3 * rowWidth];
432+
uint16_t currentX = baseX;
433+
for (int j = 1; j < simd; j++) {
434+
if (j == 16) {
435+
//early grow X
436+
baseX += xDelta;
437+
}
438+
currentX = baseX + ((currentX + 1) & (xDelta - 1));
439+
EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX);
440+
}
418441

419-
for (auto i = 0u; i < numGrfs; i++) {
442+
// validate Y row
443+
for (int j = 0; j < simd; j++) {
444+
uint16_t expectedY = ((j / xDelta) & 0b11);
445+
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
446+
}
420447

421-
// validate X row
422-
uint16_t baseX = buffer[i * 3 * rowWidth];
423-
uint16_t currentX = baseX;
424-
for (int j = 1; j < simd; j++) {
425-
if (j == 16) {
426-
//early grow X
427-
baseX += xDelta;
448+
// validate Z row
449+
for (int j = 0; j < simd; j++) {
450+
uint16_t expectedZ = 2 * i / numGrfs;
451+
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
428452
}
429-
currentX = baseX + ((currentX + 1) & (xDelta - 1));
430-
EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX);
431453
}
454+
}
455+
}
432456

433-
// validate Y row
434-
for (int j = 0; j < simd; j++) {
435-
uint16_t expectedY = ((j / xDelta) & 0b11);
436-
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
437-
}
457+
TEST_P(LocalIds4x4LayoutTest, givenLWS8x4x1WhenGenerateLocalIdsWithKernelWithOnlyImagesThenApplies4x4Layout) {
458+
uint16_t simd = GetParam();
459+
uint8_t rowWidth = simd == 32 ? 32 : 16;
460+
uint16_t xDelta = simd == 8u ? 2u : 4u;
461+
std::array<uint16_t, 3> localWorkSize{8u, 4u, 1u};
462+
auto dimensionsOrder = std::array<uint8_t, 3>{{0u, 1u, 2u}};
463+
auto elemsInBuffer = 3u * localWorkSize.at(0) * localWorkSize.at(1) * localWorkSize.at(2);
464+
if (simd == 8u) {
465+
elemsInBuffer *= 2;
466+
}
467+
auto size = elemsInBuffer * sizeof(uint16_t);
468+
auto alignedMemory = allocateAlignedMemory(size, 32);
469+
auto buffer = reinterpret_cast<uint16_t *>(alignedMemory.get());
470+
memset(buffer, 0xff, size);
471+
if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) {
472+
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd));
473+
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
474+
475+
auto numRows = elemsInBuffer / rowWidth;
476+
auto numGrfs = numRows / 3u;
477+
478+
for (auto i = 0u; i < numGrfs; i++) {
479+
480+
// validate X row
481+
uint16_t baseX = buffer[i * 3 * rowWidth];
482+
uint16_t currentX = baseX;
483+
for (int j = 1; j < simd; j++) {
484+
if (j == 16) {
485+
//early grow X
486+
baseX += xDelta;
487+
}
488+
currentX = baseX + ((currentX + 1) & (xDelta - 1));
489+
EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX);
490+
}
491+
492+
// validate Y row
493+
for (int j = 0; j < simd; j++) {
494+
uint16_t expectedY = ((j / xDelta) & 0b11);
495+
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
496+
}
438497

439-
// validate Z row
440-
for (int j = 0; j < simd; j++) {
441-
uint16_t expectedZ = 2 * i / numGrfs;
442-
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
498+
// validate Z row
499+
for (int j = 0; j < simd; j++) {
500+
uint16_t expectedZ = 0;
501+
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
502+
}
443503
}
444504
}
445505
}
@@ -458,33 +518,35 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS8x8x2WhenGenerateLocalIdsWithKernelWithOnl
458518
auto alignedMemory = allocateAlignedMemory(size, 32);
459519
auto buffer = reinterpret_cast<uint16_t *>(alignedMemory.get());
460520
memset(buffer, 0xff, size);
461-
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd));
462-
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
521+
if (isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd)) {
522+
EXPECT_TRUE(isCompatibleWith4x4Layout(localWorkSize, dimensionsOrder, simd));
523+
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true);
463524

464-
auto numRows = elemsInBuffer / rowWidth;
465-
auto numGrfs = numRows / 3u;
525+
auto numRows = elemsInBuffer / rowWidth;
526+
auto numGrfs = numRows / 3u;
466527

467-
for (auto i = 0u; i < numGrfs; i++) {
528+
for (auto i = 0u; i < numGrfs; i++) {
468529

469-
// validate X row
470-
uint16_t baseX = buffer[i * 3 * rowWidth];
471-
uint16_t currentX = baseX;
472-
for (int j = 1; j < simd; j++) {
473-
currentX = baseX + ((currentX + 1) & (xDelta - 1));
474-
EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX);
475-
}
530+
// validate X row
531+
uint16_t baseX = buffer[i * 3 * rowWidth];
532+
uint16_t currentX = baseX;
533+
for (int j = 1; j < simd; j++) {
534+
currentX = baseX + ((currentX + 1) & (xDelta - 1));
535+
EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX);
536+
}
476537

477-
// validate Y row
478-
uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth];
479-
for (int j = 0; j < simd; j++) {
480-
uint16_t expectedY = baseY + ((j / xDelta) & 0b111);
481-
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
482-
}
538+
// validate Y row
539+
uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth];
540+
for (int j = 0; j < simd; j++) {
541+
uint16_t expectedY = baseY + ((j / xDelta) & 0b111);
542+
EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
543+
}
483544

484-
// validate Z row
485-
for (int j = 0; j < simd; j++) {
486-
uint16_t expectedZ = 2 * i / numGrfs;
487-
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
545+
// validate Z row
546+
for (int j = 0; j < simd; j++) {
547+
uint16_t expectedZ = 2 * i / numGrfs;
548+
EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
549+
}
488550
}
489551
}
490552
}

0 commit comments

Comments
 (0)