@@ -298,8 +298,14 @@ TEST_P(LocalIDFixture, sizeCalculationLocalIDs) {
298298
299299using LocalIds4x4LayoutTest = ::testing::TestWithParam<uint8_t >;
300300
// Compatibility checks for isCompatibleWith4x4Layout using the dimension order {0, 1, 2} and 16 as
// the third argument (the same position the parameterized tests below pass `simd` in).
// NOTE(review): the test names say "Check2x4Compatibility" while the function called is
// isCompatibleWith4x4Layout - possibly a naming slip; confirm before renaming.
301- TEST (LocalIds4x4LayoutTest, given4x4x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) {
302- std::array<uint16_t , 3 > localWorkSize{{4u , 4u , 1u }};
// An 8x4x1 local work size is expected to be reported as compatible.
301+ TEST (LocalIds4x4LayoutTest, given8x4x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) {
302+ std::array<uint16_t , 3 > localWorkSize{{8u , 4u , 1u }};
303+ std::array<uint8_t , 3 > dimensionsOrder = {{0u , 1u , 2u }};
304+ EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, 16 ));
305+ }
306+
// An 8x8x1 local work size is expected to be reported as compatible.
307+ TEST (LocalIds4x4LayoutTest, given8x8x1LocalWorkSizeWithDefaultDimensionsOrderWhenCheck2x4CompatibilityThenReturnTrue) {
308+ std::array<uint16_t , 3 > localWorkSize{{8u , 8u , 1u }};
303309 std::array<uint8_t , 3 > dimensionsOrder = {{0u , 1u , 2u }};
304310 EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, 16 ));
305311}
@@ -366,32 +372,34 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS4x4x2WhenGenerateLocalIdsWithKernelWithOnl
366372 auto alignedMemory = allocateAlignedMemory (size, 32 );
367373 auto buffer = reinterpret_cast <uint16_t *>(alignedMemory.get ());
368374 memset (buffer, 0xff , size);
369- EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd));
370- generateLocalIDs (buffer, simd, localWorkSize, dimensionsOrder, true );
375+ if (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd)) {
376+ EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd));
377+ generateLocalIDs (buffer, simd, localWorkSize, dimensionsOrder, true );
371378
372- auto numRows = elemsInBuffer / rowWidth;
373- auto numGrfs = numRows / 3u ;
379+ auto numRows = elemsInBuffer / rowWidth;
380+ auto numGrfs = numRows / 3u ;
374381
375- for (auto i = 0u ; i < numGrfs; i++) {
382+ for (auto i = 0u ; i < numGrfs; i++) {
376383
377- // validate X row
378- uint16_t baseX = buffer[i * 3 * rowWidth];
379- uint16_t currentX = baseX;
380- for (int j = 1 ; j < simd; j++) {
381- currentX = baseX + ((currentX + 1 ) & (xDelta - 1 ));
382- EXPECT_EQ (buffer[i * 3 * rowWidth + j], currentX);
383- }
384+ // validate X row
385+ uint16_t baseX = buffer[i * 3 * rowWidth];
386+ uint16_t currentX = baseX;
387+ for (int j = 1 ; j < simd; j++) {
388+ currentX = baseX + ((currentX + 1 ) & (xDelta - 1 ));
389+ EXPECT_EQ (buffer[i * 3 * rowWidth + j], currentX);
390+ }
384391
385- // validate Y row
386- for (int j = 0 ; j < simd; j++) {
387- uint16_t expectedY = ((j / xDelta) & 0b11 );
388- EXPECT_EQ (buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
389- }
392+ // validate Y row
393+ for (int j = 0 ; j < simd; j++) {
394+ uint16_t expectedY = ((j / xDelta) & 0b11 );
395+ EXPECT_EQ (buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
396+ }
390397
391- // validate Z row
392- for (int j = 0 ; j < simd; j++) {
393- uint16_t expectedZ = 2 * i / numGrfs + j / (simd / zDelta); // early grow Z
394- EXPECT_EQ (buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
398+ // validate Z row
399+ for (int j = 0 ; j < simd; j++) {
400+ uint16_t expectedZ = 2 * i / numGrfs + j / (simd / zDelta); // early grow Z
401+ EXPECT_EQ (buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
402+ }
395403 }
396404 }
397405}
@@ -410,36 +418,88 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS8x4x2WhenGenerateLocalIdsWithKernelWithOnl
410418 auto alignedMemory = allocateAlignedMemory (size, 32 );
411419 auto buffer = reinterpret_cast <uint16_t *>(alignedMemory.get ());
412420 memset (buffer, 0xff , size);
413- EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd));
414- generateLocalIDs (buffer, simd, localWorkSize, dimensionsOrder, true );
415-
416- auto numRows = elemsInBuffer / rowWidth;
417- auto numGrfs = numRows / 3u ;
421+ if (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd)) {
422+ EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd));
423+ generateLocalIDs (buffer, simd, localWorkSize, dimensionsOrder, true );
424+
425+ auto numRows = elemsInBuffer / rowWidth;
426+ auto numGrfs = numRows / 3u ;
427+
428+ for (auto i = 0u ; i < numGrfs; i++) {
429+
430+ // validate X row
431+ uint16_t baseX = buffer[i * 3 * rowWidth];
432+ uint16_t currentX = baseX;
433+ for (int j = 1 ; j < simd; j++) {
434+ if (j == 16 ) {
435+ // early grow X
436+ baseX += xDelta;
437+ }
438+ currentX = baseX + ((currentX + 1 ) & (xDelta - 1 ));
439+ EXPECT_EQ (buffer[i * 3 * rowWidth + j], currentX);
440+ }
418441
419- for (auto i = 0u ; i < numGrfs; i++) {
442+ // validate Y row
443+ for (int j = 0 ; j < simd; j++) {
444+ uint16_t expectedY = ((j / xDelta) & 0b11 );
445+ EXPECT_EQ (buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
446+ }
420447
421- // validate X row
422- uint16_t baseX = buffer[i * 3 * rowWidth];
423- uint16_t currentX = baseX;
424- for (int j = 1 ; j < simd; j++) {
425- if (j == 16 ) {
426- // early grow X
427- baseX += xDelta;
448+ // validate Z row
449+ for (int j = 0 ; j < simd; j++) {
450+ uint16_t expectedZ = 2 * i / numGrfs;
451+ EXPECT_EQ (buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
428452 }
429- currentX = baseX + ((currentX + 1 ) & (xDelta - 1 ));
430- EXPECT_EQ (buffer[i * 3 * rowWidth + j], currentX);
431453 }
454+ }
455+ }
432456
433- // validate Y row
434- for (int j = 0 ; j < simd; j++) {
435- uint16_t expectedY = ((j / xDelta) & 0b11 );
436- EXPECT_EQ (buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
437- }
// Value-parameterized test (the parameter is the SIMD size): for an 8x4x1 local work size with
// dimension order {0, 1, 2}, calls generateLocalIDs with `true` as the last argument and checks the
// X, Y and Z rows written to the buffer.
// NOTE(review): all checks sit inside `if (isCompatibleWith4x4Layout(...))`, so when the layout is
// reported as incompatible for a given SIMD the test passes without verifying anything, and the
// EXPECT_TRUE on the same call inside the guard cannot fail (assuming the call is deterministic).
// Consider GTEST_SKIP() for the incompatible case so the skip is visible in the test report.
457+ TEST_P (LocalIds4x4LayoutTest, givenLWS8x4x1WhenGenerateLocalIdsWithKernelWithOnlyImagesThenApplies4x4Layout) {
458+ uint16_t simd = GetParam ();
// Row width in uint16_t elements: 32 for SIMD32, 16 for every other SIMD value.
459+ uint8_t rowWidth = simd == 32 ? 32 : 16 ;
// Width of one X run used by the expectations below: 2 for SIMD8, otherwise 4.
460+ uint16_t xDelta = simd == 8u ? 2u : 4u ;
461+ std::array<uint16_t , 3 > localWorkSize{8u , 4u , 1u };
462+ auto dimensionsOrder = std::array<uint8_t , 3 >{{0u , 1u , 2u }};
// Three values (X, Y, Z) per work item.
463+ auto elemsInBuffer = 3u * localWorkSize.at (0 ) * localWorkSize.at (1 ) * localWorkSize.at (2 );
// Buffer is doubled for SIMD8 - presumably because a SIMD8 row still occupies a 16-element row,
// leaving half of each row unused; TODO confirm against generateLocalIDs.
464+ if (simd == 8u ) {
465+ elemsInBuffer *= 2 ;
466+ }
467+ auto size = elemsInBuffer * sizeof (uint16_t );
468+ auto alignedMemory = allocateAlignedMemory (size, 32 );
469+ auto buffer = reinterpret_cast <uint16_t *>(alignedMemory.get ());
// Pre-fill every element with 0xffff so values not overwritten by generateLocalIDs are recognizable.
470+ memset (buffer, 0xff , size);
471+ if (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd)) {
472+ EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd));
473+ generateLocalIDs (buffer, simd, localWorkSize, dimensionsOrder, true );
474+
// The buffer is read as consecutive groups of three rows (X, Y, Z); numGrfs counts those groups.
475+ auto numRows = elemsInBuffer / rowWidth;
476+ auto numGrfs = numRows / 3u ;
477+
478+ for (auto i = 0u ; i < numGrfs; i++) {
479+
480+ // validate X row
// The first X value of the group is taken as the base; each following lane is expected to cycle
// through xDelta consecutive values relative to that base.
481+ uint16_t baseX = buffer[i * 3 * rowWidth];
482+ uint16_t currentX = baseX;
483+ for (int j = 1 ; j < simd; j++) {
// Lanes 16 and above (reached only when simd > 16) use a base shifted by xDelta.
484+ if (j == 16 ) {
485+ // early grow X
486+ baseX += xDelta;
487+ }
488+ currentX = baseX + ((currentX + 1 ) & (xDelta - 1 ));
489+ EXPECT_EQ (buffer[i * 3 * rowWidth + j], currentX);
490+ }
491+
492+ // validate Y row
// Y is expected to advance once every xDelta lanes and wrap within 0..3.
493+ for (int j = 0 ; j < simd; j++) {
494+ uint16_t expectedY = ((j / xDelta) & 0b11 );
495+ EXPECT_EQ (buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
496+ }
438497
439- // validate Z row
440- for (int j = 0 ; j < simd; j++) {
441- uint16_t expectedZ = 2 * i / numGrfs;
442- EXPECT_EQ (buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
498+ // validate Z row
// The Z dimension of the local work size is 1, so every Z value is expected to be 0.
499+ for (int j = 0 ; j < simd; j++) {
500+ uint16_t expectedZ = 0 ;
501+ EXPECT_EQ (buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
502+ }
443503 }
444504 }
445505}
@@ -458,33 +518,35 @@ TEST_P(LocalIds4x4LayoutTest, givenLWS8x8x2WhenGenerateLocalIdsWithKernelWithOnl
458518 auto alignedMemory = allocateAlignedMemory (size, 32 );
459519 auto buffer = reinterpret_cast <uint16_t *>(alignedMemory.get ());
460520 memset (buffer, 0xff , size);
461- EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd));
462- generateLocalIDs (buffer, simd, localWorkSize, dimensionsOrder, true );
521+ if (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd)) {
522+ EXPECT_TRUE (isCompatibleWith4x4Layout (localWorkSize, dimensionsOrder, simd));
523+ generateLocalIDs (buffer, simd, localWorkSize, dimensionsOrder, true );
463524
464- auto numRows = elemsInBuffer / rowWidth;
465- auto numGrfs = numRows / 3u ;
525+ auto numRows = elemsInBuffer / rowWidth;
526+ auto numGrfs = numRows / 3u ;
466527
467- for (auto i = 0u ; i < numGrfs; i++) {
528+ for (auto i = 0u ; i < numGrfs; i++) {
468529
469- // validate X row
470- uint16_t baseX = buffer[i * 3 * rowWidth];
471- uint16_t currentX = baseX;
472- for (int j = 1 ; j < simd; j++) {
473- currentX = baseX + ((currentX + 1 ) & (xDelta - 1 ));
474- EXPECT_EQ (buffer[i * 3 * rowWidth + j], currentX);
475- }
530+ // validate X row
531+ uint16_t baseX = buffer[i * 3 * rowWidth];
532+ uint16_t currentX = baseX;
533+ for (int j = 1 ; j < simd; j++) {
534+ currentX = baseX + ((currentX + 1 ) & (xDelta - 1 ));
535+ EXPECT_EQ (buffer[i * 3 * rowWidth + j], currentX);
536+ }
476537
477- // validate Y row
478- uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth];
479- for (int j = 0 ; j < simd; j++) {
480- uint16_t expectedY = baseY + ((j / xDelta) & 0b111 );
481- EXPECT_EQ (buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
482- }
538+ // validate Y row
539+ uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth];
540+ for (int j = 0 ; j < simd; j++) {
541+ uint16_t expectedY = baseY + ((j / xDelta) & 0b111 );
542+ EXPECT_EQ (buffer[i * 3 * rowWidth + rowWidth + j], expectedY);
543+ }
483544
484- // validate Z row
485- for (int j = 0 ; j < simd; j++) {
486- uint16_t expectedZ = 2 * i / numGrfs;
487- EXPECT_EQ (buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
545+ // validate Z row
546+ for (int j = 0 ; j < simd; j++) {
547+ uint16_t expectedZ = 2 * i / numGrfs;
548+ EXPECT_EQ (buffer[i * 3 * rowWidth + 2 * rowWidth + j], expectedZ);
549+ }
488550 }
489551 }
490552}
0 commit comments