Skip to content

Commit 779bca3

Browse files
Debug flag to add sfence instruction prior to DirectSubmission dispatch
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
1 parent beff001 commit 779bca3

File tree

8 files changed

+62
-4
lines changed

8 files changed

+62
-4
lines changed

opencl/test/unit_test/test_files/igdrcl.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,3 +411,4 @@ DirectSubmissionReadBackRingBuffer = -1
411411
ReadBackCommandBufferAllocation = -1
412412
PrintImageBlitBlockCopyCmdDetails = 0
413413
DirectSubmissionInsertExtraMiMemFenceCommands = -1
414+
DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0
291291
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackCommandBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of cmd buffer after handling residency.")
292292
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackRingBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of ring buffer after handling residency.")
293293
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertExtraMiMemFenceCommands, -1, "-1: default, 0 - disable, 1 - enable. If enabled, add extra MI_MEM_FENCE instructions with acquire bit set")
294+
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore")
294295

295296
/* IMPLICIT SCALING */
296297
DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.")

shared/source/direct_submission/direct_submission_hw.inl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,17 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
435435
reserved = *ringBufferStart;
436436
}
437437

438+
if (DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() >= 1) {
439+
CpuIntrinsics::sfence();
440+
}
441+
438442
//unblock GPU
439443
semaphoreData->QueueWorkCount = currentQueueWorkCount;
444+
445+
if (DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() == 2) {
446+
CpuIntrinsics::sfence();
447+
}
448+
440449
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
441450
currentQueueWorkCount++;
442451
DirectSubmissionDiagnostics::diagnosticModeOneSubmit(diagnostic.get());

shared/source/utilities/cpuintrinsics.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2021 Intel Corporation
2+
* Copyright (C) 2020-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -20,6 +20,10 @@ void clFlush(void const *ptr) {
2020
_mm_clflush(ptr);
2121
}
2222

23+
void sfence() {
24+
_mm_sfence();
25+
}
26+
2327
void pause() {
2428
_mm_pause();
2529
}

shared/source/utilities/cpuintrinsics.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2021 Intel Corporation
2+
* Copyright (C) 2020-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -10,6 +10,8 @@
1010
namespace NEO {
1111
namespace CpuIntrinsics {
1212

13+
void sfence();
14+
1315
void clFlush(void const *ptr);
1416

1517
void pause();

shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "shared/source/debug_settings/debug_settings_manager.h"
1111
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
1212
#include "shared/source/helpers/flush_stamp.h"
13+
#include "shared/source/utilities/cpuintrinsics.h"
1314
#include "shared/test/common/cmd_parse/hw_parse.h"
1415
#include "shared/test/common/fixtures/direct_submission_fixture.h"
1516
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -23,6 +24,10 @@
2324
#include "shared/test/common/mocks/mock_io_functions.h"
2425
#include "shared/test/common/test_macros/test.h"
2526

27+
namespace CpuIntrinsicsTests {
28+
extern std::atomic<uint32_t> sfenceCounter;
29+
} // namespace CpuIntrinsicsTests
30+
2631
using DirectSubmissionTest = Test<DirectSubmissionFixture>;
2732

2833
using DirectSubmissionDispatchBufferTest = Test<DirectSubmissionDispatchBufferFixture>;
@@ -699,3 +704,27 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
699704
auto expectedValue = reinterpret_cast<uint32_t *>(directSubmission.ringCommandStream.getSpace(0))[0];
700705
EXPECT_EQ(expectedValue, directSubmission.reserved);
701706
}
707+
708+
HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWorkloadThenProgramSfenceInstruction) {
709+
DebugManagerStateRestore restorer{};
710+
711+
DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(1);
712+
using Dispatcher = BlitterDispatcher<FamilyType>;
713+
714+
FlushStampTracker flushStamp(true);
715+
716+
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice, *osContext.get());
717+
EXPECT_TRUE(directSubmission.initialize(true, true));
718+
719+
auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
720+
721+
EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));
722+
723+
EXPECT_EQ(initialCounterValue + 1, CpuIntrinsicsTests::sfenceCounter);
724+
725+
DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(2);
726+
727+
EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));
728+
729+
EXPECT_EQ(initialCounterValue + 3, CpuIntrinsicsTests::sfenceCounter);
730+
}

shared/test/unit_test/utilities/cpuintrinsics.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2021 Intel Corporation
2+
* Copyright (C) 2020-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -18,6 +18,7 @@ namespace CpuIntrinsicsTests {
1818
std::atomic<uintptr_t> lastClFlushedPtr(0u);
1919
std::atomic<uint32_t> clFlushCounter(0u);
2020
std::atomic<uint32_t> pauseCounter(0u);
21+
std::atomic<uint32_t> sfenceCounter(0u);
2122

2223
volatile uint32_t *pauseAddress = nullptr;
2324
uint32_t pauseValue = 0u;
@@ -34,6 +35,10 @@ void clFlush(void const *ptr) {
3435
CpuIntrinsicsTests::lastClFlushedPtr = reinterpret_cast<uintptr_t>(ptr);
3536
}
3637

38+
void sfence() {
39+
CpuIntrinsicsTests::sfenceCounter++;
40+
}
41+
3742
void pause() {
3843
CpuIntrinsicsTests::pauseCounter++;
3944
if (CpuIntrinsicsTests::pauseAddress != nullptr) {

shared/test/unit_test/utilities/cpuintrinsics_tests.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2021 Intel Corporation
2+
* Copyright (C) 2020-2022 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -15,6 +15,7 @@
1515
namespace CpuIntrinsicsTests {
1616
extern std::atomic<uintptr_t> lastClFlushedPtr;
1717
extern std::atomic<uint32_t> pauseCounter;
18+
extern std::atomic<uint32_t> sfenceCounter;
1819
} // namespace CpuIntrinsicsTests
1920

2021
TEST(CpuIntrinsicsTest, whenClFlushIsCalledThenExpectToPassPtrToSystemCall) {
@@ -29,3 +30,9 @@ TEST(CpuIntrinsicsTest, whenPauseCalledThenExpectToIncreaseCounter) {
2930
NEO::CpuIntrinsics::pause();
3031
EXPECT_EQ(oldCount + 1, CpuIntrinsicsTests::pauseCounter);
3132
}
33+
34+
TEST(CpuIntrinsicsTest, whenSfenceCalledThenExpectToIncreaseCounter) {
35+
uint32_t oldCount = CpuIntrinsicsTests::sfenceCounter.load();
36+
NEO::CpuIntrinsics::sfence();
37+
EXPECT_EQ(oldCount + 1, CpuIntrinsicsTests::sfenceCounter);
38+
}

0 commit comments

Comments
 (0)