Skip to content

Commit bae8735

Browse files
Propagate exec buffer error to L0 API level on Xe HPC
This change opens the drm file in non-blocking mode for prelim kernels. In that case, when the exec buffer ioctl fails with EAGAIN (aka EWOULDBLOCK), the error can be returned to the API level. Related-To: NEO-7144 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com> Source: 9816f81
1 parent 06a2cc6 commit bae8735

26 files changed

+312
-40
lines changed

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::handleSubmissionAndCompletionResults(
11171117
if (submitRet == NEO::SubmissionStatus::OUT_OF_MEMORY) {
11181118
completionRet = ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
11191119
}
1120+
if (submitRet == NEO::SubmissionStatus::OUT_OF_HOST_MEMORY) {
1121+
completionRet = ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
1122+
}
11201123
}
11211124

11221125
return completionRet;

level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,17 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
9292

9393
// Records the residency container passed in, then either returns the status
// forced through submitBatchBufferReturnValue or defers to the base class.
NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, bool isCooperative) override {
    residencyContainerSnapshot = residencyContainer;

    // No forced status configured: perform the regular submission.
    if (!submitBatchBufferReturnValue.has_value()) {
        return BaseClass::submitBatchBuffer(offset, residencyContainer, endingCmdPtr, isCooperative);
    }

    const NEO::SubmissionStatus forcedStatus = *submitBatchBufferReturnValue;
    return forcedStatus;
}
97100

98101
uint32_t synchronizedCalled = 0;
99102
NEO::ResidencyContainer residencyContainerSnapshot;
100103
ze_result_t synchronizeReturnValue{ZE_RESULT_SUCCESS};
101104
std::optional<NEO::WaitStatus> reserveLinearStreamSizeReturnValue{};
105+
std::optional<NEO::SubmissionStatus> submitBatchBufferReturnValue{};
102106
};
103107

104108
struct Deleter {
@@ -108,4 +112,4 @@ struct Deleter {
108112
};
109113

110114
} // namespace ult
111-
} // namespace L0
115+
} // namespace L0
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#
2+
# Copyright (C) 2022 Intel Corporation
3+
#
4+
# SPDX-License-Identifier: MIT
5+
#
6+
7+
# Add this directory's CMakeLists.txt and cmdqueue_linux_tests.cpp to
# ${TARGET_NAME} only when configuring for a UNIX platform.
if(UNIX)
8+
target_sources(${TARGET_NAME} PRIVATE
9+
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
10+
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_linux_tests.cpp
11+
)
12+
endif()
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright (C) 2022 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#include "shared/test/common/helpers/ult_hw_config.h"
9+
#include "shared/test/common/helpers/variable_backup.h"
10+
#include "shared/test/common/libult/create_command_stream.h"
11+
#include "shared/test/common/libult/linux/drm_mock.h"
12+
#include "shared/test/common/test_macros/hw_test.h"
13+
14+
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
15+
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
16+
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
17+
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
18+
19+
namespace L0 {
20+
namespace ult {
21+
22+
// Fixture whose SetUp is overridden to build the device on top of an
// ExecutionEnvironment prepared through prepareDeviceEnvironments().
struct CommandQueueLinuxTests : public Test<DeviceFixture> {

    void SetUp() override {
        // Lives only for the duration of SetUp.
        // NOTE(review): presumably restores ultHwConfig when it goes out of scope - confirm.
        VariableBackup<UltHwConfig> ultHwConfigGuard(&ultHwConfig);
        ultHwConfig.forceOsAgnosticMemoryManager = false;
        ultHwConfig.useHwCsr = true;
        ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;

        // The environment is handed over to setupWithExecutionEnvironment() below.
        auto *preparedEnvironment = new NEO::ExecutionEnvironment();
        prepareDeviceEnvironments(*preparedEnvironment);
        preparedEnvironment->initializeMemoryManager();
        setupWithExecutionEnvironment(*preparedEnvironment);
    }
};
35+
36+
// Checks that, with the DRM mock configured to report an exec buffer error and
// errno value EWOULDBLOCK, executeCommandLists returns
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY on Xe HPC.
HWTEST2_F(CommandQueueLinuxTests, givenExecBufferErrorOnXeHpcWhenExecutingCommandListsThenOutOfHostMemoryIsReturned, IsXeHpcCore) {
    auto drm = neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as<DrmMock>();

    // NOTE(review): presumably makes the mocked exec buffer call fail with
    // errno == EWOULDBLOCK instead of the real errno - confirm against DrmMock.
    drm->execBufferResult = -1;
    drm->baseErrno = false;
    drm->errnoRetVal = EWOULDBLOCK;
    const ze_command_queue_desc_t desc = {};
    // Initialized so a create() path that never writes it cannot leave garbage behind.
    ze_result_t returnValue = ZE_RESULT_ERROR_UNKNOWN;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
                                                          device,
                                                          neoDevice->getDefaultEngine().commandStreamReceiver,
                                                          &desc,
                                                          false,
                                                          false,
                                                          returnValue));

    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    // Fatal check: the queue is dereferenced below, so stop here instead of crashing.
    ASSERT_NE(nullptr, commandQueue);

    Mock<Kernel> kernel;
    kernel.immutableData.isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));
    kernel.immutableData.device = device;

    auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);

    ze_group_count_t dispatchFunctionArguments{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);

    ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};

    returnValue = commandQueue->executeCommandLists(1, cmdListHandles, nullptr, false);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, returnValue);
    commandQueue->destroy();
    // The ISA allocation came from the memory manager, so it is released from the
    // kernel's owning pointer and freed through the memory manager as well.
    neoDevice->getMemoryManager()->freeGraphicsMemory(kernel.immutableData.isaGraphicsAllocation.release());
}
73+
} // namespace ult
74+
} // namespace L0

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,30 @@ HWTEST2_F(CommandQueueCreate, givenLogicalStateHelperAndImmediateCmdListWhenExec
357357
commandQueue->destroy();
358358
}
359359

360+
// Checks that an OUT_OF_HOST_MEMORY status forced on the mocked
// submitBatchBuffer is reported by executeCommandLists as
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY.
HWTEST2_F(CommandQueueCreate, givenOutOfHostMemoryErrorFromSubmitBatchBufferWhenExecutingCommandListsThenOutOfHostMemoryIsReturned, IsAtLeastSkl) {
    const ze_command_queue_desc_t desc = {};
    auto commandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc);
    commandQueue->initialize(false, false);
    // Force the mocked submission to report host memory exhaustion.
    commandQueue->submitBatchBufferReturnValue = NEO::SubmissionStatus::OUT_OF_HOST_MEMORY;

    Mock<Kernel> kernel;
    kernel.immutableData.device = device;

    // Initialized and verified so a failed command list creation is reported
    // explicitly rather than through an indeterminate value.
    ze_result_t returnValue = ZE_RESULT_ERROR_UNKNOWN;
    auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);

    ze_group_count_t dispatchFunctionArguments{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);

    ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};

    const auto result = commandQueue->executeCommandLists(1, cmdListHandles, nullptr, false);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, result);
    commandQueue->destroy();
}
383+
360384
HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCommandListsThenDeviceLostIsReturned, IsSKL) {
361385
const ze_command_queue_desc_t desc = {};
362386
MockCommandQueueHw<gfxCoreFamily> commandQueue(device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc);

shared/source/command_stream/submission_status.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ enum class SubmissionStatus : uint32_t {
1414
SUCCESS = 0,
1515
FAILED,
1616
OUT_OF_MEMORY,
17+
OUT_OF_HOST_MEMORY,
1718
UNSUPPORTED,
1819
DEVICE_UNINITIALIZED,
1920
};

shared/source/os_interface/hw_info_config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ class HwInfoConfig {
139139
virtual bool isPrefetcherDisablingInDirectSubmissionRequired() const = 0;
140140
virtual bool isStatefulAddressingModeSupported() const = 0;
141141
virtual bool isPlatformQuerySupported() const = 0;
142+
virtual bool isNonBlockingGpuSubmissionSupported() const = 0;
142143

143144
virtual bool getFrontEndPropertyScratchSizeSupport() const = 0;
144145
virtual bool getFrontEndPropertyPrivateScratchSizeSupport() const = 0;
@@ -275,6 +276,7 @@ class HwInfoConfigHw : public HwInfoConfig {
275276
bool isPrefetcherDisablingInDirectSubmissionRequired() const override;
276277
bool isStatefulAddressingModeSupported() const override;
277278
bool isPlatformQuerySupported() const override;
279+
bool isNonBlockingGpuSubmissionSupported() const override;
278280

279281
bool getFrontEndPropertyScratchSizeSupport() const override;
280282
bool getFrontEndPropertyPrivateScratchSizeSupport() const override;

shared/source/os_interface/hw_info_config.inl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,11 @@ bool HwInfoConfigHw<gfxProduct>::isPlatformQuerySupported() const {
501501
return false;
502502
}
503503

504+
// Generic HwInfoConfigHw definition: reports that non-blocking GPU submission
// is not supported.
// NOTE(review): products that do support it presumably provide their own
// specialization elsewhere - confirm.
template <PRODUCT_FAMILY gfxProduct>
505+
bool HwInfoConfigHw<gfxProduct>::isNonBlockingGpuSubmissionSupported() const {
506+
return false;
507+
}
508+
504509
template <PRODUCT_FAMILY gfxProduct>
505510
void HwInfoConfigHw<gfxProduct>::fillScmPropertiesSupportStructureBase(StateComputeModePropertiesSupport &propertiesSupport) {
506511
propertiesSupport.coherencyRequired = getScmPropertyCoherencyRequiredSupport();

shared/source/os_interface/linux/drm_command_stream_bdw_and_later.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ SubmissionStatus DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchB
2020
int ret = this->exec(batchBuffer, 0u, static_cast<const OsContextLinux *>(osContext)->getDrmContextIds()[0], 0);
2121

2222
if (ret) {
23+
if (ret == EWOULDBLOCK) {
24+
return SubmissionStatus::OUT_OF_HOST_MEMORY;
25+
}
2326
return SubmissionStatus::FAILED;
2427
}
2528
return SubmissionStatus::SUCCESS;

shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ SubmissionStatus DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchB
3939

4040
int ret = this->exec(batchBuffer, tileIterator, drmContextIds[contextIndex], contextIndex);
4141
if (ret) {
42+
if (ret == EWOULDBLOCK) {
43+
return SubmissionStatus::OUT_OF_HOST_MEMORY;
44+
}
4245
return SubmissionStatus::FAILED;
4346
}
4447

0 commit comments

Comments
 (0)