Skip to content

Commit 5094c63

Browse files
pwilma authored and Compute-Runtime-Automation committed
Force resource locking on transfer calls
Add debug variables to force resource locking on memory transfer calls (ForceResourceLockOnTransferCalls) and to call makeResident() when mapGpuVirtualAddress() is called (EnableMakeResidentOnMapGpuVa). Change-Id: Ifa78d951fcb81812b10a98252bd414124dec9c74
1 parent 3581bdb commit 5094c63

File tree

17 files changed

+280
-6
lines changed

17 files changed

+280
-6
lines changed

runtime/command_queue/cpu_data_transfer_handler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,11 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
128128
}
129129
break;
130130
case CL_COMMAND_READ_BUFFER:
131-
memcpy_s(transferProperties.ptr, transferProperties.size[0], ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), transferProperties.offset[0]), transferProperties.size[0]);
131+
memcpy_s(transferProperties.ptr, transferProperties.size[0], transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0]);
132132
eventCompleted = true;
133133
break;
134134
case CL_COMMAND_WRITE_BUFFER:
135-
memcpy_s(ptrOffset(transferProperties.memObj->getCpuAddressForMemoryTransfer(), transferProperties.offset[0]), transferProperties.size[0], transferProperties.ptr, transferProperties.size[0]);
135+
memcpy_s(transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0], transferProperties.ptr, transferProperties.size[0]);
136136
eventCompleted = true;
137137
break;
138138
case CL_COMMAND_MARKER:

runtime/command_queue/enqueue_read_buffer.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
5454
TransferProperties transferProperties(buffer, CL_COMMAND_READ_BUFFER, 0, true, &offset, &size, ptr);
5555
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
5656
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
57-
57+
if (DebugManager.flags.ForceResourceLockOnTransferCalls.get()) {
58+
if (transferProperties.lockedPtr != nullptr) {
59+
buffer->getMemoryManager()->unlockResource(buffer->getGraphicsAllocation());
60+
}
61+
}
5862
return retVal;
5963
}
6064
MultiDispatchInfo dispatchInfo;

runtime/command_queue/enqueue_write_buffer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
5252
TransferProperties transferProperties(buffer, CL_COMMAND_WRITE_BUFFER, 0, true, &offset, &size, const_cast<void *>(ptr));
5353
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
5454
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
55+
if (DebugManager.flags.ForceResourceLockOnTransferCalls.get()) {
56+
if (transferProperties.lockedPtr != nullptr) {
57+
buffer->getMemoryManager()->unlockResource(buffer->getGraphicsAllocation());
58+
}
59+
}
5560

5661
return retVal;
5762
}
63+
5864
MultiDispatchInfo dispatchInfo;
5965
if (!isMemTransferNeeded) {
6066
NullSurface s;

runtime/helpers/properties_helper.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "runtime/helpers/properties_helper.h"
1010
#include "runtime/mem_obj/image.h"
1111
#include "runtime/mem_obj/mem_obj.h"
12+
#include "runtime/memory_manager/memory_manager.h"
1213

1314
namespace OCLRT {
1415
TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType, cl_map_flags mapFlags, bool blocking,
@@ -20,6 +21,11 @@ TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType,
2021
if (memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) {
2122
size[0] = *sizePtr;
2223
offset[0] = *offsetPtr;
24+
if (DebugManager.flags.ForceResourceLockOnTransferCalls.get()) {
25+
if ((false == MemoryPool::isSystemMemoryPool(memObj->getGraphicsAllocation()->getMemoryPool())) && (memObj->getMemoryManager() != nullptr)) {
26+
this->lockedPtr = memObj->getMemoryManager()->lockResource(memObj->getGraphicsAllocation());
27+
}
28+
}
2329
} else {
2430
size = {{sizePtr[0], sizePtr[1], sizePtr[2]}};
2531
offset = {{offsetPtr[0], offsetPtr[1], offsetPtr[2]}};
@@ -35,4 +41,9 @@ TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType,
3541
}
3642
}
3743
}
44+
45+
// Returns the CPU address to use as the buffer side of a read/write copy.
// The base pointer is lockedPtr when it is non-null; otherwise it is
// memObj->getCpuAddressForMemoryTransfer(). In both cases the base is
// advanced by offset[0] through ptrOffset().
// NOTE(review): memObj is dereferenced without a null check when lockedPtr
// is null — presumably guaranteed non-null by the constructor; confirm.
void *TransferProperties::getCpuPtrForReadWrite() {
46+
return ptrOffset(lockedPtr ? lockedPtr : memObj->getCpuAddressForMemoryTransfer(), offset[0]);
47+
}
48+
3849
} // namespace OCLRT

runtime/helpers/properties_helper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ struct TransferProperties {
5757
void *ptr = nullptr;
5858
uint32_t mipLevel = 0;
5959
uint32_t mipPtrOffset = 0;
60+
61+
void *lockedPtr = nullptr;
62+
void *getCpuPtrForReadWrite();
6063
};
6164

6265
struct MapInfo {

runtime/mem_obj/mem_obj.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ class MemObj : public BaseObject<_cl_mem> {
105105
size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); }
106106
cl_mem_object_type peekClMemObjType() const { return memObjectType; }
107107
size_t getOffset() const { return offset; }
108+
// Accessor for the memoryManager member. The pointer is returned as stored,
// with no null check here; callers are expected to handle nullptr themselves.
MemoryManager *getMemoryManager() const {
109+
return memoryManager;
110+
}
108111

109112
protected:
110113
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);

runtime/memory_manager/os_agnostic_memory_manager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class OsAgnosticMemoryManager : public MemoryManager {
5454
void addAllocationToHostPtrManager(GraphicsAllocation *gfxAllocation) override;
5555
void removeAllocationFromHostPtrManager(GraphicsAllocation *gfxAllocation) override;
5656
void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override;
57-
void *lockResource(GraphicsAllocation *graphicsAllocation) override { return nullptr; };
57+
// Returns the allocation's underlying buffer advanced by its allocationOffset (via ptrOffset).
// NOTE(review): graphicsAllocation is dereferenced without a null check — confirm callers never pass nullptr.
void *lockResource(GraphicsAllocation *graphicsAllocation) override { return ptrOffset(graphicsAllocation->getUnderlyingBuffer(), static_cast<size_t>(graphicsAllocation->allocationOffset)); };
5858
// Intentionally empty: this memory manager performs no work when a resource is unlocked.
void unlockResource(GraphicsAllocation *graphicsAllocation) override{};
5959

6060
AllocationStatus populateOsHandles(OsHandleStorage &handleStorage) override;

runtime/os_interface/debug_variables_base.inl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ DECLARE_DEBUG_VARIABLE(bool, UseNewHeapAllocator, true, "Custom 4GB heap allocat
7373
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
7474
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
7575
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForBuffers, false, "When active all buffer allocations will not share memory with CPU.")
76+
DECLARE_DEBUG_VARIABLE(bool, ForceResourceLockOnTransferCalls, 0, "Forces resource locking on memory transfer calls")
77+
DECLARE_DEBUG_VARIABLE(bool, EnableMakeResidentOnMapGpuVa, 0, "Make allocations resident on call mapGpuVirtualAddress")
7678

7779
/*FEATURE FLAGS*/
7880
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")

runtime/os_interface/windows/wddm/wddm.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,10 @@ bool Wddm::mapGpuVirtualAddressImpl(Gmm *gmm, D3DKMT_HANDLE handle, void *cpuPtr
370370

371371
kmDafListener->notifyMapGpuVA(featureTable->ftrKmdDaf, adapter, device, handle, MapGPUVA.VirtualAddress, gdi->escape);
372372

373+
if (DebugManager.flags.EnableMakeResidentOnMapGpuVa.get()) {
374+
this->makeResident(&handle, 1, true, nullptr);
375+
}
376+
373377
if (gmm->isRenderCompressed && pageTableManager.get()) {
374378
return updateAuxTable(gpuPtr, gmm, true);
375379
}

unit_tests/command_queue/enqueue_read_buffer_tests.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,62 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenCommandQueueWhenEnqueueReadBufferIsCall
453453
EXPECT_TRUE(mockCmdQ->notifyEnqueueReadBufferCalled);
454454
}
455455

456+
// Verifies that a blocking enqueueReadBuffer unlocks the buffer's allocation
// exactly once when ForceResourceLockOnTransferCalls is enabled and the
// allocation's memory pool has been overridden to SystemCpuInaccessible.
// NOTE(review): "Propertis" in the test name looks like a typo for "Properties".
HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsAvailableThenItIsUnlocked) {
457+
// Presumably restores the debug flags changed below when it goes out of scope — confirm.
DebugManagerStateRestore dbgRestore;
458+
DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);
459+
DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);
460+
461+
ExecutionEnvironment executionEnvironment;
462+
MockMemoryManager memoryManager(false, true, executionEnvironment);
463+
MockContext ctx;
464+
cl_int retVal;
465+
// Route allocations made through ctx to the mock so unlockResourceCalled can be inspected.
ctx.setMemoryManager(&memoryManager);
466+
// NOTE(review): the queue is built with `context` (not declared in this test, presumably a fixture member)
// while the buffer below is created in the local `ctx` — confirm this mismatch is intended.
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);
467+
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
468+
// Presumably a pool for which MemoryPool::isSystemMemoryPool() is false, so the lock path is taken — confirm.
static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible);
469+
// Destination pointer for the read; nonZeroCopyBuffer is not declared here (presumably a fixture member).
void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();
470+
471+
// Blocking read (CL_TRUE) at offset 0.
// NOTE(review): the read size is MemoryConstants::cacheLineSize although the buffer was created with size 1 — confirm.
retVal = mockCmdQ->enqueueReadBuffer(buffer.get(),
472+
CL_TRUE,
473+
0,
474+
MemoryConstants::cacheLineSize,
475+
ptr,
476+
0,
477+
nullptr,
478+
nullptr);
479+
480+
EXPECT_EQ(CL_SUCCESS, retVal);
481+
// Exactly one unlock is expected on the mock memory manager.
EXPECT_EQ(1u, memoryManager.unlockResourceCalled);
482+
}
483+
484+
// Verifies that a blocking enqueueReadBuffer does not call unlockResource
// when ForceResourceLockOnTransferCalls is enabled but the allocation's
// memory pool has been overridden to System4KBPages.
// NOTE(review): "gicen" and "Propertis" in the test name look like typos for "given" and "Properties".
HWTEST_F(EnqueueReadBufferTypeTest, gicenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsNotAvailableThenItIsNotUnlocked) {
485+
// Presumably restores the debug flags changed below when it goes out of scope — confirm.
DebugManagerStateRestore dbgRestore;
486+
DebugManager.flags.ForceResourceLockOnTransferCalls.set(true);
487+
DebugManager.flags.DoCpuCopyOnReadBuffer.set(true);
488+
489+
ExecutionEnvironment executionEnvironment;
490+
MockMemoryManager memoryManager(false, true, executionEnvironment);
491+
MockContext ctx;
492+
cl_int retVal;
493+
// Route allocations made through ctx to the mock so unlockResourceCalled can be inspected.
ctx.setMemoryManager(&memoryManager);
494+
// NOTE(review): the queue is built with `context` (not declared in this test, presumably a fixture member)
// while the buffer below is created in the local `ctx` — confirm this mismatch is intended.
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);
495+
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal));
496+
// Presumably a pool for which MemoryPool::isSystemMemoryPool() is true, so no lock is taken — confirm.
static_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::System4KBPages);
497+
// Destination pointer for the read; nonZeroCopyBuffer is not declared here (presumably a fixture member).
void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();
498+
499+
// Blocking read (CL_TRUE) at offset 0.
// NOTE(review): the read size is MemoryConstants::cacheLineSize although the buffer was created with size 1 — confirm.
retVal = mockCmdQ->enqueueReadBuffer(buffer.get(),
500+
CL_TRUE,
501+
0,
502+
MemoryConstants::cacheLineSize,
503+
ptr,
504+
0,
505+
nullptr,
506+
nullptr);
507+
508+
EXPECT_EQ(CL_SUCCESS, retVal);
509+
// No unlock is expected on the mock memory manager.
EXPECT_EQ(0u, memoryManager.unlockResourceCalled);
510+
}
511+
456512
using NegativeFailAllocationTest = Test<NegativeFailAllocationCommandEnqueueBaseFixture>;
457513

458514
HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {

0 commit comments

Comments
 (0)