Skip to content

Commit 815ae85

Browse files
Graphics Allocation: store task count per context id
Move definition of allocations list method to internal_allocation_storage.cpp

Change-Id: I4c6038df8fd1b9335e8a74edbab33b78f9293d8f
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
1 parent ea2e634 commit 815ae85

32 files changed

239 additions and 160 deletions (+239 / -160 lines changed)

runtime/command_stream/command_stream_receiver.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ void CommandStreamReceiver::makeResident(GraphicsAllocation &gfxAllocation) {
6161
auto submissionTaskCount = this->taskCount + 1;
6262
if (gfxAllocation.residencyTaskCount[deviceIndex] < (int)submissionTaskCount) {
6363
this->getResidencyAllocations().push_back(&gfxAllocation);
64-
gfxAllocation.taskCount = submissionTaskCount;
64+
gfxAllocation.updateTaskCount(submissionTaskCount, deviceIndex);
6565
if (gfxAllocation.residencyTaskCount[deviceIndex] == ObjectNotResident) {
6666
this->totalMemoryUsed += gfxAllocation.getUnderlyingBufferSize();
6767
}
@@ -103,13 +103,13 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf
103103
}
104104
}
105105

106-
void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationType) {
106+
void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) {
107107
auto address = getTagAddress();
108108
if (address && requiredTaskCount != ObjectNotUsed) {
109109
while (*address < requiredTaskCount)
110110
;
111111
}
112-
internalAllocationStorage->cleanAllocationList(requiredTaskCount, allocationType);
112+
internalAllocationStorage->cleanAllocationList(requiredTaskCount, allocationUsage);
113113
}
114114

115115
MemoryManager *CommandStreamReceiver::getMemoryManager() const {
@@ -359,7 +359,7 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
359359
if (allocation == nullptr) {
360360
return false;
361361
}
362-
allocation->taskCount = Event::eventNotReady;
362+
allocation->updateTaskCount(Event::eventNotReady, deviceIndex);
363363
surface.setAllocation(allocation);
364364
internalAllocationStorage->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), TEMPORARY_ALLOCATION);
365365
return true;

runtime/command_stream/command_stream_receiver.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ class CommandStreamReceiver {
8282

8383
virtual GmmPageTableMngr *createPageTableManager() { return nullptr; }
8484

85-
MOCKABLE_VIRTUAL void waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationType);
85+
MOCKABLE_VIRTUAL void waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage);
8686

8787
LinearStream &getCS(size_t minRequiredSize = 1024u);
8888
OSInterface *getOSInterface() { return osInterface; };
@@ -149,6 +149,7 @@ class CommandStreamReceiver {
149149
size_t defaultSshSize;
150150

151151
void setDeviceIndex(uint32_t deviceIndex) { this->deviceIndex = deviceIndex; }
152+
uint32_t getDeviceIndex() const { return this->deviceIndex; }
152153
AllocationsList &getTemporaryAllocations();
153154
AllocationsList &getAllocationsForReuse();
154155
InternalAllocationStorage *getInternalAllocationStorage() const { return internalAllocationStorage.get(); }

runtime/command_stream/command_stream_receiver_hw.inl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
248248

249249
if (requiredScratchSize && (!scratchAllocation || scratchAllocation->getUnderlyingBufferSize() < requiredScratchSizeInBytes)) {
250250
if (scratchAllocation) {
251-
scratchAllocation->taskCount = this->taskCount;
251+
scratchAllocation->updateTaskCount(this->taskCount, this->deviceIndex);
252252
internalAllocationStorage->storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
253253
}
254254
createScratchSpaceAllocation(requiredScratchSizeInBytes);

runtime/mem_obj/mem_obj.cpp

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
#include "runtime/context/context.h"
99
#include "runtime/command_queue/command_queue.h"
10+
#include "runtime/command_stream/command_stream_receiver.h"
1011
#include "runtime/device/device.h"
1112
#include "runtime/mem_obj/mem_obj.h"
1213
#include "runtime/memory_manager/deferred_deleter.h"
@@ -15,7 +16,6 @@
1516
#include "runtime/gmm_helper/gmm.h"
1617
#include "runtime/helpers/aligned_memory.h"
1718
#include "runtime/helpers/get_info.h"
18-
#include "runtime/command_stream/command_stream_receiver.h"
1919
#include <algorithm>
2020

2121
namespace OCLRT {
@@ -64,7 +64,7 @@ MemObj::~MemObj() {
6464
if (!doAsyncDestrucions) {
6565
needWait = true;
6666
}
67-
if (needWait && graphicsAllocation->taskCount != ObjectNotUsed) {
67+
if (needWait && graphicsAllocation->peekWasUsed()) {
6868
waitForCsrCompletion();
6969
}
7070
destroyGraphicsAllocation(graphicsAllocation, doAsyncDestrucions);
@@ -288,22 +288,15 @@ void MemObj::releaseAllocatedMapPtr() {
288288
}
289289

290290
void MemObj::waitForCsrCompletion() {
291-
if (graphicsAllocation) {
292-
memoryManager->getCommandStreamReceiver(0)->waitForCompletionWithTimeout(false, TimeoutControls::maxTimeout, graphicsAllocation->taskCount);
293-
}
291+
memoryManager->getCommandStreamReceiver(0)->waitForCompletionWithTimeout(false, TimeoutControls::maxTimeout, graphicsAllocation->getTaskCount(0u));
294292
}
295293

296294
void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy) {
297-
if (asyncDestroy && allocation->taskCount != ObjectNotUsed) {
298-
auto commandStreamReceiver = memoryManager->getCommandStreamReceiver(0);
299-
auto currentTag = *commandStreamReceiver->getTagAddress();
300-
if (currentTag < allocation->taskCount) {
301-
auto storageForAllocation = commandStreamReceiver->getInternalAllocationStorage();
302-
storageForAllocation->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), TEMPORARY_ALLOCATION);
303-
return;
304-
}
295+
if (asyncDestroy) {
296+
memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation);
297+
} else {
298+
memoryManager->freeGraphicsMemory(allocation);
305299
}
306-
memoryManager->freeGraphicsMemory(allocation);
307300
}
308301

309302
bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObjest, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {

runtime/memory_manager/allocations_list.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,11 @@
1111

1212
namespace OCLRT {
1313
class GraphicsAllocation;
14+
class CommandStreamReceiver;
1415

1516
class AllocationsList : public IDList<GraphicsAllocation, true, true> {
1617
public:
17-
std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, volatile uint32_t *csrTagAddress, bool internalAllocationRequired);
18+
std::unique_ptr<GraphicsAllocation> detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, bool internalAllocationRequired);
1819

1920
private:
2021
GraphicsAllocation *detachAllocationImpl(GraphicsAllocation *, void *);

runtime/memory_manager/graphics_allocation.cpp

Lines changed: 43 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,54 @@
55
*
66
*/
77

8-
#include "graphics_allocation.h"
98
#include "runtime/helpers/aligned_memory.h"
9+
#include "runtime/memory_manager/graphics_allocation.h"
1010

11-
bool OCLRT::GraphicsAllocation::isL3Capable() {
11+
namespace OCLRT {
12+
bool GraphicsAllocation::isL3Capable() {
1213
auto ptr = ptrOffset(cpuPtr, static_cast<size_t>(this->allocationOffset));
1314
if (alignUp(ptr, MemoryConstants::cacheLineSize) == ptr && alignUp(this->size, MemoryConstants::cacheLineSize) == this->size) {
1415
return true;
1516
}
1617
return false;
1718
}
19+
GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn) : gpuBaseAddress(baseAddress),
20+
size(sizeIn),
21+
cpuPtr(cpuPtrIn),
22+
gpuAddress(gpuAddress),
23+
taskCounts(maxOsContextCount) {
24+
initTaskCounts();
25+
}
26+
27+
GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn) : size(sizeIn),
28+
cpuPtr(cpuPtrIn),
29+
gpuAddress(castToUint64(cpuPtrIn)),
30+
sharedHandle(sharedHandleIn),
31+
taskCounts(maxOsContextCount) {
32+
initTaskCounts();
33+
}
34+
GraphicsAllocation::~GraphicsAllocation() = default;
35+
36+
bool GraphicsAllocation::peekWasUsed() const { return registeredContextsNum > 0; }
37+
38+
void GraphicsAllocation::updateTaskCount(uint32_t newTaskCount, uint32_t contextId) {
39+
UNRECOVERABLE_IF(contextId >= taskCounts.size());
40+
if (taskCounts[contextId] == ObjectNotUsed) {
41+
registeredContextsNum++;
42+
}
43+
if (newTaskCount == ObjectNotUsed) {
44+
registeredContextsNum--;
45+
}
46+
taskCounts[contextId] = newTaskCount;
47+
}
48+
49+
uint32_t GraphicsAllocation::getTaskCount(uint32_t contextId) const {
50+
UNRECOVERABLE_IF(contextId >= taskCounts.size());
51+
return taskCounts[contextId];
52+
}
53+
void GraphicsAllocation::initTaskCounts() {
54+
for (auto i = 0u; i < taskCounts.size(); i++) {
55+
taskCounts[i] = ObjectNotUsed;
56+
}
57+
}
58+
} // namespace OCLRT

runtime/memory_manager/graphics_allocation.h

Lines changed: 20 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include "runtime/memory_manager/memory_pool.h"
1818
#include "runtime/memory_manager/residency_container.h"
1919
#include "runtime/utilities/idlist.h"
20+
#include "runtime/utilities/stackvec.h"
2021

2122
namespace OCLRT {
2223

@@ -34,19 +35,7 @@ const uint32_t ObjectNotUsed = (uint32_t)-1;
3435
class Gmm;
3536

3637
class GraphicsAllocation : public IDNode<GraphicsAllocation> {
37-
protected:
38-
size_t size = 0;
39-
void *cpuPtr = nullptr;
40-
uint64_t gpuAddress = 0;
41-
bool coherent = false;
42-
osHandle sharedHandle;
43-
bool locked = false;
44-
uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources
45-
bool evictable = true;
46-
MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
47-
4838
public:
49-
uint32_t taskCount = ObjectNotUsed;
5039
OsHandleStorage fragmentsStorage;
5140
bool is32BitAllocation = false;
5241
uint64_t gpuBaseAddress = 0;
@@ -81,20 +70,13 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
8170
SHARED_RESOURCE,
8271
};
8372

84-
virtual ~GraphicsAllocation() = default;
73+
virtual ~GraphicsAllocation();
8574
GraphicsAllocation &operator=(const GraphicsAllocation &) = delete;
8675
GraphicsAllocation(const GraphicsAllocation &) = delete;
8776

88-
GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn) : size(sizeIn),
89-
cpuPtr(cpuPtrIn),
90-
gpuAddress(gpuAddress),
91-
sharedHandle(Sharing::nonSharedResource),
92-
gpuBaseAddress(baseAddress) {}
77+
GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn);
9378

94-
GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn) : size(sizeIn),
95-
cpuPtr(cpuPtrIn),
96-
gpuAddress(castToUint64(cpuPtrIn)),
97-
sharedHandle(sharedHandleIn) {}
79+
GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn);
9880

9981
void *getUnderlyingBuffer() const { return cpuPtr; }
10082
void setCpuPtrAndGpuAddress(void *cpuPtr, uint64_t gpuAddress) {
@@ -141,14 +123,30 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
141123
MemoryPool::Type getMemoryPool() {
142124
return memoryPool;
143125
}
126+
bool peekWasUsed() const;
127+
void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
128+
uint32_t getTaskCount(uint32_t contextId) const;
144129

145130
protected:
131+
void initTaskCounts();
132+
146133
//this variable can only be modified from SubmissionAggregator
147134
friend class SubmissionAggregator;
135+
size_t size = 0;
136+
void *cpuPtr = nullptr;
137+
uint64_t gpuAddress = 0;
138+
bool coherent = false;
139+
osHandle sharedHandle = Sharing::nonSharedResource;
140+
bool locked = false;
141+
uint32_t reuseCount = 0; // GraphicsAllocation can be reused by shared resources
142+
bool evictable = true;
143+
MemoryPool::Type memoryPool = MemoryPool::MemoryNull;
148144
uint32_t inspectionId = 0;
149145
AllocationType allocationType = AllocationType::UNKNOWN;
150146
bool aubWritable = true;
151147
bool allocDumpable = false;
152148
bool memObjectsAllocationWithWritableFlags = false;
149+
StackVec<uint32_t, maxOsContextCount> taskCounts;
150+
std::atomic<uint32_t> registeredContextsNum{0};
153151
};
154152
} // namespace OCLRT

runtime/memory_manager/internal_allocation_storage.cpp

Lines changed: 38 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@
1010
#include "runtime/memory_manager/memory_manager.h"
1111

1212
namespace OCLRT {
13-
InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) : commandStreamReceiver(commandStreamReceiver){};
13+
InternalAllocationStorage::InternalAllocationStorage(CommandStreamReceiver &commandStreamReceiver) : commandStreamReceiver(commandStreamReceiver), contextId(commandStreamReceiver.getDeviceIndex()){};
1414
void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocation> gfxAllocation, uint32_t allocationUsage) {
15-
uint32_t taskCount = gfxAllocation->taskCount;
15+
uint32_t taskCount = gfxAllocation->getTaskCount(contextId);
1616

1717
if (allocationUsage == REUSABLE_ALLOCATION) {
1818
taskCount = commandStreamReceiver.peekTaskCount();
@@ -28,7 +28,7 @@ void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<Gra
2828
}
2929
}
3030
auto &allocationsList = (allocationUsage == TEMPORARY_ALLOCATION) ? temporaryAllocations : allocationsForReuse;
31-
gfxAllocation->taskCount = taskCount;
31+
gfxAllocation->updateTaskCount(taskCount, contextId);
3232
allocationsList.pushTailOne(*gfxAllocation.release());
3333
}
3434

@@ -43,7 +43,7 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo
4343
IDList<GraphicsAllocation, false, true> allocationsLeft;
4444
while (curr != nullptr) {
4545
auto *next = curr->next;
46-
if (curr->taskCount <= waitTaskCount) {
46+
if (curr->getTaskCount(contextId) <= waitTaskCount) {
4747
memoryManager->freeGraphicsMemory(curr);
4848
} else {
4949
allocationsLeft.pushTailOne(*curr);
@@ -57,8 +57,41 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo
5757
}
5858

5959
std::unique_ptr<GraphicsAllocation> InternalAllocationStorage::obtainReusableAllocation(size_t requiredSize, bool internalAllocation) {
60-
auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver.getTagAddress(), internalAllocation);
60+
auto allocation = allocationsForReuse.detachAllocation(requiredSize, commandStreamReceiver, internalAllocation);
6161
return allocation;
6262
}
6363

64+
struct ReusableAllocationRequirements {
65+
size_t requiredMinimalSize;
66+
volatile uint32_t *csrTagAddress;
67+
bool internalAllocationRequired;
68+
uint32_t contextId;
69+
};
70+
71+
std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, CommandStreamReceiver &commandStreamReceiver, bool internalAllocationRequired) {
72+
ReusableAllocationRequirements req;
73+
req.requiredMinimalSize = requiredMinimalSize;
74+
req.csrTagAddress = commandStreamReceiver.getTagAddress();
75+
req.internalAllocationRequired = internalAllocationRequired;
76+
req.contextId = commandStreamReceiver.getDeviceIndex();
77+
GraphicsAllocation *a = nullptr;
78+
GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
79+
return std::unique_ptr<GraphicsAllocation>(retAlloc);
80+
}
81+
82+
GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
83+
ReusableAllocationRequirements *req = static_cast<ReusableAllocationRequirements *>(data);
84+
auto *curr = head;
85+
while (curr != nullptr) {
86+
auto currentTagValue = *req->csrTagAddress;
87+
if ((req->internalAllocationRequired == curr->is32BitAllocation) &&
88+
(curr->getUnderlyingBufferSize() >= req->requiredMinimalSize) &&
89+
((currentTagValue > curr->getTaskCount(req->contextId)) || (curr->getTaskCount(req->contextId) == 0))) {
90+
return removeOneImpl(curr, nullptr);
91+
}
92+
curr = curr->next;
93+
}
94+
return nullptr;
95+
}
96+
6497
} // namespace OCLRT

runtime/memory_manager/internal_allocation_storage.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ class InternalAllocationStorage {
2727
protected:
2828
void freeAllocationsList(uint32_t waitTaskCount, AllocationsList &allocationsList);
2929
CommandStreamReceiver &commandStreamReceiver;
30+
const uint32_t contextId;
3031

3132
AllocationsList temporaryAllocations;
3233
AllocationsList allocationsForReuse;

runtime/memory_manager/memory_manager.cpp

Lines changed: 1 addition & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -27,37 +27,6 @@
2727
namespace OCLRT {
2828
constexpr size_t TagCount = 512;
2929

30-
struct ReusableAllocationRequirements {
31-
size_t requiredMinimalSize;
32-
volatile uint32_t *csrTagAddress;
33-
bool internalAllocationRequired;
34-
};
35-
36-
std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, volatile uint32_t *csrTagAddress, bool internalAllocationRequired) {
37-
ReusableAllocationRequirements req;
38-
req.requiredMinimalSize = requiredMinimalSize;
39-
req.csrTagAddress = csrTagAddress;
40-
req.internalAllocationRequired = internalAllocationRequired;
41-
GraphicsAllocation *a = nullptr;
42-
GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
43-
return std::unique_ptr<GraphicsAllocation>(retAlloc);
44-
}
45-
46-
GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, void *data) {
47-
ReusableAllocationRequirements *req = static_cast<ReusableAllocationRequirements *>(data);
48-
auto *curr = head;
49-
while (curr != nullptr) {
50-
auto currentTagValue = req->csrTagAddress ? *req->csrTagAddress : -1;
51-
if ((req->internalAllocationRequired == curr->is32BitAllocation) &&
52-
(curr->getUnderlyingBufferSize() >= req->requiredMinimalSize) &&
53-
((currentTagValue > curr->taskCount) || (curr->taskCount == 0))) {
54-
return removeOneImpl(curr, nullptr);
55-
}
56-
curr = curr->next;
57-
}
58-
return nullptr;
59-
}
60-
6130
MemoryManager::MemoryManager(bool enable64kbpages, bool enableLocalMemory,
6231
ExecutionEnvironment &executionEnvironment) : allocator32Bit(nullptr), enable64kbpages(enable64kbpages),
6332
localMemorySupported(enableLocalMemory),
@@ -198,7 +167,7 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation) {
198167
//if not in use destroy in place
199168
//if in use pass to temporary allocation list that is cleaned on blocking calls
200169
void MemoryManager::checkGpuUsageAndDestroyGraphicsAllocations(GraphicsAllocation *gfxAllocation) {
201-
if (gfxAllocation->taskCount == ObjectNotUsed || gfxAllocation->taskCount <= *getCommandStreamReceiver(0)->getTagAddress()) {
170+
if (!gfxAllocation->peekWasUsed() || gfxAllocation->getTaskCount(0u) <= *getCommandStreamReceiver(0)->getTagAddress()) {
202171
freeGraphicsMemory(gfxAllocation);
203172
} else {
204173
getCommandStreamReceiver(0)->getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>(gfxAllocation), TEMPORARY_ALLOCATION);

0 commit comments

Comments
 (0)