Skip to content

Commit d900bdf

Browse files
[33/n] Internal 4GB allocator.
- Move indirect heap to internal allocator domain.
- Add logic in getIndirectHeap to allocate with the proper API depending on heap type.
- Add State Base Address programming, reflecting that the Indirect Object Heap is now placed in the 4GB domain.
- For AddPatchInfoCommentsForAUBDump mode, keep all heaps in non-4GB mode.

Change-Id: I6862f6a249e444d0d6cfe7e499a10d43f284553e
1 parent 81362d5 commit d900bdf

15 files changed

+104
-46
lines changed

runtime/command_queue/command_queue.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -635,15 +635,24 @@ void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType,
635635
auto memoryManager = device->getMemoryManager();
636636
size_t reservedSize = 0;
637637
auto finalHeapSize = defaultHeapSize;
638+
bool requireInternalHeap = IndirectHeap::INDIRECT_OBJECT == heapType ? true : false;
639+
640+
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
641+
requireInternalHeap = false;
642+
}
638643

639644
minRequiredSize += reservedSize;
640645

641646
finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize);
642647

643-
auto heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize, false).release();
648+
auto heapMemory = memoryManager->obtainReusableAllocation(finalHeapSize, requireInternalHeap).release();
644649

645650
if (!heapMemory) {
646-
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
651+
if (requireInternalHeap) {
652+
heapMemory = memoryManager->createInternalGraphicsAllocation(nullptr, finalHeapSize);
653+
} else {
654+
heapMemory = memoryManager->allocateGraphicsMemory(finalHeapSize, MemoryConstants::pageSize);
655+
}
647656
} else {
648657
finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize);
649658
}
@@ -659,7 +668,7 @@ void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType,
659668
indirectHeap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize);
660669
indirectHeap->replaceGraphicsAllocation(heapMemory);
661670
} else {
662-
indirectHeap = new IndirectHeap(heapMemory);
671+
indirectHeap = new IndirectHeap(heapMemory, requireInternalHeap);
663672
indirectHeap->overrideMaxSize(finalHeapSize);
664673
}
665674
}

runtime/helpers/state_base_address.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -25,15 +25,16 @@
2525

2626
namespace OCLRT {
2727

28+
class IndirectHeap;
2829
class LinearStream;
2930

3031
template <typename GfxFamily>
3132
struct StateBaseAddressHelper {
3233
static void programStateBaseAddress(
3334
LinearStream &commandStream,
34-
const LinearStream &dsh,
35-
const LinearStream &ioh,
36-
const LinearStream &ssh,
35+
const IndirectHeap &dsh,
36+
const IndirectHeap &ioh,
37+
const IndirectHeap &ssh,
3738
uint64_t generalStateBase,
3839
uint32_t statelessMocsIndex,
3940
uint64_t internalHeapBase);

runtime/helpers/state_base_address.inl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -21,7 +21,7 @@
2121
*/
2222

2323
#include "hw_cmds.h"
24-
#include "runtime/command_stream/linear_stream.h"
24+
#include "runtime/indirect_heap/indirect_heap.h"
2525
#include "runtime/helpers/cache_policy.h"
2626
#include "runtime/gmm_helper/gmm_helper.h"
2727
#include "runtime/memory_manager/memory_constants.h"
@@ -30,9 +30,9 @@ namespace OCLRT {
3030
template <typename GfxFamily>
3131
void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
3232
LinearStream &commandStream,
33-
const LinearStream &dsh,
34-
const LinearStream &ioh,
35-
const LinearStream &ssh,
33+
const IndirectHeap &dsh,
34+
const IndirectHeap &ioh,
35+
const IndirectHeap &ssh,
3636
uint64_t generalStateBase,
3737
uint32_t statelessMocsIndex,
3838
uint64_t internalHeapBase) {
@@ -51,7 +51,6 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
5151
// GSH must be set to 0 for stateless
5252
pCmd->setGeneralStateBaseAddress(generalStateBase);
5353
pCmd->setSurfaceStateBaseAddress(reinterpret_cast<uintptr_t>(ssh.getCpuBase()));
54-
pCmd->setIndirectObjectBaseAddress(reinterpret_cast<uintptr_t>(ioh.getCpuBase()));
5554
pCmd->setInstructionBaseAddress(internalHeapBase);
5655

5756
pCmd->setDynamicStateBufferSizeModifyEnable(true);
@@ -61,7 +60,9 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
6160

6261
pCmd->setDynamicStateBufferSize(static_cast<uint32_t>((dsh.getMaxAvailableSpace() + MemoryConstants::pageMask) / MemoryConstants::pageSize));
6362
pCmd->setGeneralStateBufferSize(static_cast<uint32_t>(-1));
64-
pCmd->setIndirectObjectBufferSize(static_cast<uint32_t>((ioh.getMaxAvailableSpace() + MemoryConstants::pageMask) / MemoryConstants::pageSize));
63+
64+
pCmd->setIndirectObjectBaseAddress(ioh.getHeapGpuBase());
65+
pCmd->setIndirectObjectBufferSize(ioh.getHeapSizeInPages());
6566

6667
pCmd->setInstructionBufferSize(MemoryConstants::sizeOf4GBinPageEntities);
6768

runtime/indirect_heap/indirect_heap.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "runtime/helpers/aligned_memory.h"
2626
#include "runtime/helpers/ptr_math.h"
2727
#include "runtime/helpers/basic_math.h"
28+
#include "runtime/memory_manager/memory_constants.h"
2829

2930
namespace OCLRT {
3031
class GraphicsAllocation;
@@ -53,8 +54,9 @@ class IndirectHeap : public LinearStream {
5354
IndirectHeap &operator=(const IndirectHeap &) = delete;
5455

5556
void align(size_t alignment);
56-
uint64_t getHeapGpuStartOffset();
57-
uint64_t getHeapGpuBase();
57+
uint64_t getHeapGpuStartOffset() const;
58+
uint64_t getHeapGpuBase() const;
59+
uint32_t getHeapSizeInPages() const;
5860

5961
protected:
6062
bool canBeUtilizedAs4GbHeap = false;
@@ -64,18 +66,25 @@ inline void IndirectHeap::align(size_t alignment) {
6466
auto address = alignUp(ptrOffset(buffer, sizeUsed), alignment);
6567
sizeUsed = ptrDiff(address, buffer);
6668
}
67-
inline uint64_t IndirectHeap::getHeapGpuStartOffset() {
69+
inline uint64_t IndirectHeap::getHeapGpuStartOffset() const {
6870
if (this->canBeUtilizedAs4GbHeap) {
6971
return this->graphicsAllocation->getGpuAddressToPatch();
7072
} else {
7173
return 0llu;
7274
}
7375
}
74-
inline uint64_t IndirectHeap::getHeapGpuBase() {
76+
inline uint64_t IndirectHeap::getHeapGpuBase() const {
7577
if (this->canBeUtilizedAs4GbHeap) {
7678
return this->graphicsAllocation->gpuBaseAddress;
7779
} else {
7880
return this->graphicsAllocation->getGpuAddress();
7981
}
8082
}
83+
inline uint32_t IndirectHeap::getHeapSizeInPages() const {
84+
if (this->canBeUtilizedAs4GbHeap) {
85+
return MemoryConstants::sizeOf4GBinPageEntities;
86+
} else {
87+
return (static_cast<uint32_t>(getMaxAvailableSpace()) + MemoryConstants::pageMask) / MemoryConstants::pageSize;
88+
}
89+
}
8190
}

unit_tests/aub_tests/command_queue/enqueue_copy_buffer_rect_aub_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -60,7 +60,7 @@ HWTEST_P(AUBCopyBufferRect, simple) {
6060
static const size_t rowPitch = 20;
6161
static const size_t slicePitch = rowPitch * rowPitch;
6262
static const size_t elementCount = slicePitch * rowPitch;
63-
MockContext context;
63+
MockContext context(this->pDevice);
6464

6565
cl_uchar *srcMemory = new uint8_t[elementCount + 8];
6666
cl_uchar *dstMemory = new uint8_t[elementCount + 8];

unit_tests/aub_tests/command_queue/enqueue_read_buffer_aub_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ struct ReadBufferHw
4848
typedef ReadBufferHw AUBReadBuffer;
4949

5050
HWTEST_P(AUBReadBuffer, simple) {
51-
MockContext context;
51+
MockContext context(this->pDevice);
5252

5353
cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f};
5454
cl_float destMemory[] = {0.0f, 0.0f, 0.0f, 0.0f};
@@ -118,7 +118,7 @@ HWTEST_F(AUBReadBuffer, reserveCanonicalGpuAddress) {
118118
return;
119119
}
120120

121-
MockContext context;
121+
MockContext context(this->pDevice);
122122

123123
cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f};
124124
cl_float dstMemory[] = {0.0f, 0.0f, 0.0f, 0.0f};

unit_tests/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -49,7 +49,7 @@ typedef ReadBufferRectHw AUBReadBufferRect;
4949
static const size_t width = 10;
5050

5151
HWTEST_P(AUBReadBufferRect, simple3D) {
52-
MockContext context;
52+
MockContext context(this->pDevice);
5353
size_t rowPitch = width;
5454
size_t slicePitch = rowPitch * rowPitch;
5555

unit_tests/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -50,7 +50,7 @@ typedef WriteBufferRectHw AUBWriteBufferRect;
5050
static const size_t width = 10;
5151

5252
HWTEST_P(AUBWriteBufferRect, simple3D) {
53-
MockContext context;
53+
MockContext context(this->pDevice);
5454
size_t rowPitch = width;
5555
size_t slicePitch = rowPitch * rowPitch;
5656

unit_tests/command_queue/command_queue_tests.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,18 @@ TEST_P(CommandQueueIndirectHeapTest, IndirectHeapIsProvidedByDevice) {
419419
EXPECT_NE(nullptr, &indirectHeap);
420420
}
421421

422+
// Parameterized over the heap type (GetParam()): requests an indirect heap of
// that type from a freshly created CommandQueue and checks the
// is32BitAllocation flag of the graphics allocation backing it. The flag is
// expected to be set only for IndirectHeap::INDIRECT_OBJECT.
TEST_P(CommandQueueIndirectHeapTest, givenIndirectObjectHeapWhenItIsQueriedForInternalAllocationThenTrueIsReturned) {
423+
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
424+
CommandQueue cmdQ(context.get(), pDevice, props);
425+
426+
auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 8192);
427+
// Only the indirect object heap is expected to report a 32-bit allocation.
if (this->GetParam() == IndirectHeap::INDIRECT_OBJECT) {
428+
EXPECT_TRUE(indirectHeap.getGraphicsAllocation()->is32BitAllocation);
429+
} else {
430+
EXPECT_FALSE(indirectHeap.getGraphicsAllocation()->is32BitAllocation);
431+
}
432+
}
433+
422434
TEST_P(CommandQueueIndirectHeapTest, IndirectHeapContainsAtLeast64KB) {
423435
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
424436
CommandQueue cmdQ(context.get(), pDevice, props);
@@ -482,7 +494,14 @@ TEST_P(CommandQueueIndirectHeapTest, MemoryManagerWithReusableAllocationsWhenAsk
482494
auto memoryManager = pDevice->getMemoryManager();
483495
auto allocationSize = defaultHeapSize * 2;
484496

485-
auto allocation = memoryManager->allocateGraphicsMemory(allocationSize);
497+
GraphicsAllocation *allocation = nullptr;
498+
499+
if (this->GetParam() == IndirectHeap::INDIRECT_OBJECT) {
500+
allocation = memoryManager->createInternalGraphicsAllocation(nullptr, allocationSize);
501+
} else {
502+
allocation = memoryManager->allocateGraphicsMemory(allocationSize);
503+
}
504+
486505
memoryManager->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), REUSABLE_ALLOCATION);
487506

488507
EXPECT_FALSE(memoryManager->allocationsForReuse.peekIsEmpty());

unit_tests/command_queue/enqueue_copy_buffer_tests.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -281,7 +281,7 @@ HWTEST_F(EnqueueCopyBufferTest, argumentZeroShouldMatchSourceAddress) {
281281
ASSERT_NE(nullptr, kernel);
282282

283283
// Determine where the argument is
284-
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u);
284+
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
285285

286286
EXPECT_EQ((void *)((uintptr_t)srcBuffer->getGraphicsAllocation()->getGpuAddress()), *pArgument);
287287
}
@@ -308,7 +308,7 @@ HWTEST_F(EnqueueCopyBufferTest, argumentOneShouldMatchDestAddress) {
308308
ASSERT_NE(nullptr, kernel);
309309

310310
// Determine where the argument is
311-
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 1);
311+
auto pArgument = (void **)getStatelessArgumentPointer<FamilyType>(*kernel, 1u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0));
312312

313313
EXPECT_EQ((void *)((uintptr_t)dstBuffer->getGraphicsAllocation()->getGpuAddress()), *pArgument);
314314
}

0 commit comments

Comments
 (0)