Skip to content

Commit 7319023

Browse files
Add capability to use malloc for Heap32 base.
- shift page tables to lower bits

Change-Id: I54dcba72255215cf5be75ba425fc27727b0bfd98
1 parent 08a55f0 commit 7319023

File tree

6 files changed

+47
-12
lines changed

6 files changed

+47
-12
lines changed

runtime/aub_mem_dump/aub_mem_dump.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,18 @@ const uint64_t PageTableTraits<48>::physicalMemory = 0; // 1ull <<addressingBits
3737

3838
const uint64_t PageTableTraits<48>::numPTEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS);
3939
const uint64_t PageTableTraits<48>::sizePT = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS) * sizeof(uint64_t);
40-
const uint64_t PageTableTraits<48>::ptBaseAddress = BIT(34);
40+
const uint64_t PageTableTraits<48>::ptBaseAddress = BIT(32);
4141

4242
const uint64_t PageTableTraits<48>::numPDEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS);
4343
const uint64_t PageTableTraits<48>::sizePD = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS) * sizeof(uint64_t);
44-
const uint64_t PageTableTraits<48>::pdBaseAddress = BIT(33);
44+
const uint64_t PageTableTraits<48>::pdBaseAddress = BIT(31);
4545

4646
const uint64_t PageTableTraits<48>::numPDPEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS - PageTableTraits<48>::NUM_PDE_BITS);
4747
const uint64_t PageTableTraits<48>::sizePDP = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS - PageTableTraits<48>::NUM_PDE_BITS) * sizeof(uint64_t);
48-
const uint64_t PageTableTraits<48>::pdpBaseAddress = BIT(32);
48+
const uint64_t PageTableTraits<48>::pdpBaseAddress = BIT(30);
4949
const uint64_t PageTableTraits<48>::numPML4Entries = BIT(NUM_PML4_BITS);
5050
const uint64_t PageTableTraits<48>::sizePML4 = BIT(NUM_PML4_BITS) * sizeof(uint64_t);
51-
const uint64_t PageTableTraits<48>::pml4BaseAddress = BIT(31);
51+
const uint64_t PageTableTraits<48>::pml4BaseAddress = BIT(29);
5252
// clang-format on
5353

5454
void LrcaHelper::setRingTail(void *pLRCIn, uint32_t ringTail) const {

runtime/memory_manager/os_agnostic_memory_manager.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
namespace OCLRT {
2020

2121
OsAgnosticMemoryManager::~OsAgnosticMemoryManager() {
22+
if (DebugManager.flags.UseMallocToObtainHeap32Base.get()) {
23+
alignedFreeWrapper(reinterpret_cast<void *>(allocator32Bit->getBase()));
24+
}
2225
applyCommonCleanup();
2326
}
2427

@@ -92,8 +95,15 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemory(size_t
9295
void *ptrAlloc = nullptr;
9396
auto gpuAddress = allocator32Bit->allocate(allocationSize);
9497

95-
if (size < 0xfffff000) {
96-
ptrAlloc = alignedMallocWrapper(allocationSize, MemoryConstants::allocationAlignment);
98+
auto freeCpuPointer = true;
99+
100+
if (DebugManager.flags.UseMallocToObtainHeap32Base.get()) {
101+
ptrAlloc = reinterpret_cast<void *>(gpuAddress);
102+
freeCpuPointer = false;
103+
} else {
104+
if (size < 0xfffff000) {
105+
ptrAlloc = alignedMallocWrapper(allocationSize, MemoryConstants::allocationAlignment);
106+
}
97107
}
98108

99109
MemoryAllocation *memoryAllocation = nullptr;
@@ -102,7 +112,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemory(size_t
102112
memoryAllocation->is32BitAllocation = true;
103113
memoryAllocation->gpuBaseAddress = GmmHelper::canonize(allocator32Bit->getBase());
104114
memoryAllocation->sizeToFree = allocationSize;
105-
memoryAllocation->cpuPtrAllocated = true;
115+
memoryAllocation->cpuPtrAllocated = freeCpuPointer;
106116
}
107117
counter++;
108118
return memoryAllocation;
@@ -242,6 +252,13 @@ Allocator32bit *OsAgnosticMemoryManager::create32BitAllocator(bool aubUsage) {
242252
if (is32bit) {
243253
heap32Base = 0x0;
244254
}
255+
256+
if (DebugManager.flags.UseMallocToObtainHeap32Base.get()) {
257+
size_t allocationSize = 40 * 1024 * 1024u;
258+
allocatorSize = static_cast<uint64_t>(allocationSize);
259+
heap32Base = castToUint64(alignedMallocWrapper(allocationSize, MemoryConstants::allocationAlignment));
260+
}
261+
245262
return new Allocator32bit(heap32Base, allocatorSize);
246263
}
247264
} // namespace OCLRT

runtime/os_interface/DebugVariables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, SetCommandStreamReceiver, 0, "Set command stream
2424
DECLARE_DEBUG_VARIABLE(int32_t, TbxPort, 4321, "TCP-IP port of TBX server")
2525
DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-level batch buffers to AUB as single, flat batch buffer")
2626
DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information")
27+
DECLARE_DEBUG_VARIABLE(bool, UseMallocToObtainHeap32Base, false, "Instead of using dedicated ranges, use pointer from malloc as heap base.")
2728

2829
/*DEBUG FLAGS*/
2930
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information")

unit_tests/aub_tests/command_stream/aub_mem_dump_tests.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ std::string getAubFileName(const OCLRT::Device *pDevice, const std::string baseN
2626
}
2727

2828
TEST(PageTableTraits, when48BitTraitsAreUsedThenPageTableAddressesAreCorrect) {
29-
EXPECT_EQ(BIT(34), AubMemDump::PageTableTraits<48>::ptBaseAddress);
30-
EXPECT_EQ(BIT(33), AubMemDump::PageTableTraits<48>::pdBaseAddress);
31-
EXPECT_EQ(BIT(32), AubMemDump::PageTableTraits<48>::pdpBaseAddress);
32-
EXPECT_EQ(BIT(31), AubMemDump::PageTableTraits<48>::pml4BaseAddress);
29+
EXPECT_EQ(BIT(32), AubMemDump::PageTableTraits<48>::ptBaseAddress);
30+
EXPECT_EQ(BIT(31), AubMemDump::PageTableTraits<48>::pdBaseAddress);
31+
EXPECT_EQ(BIT(30), AubMemDump::PageTableTraits<48>::pdpBaseAddress);
32+
EXPECT_EQ(BIT(29), AubMemDump::PageTableTraits<48>::pml4BaseAddress);
3333
}
3434

3535
TEST(PageTableTraits, when32BitTraitsAreUsedThenPageTableAddressesAreCorrect) {

unit_tests/memory_manager/memory_manager_tests.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1878,3 +1878,19 @@ TEST(ResidencyDataTest, givenResidencyDataWhenUpdateCompletionDataIsCalledThenIt
18781878
EXPECT_EQ(lastFenceValue3, residency.lastFenceValues[1]);
18791879
EXPECT_EQ(lastFenceValue3, residency.getFenceValueForContextId(osContext2.getContextId()));
18801880
}
1881+
1882+
TEST(Heap32AllocationTests, givenDebugModeWhenMallocIsUsedToCreateAllocationWhenAllocationIsCreatedThenItDoesntRequireCpuPointerCleanup) {
1883+
DebugManagerStateRestore restore;
1884+
DebugManager.flags.UseMallocToObtainHeap32Base.set(true);
1885+
ExecutionEnvironment executionEnvironment;
1886+
executionEnvironment.incRefInternal();
1887+
OsAgnosticMemoryManager memoryManager(true, true, executionEnvironment);
1888+
auto internalBase = memoryManager.allocator32Bit->getBase();
1889+
EXPECT_NE(0x40000000000ul, internalBase);
1890+
EXPECT_NE(0x80000000000ul, internalBase);
1891+
EXPECT_NE(0x0ul, internalBase);
1892+
1893+
auto allocation = static_cast<MemoryAllocation *>(memoryManager.allocate32BitGraphicsMemory(4096u, nullptr, AllocationOrigin::EXTERNAL_ALLOCATION));
1894+
EXPECT_FALSE(allocation->cpuPtrAllocated);
1895+
memoryManager.freeGraphicsMemory(allocation);
1896+
}

unit_tests/test_files/igdrcl.config

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,5 @@ ReturnRawGpuTimestamps = 0
8888
DoNotRegisterTrimCallback = false
8989
AddClGlSharing = 0
9090
EnablePassInlineData = false
91-
LimitAmountOfReturnedDevices = 0
91+
LimitAmountOfReturnedDevices = 0
92+
UseMallocToObtainHeap32Base = false

0 commit comments

Comments
 (0)