diff --git a/Engine/src/delta/core/EngineTypes.h b/Engine/src/delta/core/EngineTypes.h index 426dda2..f0614b3 100644 --- a/Engine/src/delta/core/EngineTypes.h +++ b/Engine/src/delta/core/EngineTypes.h @@ -4,6 +4,7 @@ namespace delta::core { + // TODO: revise this structure and its purpose struct ThreadPageCoordinator { uint8_t* virtualAddressBase; @@ -12,20 +13,38 @@ namespace delta::core size_t pageSize; }; - struct EngineArena + struct ThreadArena { uint8_t* backingMemory; size_t capacity; size_t offset; }; + using task_t = void (*)(void*); + struct TaskQueue // SoA structure, Chase-Lev queue + { + std::atomic top; + std::atomic bottom; + + uint64_t size; + uint64_t mask; + + task_t* tasks; + void** payloads; + + static inline constexpr size_t FIELD_SIZE = sizeof(decltype(tasks)) + sizeof(decltype(payloads)); + }; + struct alignas(64) ThreadExecutionContext { uint32_t threadIx; uint32_t threadId; ThreadPageCoordinator pageCoordinator; - EngineArena transientArena; + TaskQueue taskQueue; + ThreadArena transientArena; + ThreadArena componentPoolArena; + ThreadArena sceneArena; delta::platform::Timer perThreadTimer; }; diff --git a/Engine/src/delta/core/MemoryConfig.cpp b/Engine/src/delta/core/MemoryConfig.cpp index fde284a..7d1982e 100644 --- a/Engine/src/delta/core/MemoryConfig.cpp +++ b/Engine/src/delta/core/MemoryConfig.cpp @@ -1,24 +1,35 @@ #include "MemoryConfig.h" - +#include "EngineTypes.h" #include +#include namespace delta::core { EngineMemoryConfig g_MemoryConfig{}; - std::atomic g_TotalLockedBytes{ 0 }; - void MemoryConfig_Initialize(size_t physicalRamInstalled, uint32_t maxEngineWorkers) + void MemoryConfig_Initialize(size_t physicalRamInstalled, size_t pageSize, uint32_t maxEngineWorkers) { g_MemoryConfig.totalPhysicalRam = physicalRamInstalled; - g_MemoryConfig.globalLockCeiling = (g_MemoryConfig.totalPhysicalRam / 10) * 7; - g_MemoryConfig.threadSoftBaseline = g_MemoryConfig.globalLockCeiling / maxEngineWorkers; + g_MemoryConfig.maxAllowedPhysical = 
(physicalRamInstalled / 10) * 7; // 70% of total capacity + + // calculate soft baseline (ideal case, no stealing from the global pool) + g_MemoryConfig.threadSoftBaseline = MemoryMap::VIRT_ZONE_TA_BASELINE; + + size_t totalControlTrackBytes = maxEngineWorkers * (sizeof(ThreadExecutionContext) + MemoryMap::VIRT_ZONE_QUEUE_SIZE); // NOTE(review): ThreadContext_Initialize sets up one context more than workerCount (the main thread); confirm whether this budget should count it too + size_t totalPrivateArenaBytes = maxEngineWorkers * MemoryMap::VIRT_ZONE_BASELINE_SUM; + size_t rawLockBudget = totalControlTrackBytes + totalPrivateArenaBytes; + + g_MemoryConfig.activeLockAllocation = (rawLockBudget + (pageSize - 1)) & ~(pageSize - 1); // round up to a page multiple; the mask trick requires pageSize to be a power of two + g_MemoryConfig.globalPoolSize = g_MemoryConfig.maxAllowedPhysical - g_MemoryConfig.activeLockAllocation; + assert(g_MemoryConfig.activeLockAllocation <= g_MemoryConfig.maxAllowedPhysical && g_MemoryConfig.activeLockAllocation <= g_MemoryConfig.globalPoolSize); // the first clause catches unsigned wrap-around of the subtraction above, which would otherwise make the second clause pass vacuously - g_TotalLockedBytes.store(0, std::memory_order_relaxed); + bool result = delta::platform::Memory_ElevateLockLimit(g_MemoryConfig.activeLockAllocation); + if (!result) + std::cout << "[DeltaEngine-Warning] Failed to elevate process working set quota\n"; } void MemoryConfig_Shutdown() { g_MemoryConfig = {}; - g_TotalLockedBytes.store(0, std::memory_order_relaxed); } } diff --git a/Engine/src/delta/core/MemoryConfig.h b/Engine/src/delta/core/MemoryConfig.h index 1d34f5d..f03f888 100644 --- a/Engine/src/delta/core/MemoryConfig.h +++ b/Engine/src/delta/core/MemoryConfig.h @@ -2,19 +2,55 @@ namespace delta::core { - // TODO: This is actually crap. In this file, there should be a tight, flat memory map for each thread - // possibly modificable via config macros or maybe slightly dynamic. 
+ // FUTURE TODO: Make it configurable via compile definitions + namespace MemoryMap + { + inline constexpr size_t VIRT_ZONE_SPACE_LENGTH = (1ull << 35); // 32GB of virtual address space + + // COMPONENTS: + // QUEUE + inline constexpr size_t VIRT_ZONE_QUEUE_OFFSET = 0ull; + inline constexpr size_t VIRT_ZONE_QUEUE_SIZE = (1ull << 16); // 64KB + inline constexpr size_t VIRT_ZONE_QUEUE_BASELINE = UINT64_MAX; // No baseline: the whole zone is committed at once + + // TRANSIENT ARENA + inline constexpr size_t VIRT_ZONE_TA_OFFSET = VIRT_ZONE_QUEUE_OFFSET + VIRT_ZONE_QUEUE_SIZE; + inline constexpr size_t VIRT_ZONE_TA_SIZE = (1ull << 30); // 1GB + inline constexpr size_t VIRT_ZONE_TA_BASELINE = (1ull << 26); // 64MB + + // COMPONENT POOL ARENA + inline constexpr size_t VIRT_ZONE_CPA_OFFSET = VIRT_ZONE_TA_OFFSET + VIRT_ZONE_TA_SIZE; + inline constexpr size_t VIRT_ZONE_CPA_SIZE = (1ull << 33); // 8GB + inline constexpr size_t VIRT_ZONE_CPA_BASELINE = (1ull << 27); // 128MB + + // SCENE ARENA + inline constexpr size_t VIRT_ZONE_SA_OFFSET = VIRT_ZONE_CPA_OFFSET + VIRT_ZONE_CPA_SIZE; + inline constexpr size_t VIRT_ZONE_SA_SIZE = (1ull << 32); // 4GB + inline constexpr size_t VIRT_ZONE_SA_BASELINE = (1ull << 26); // 64MB + + // IO STREAMING SPACE + inline constexpr size_t VIRT_ZONE_IO_OFFSET = VIRT_ZONE_SA_OFFSET + VIRT_ZONE_SA_SIZE; + inline constexpr size_t VIRT_ZONE_IO_SIZE = (1ull << 30); // 1GB (to be reconsidered) + inline constexpr size_t VIRT_ZONE_IO_BASELINE = UINT64_MAX; // No memory committed, thus no baseline + + // BASELINE SUMMARY + inline constexpr size_t VIRT_ZONE_BASELINE_SUM = + VIRT_ZONE_TA_BASELINE + + VIRT_ZONE_CPA_BASELINE + + VIRT_ZONE_SA_BASELINE; + } struct EngineMemoryConfig { size_t totalPhysicalRam; - size_t globalLockCeiling; + size_t maxAllowedPhysical; size_t threadSoftBaseline; + size_t activeLockAllocation; + size_t globalPoolSize; }; extern EngineMemoryConfig g_MemoryConfig; - extern std::atomic g_TotalLockedBytes; // TODO: Remove, redesign. 
completely unnecessary - void MemoryConfig_Initialize(size_t physicalRamInstalled, uint32_t maxEngineWorkers); + void MemoryConfig_Initialize(size_t physicalRamInstalled, size_t pageSize, uint32_t maxEngineWorkers); void MemoryConfig_Shutdown(); } diff --git a/Engine/src/delta/core/ThreadContext.cpp b/Engine/src/delta/core/ThreadContext.cpp index c289431..08ac97b 100644 --- a/Engine/src/delta/core/ThreadContext.cpp +++ b/Engine/src/delta/core/ThreadContext.cpp @@ -9,6 +9,45 @@ namespace delta::core static ThreadExecutionContext* g_ThreadContexts = nullptr; thread_local ThreadExecutionContext* tl_CurrentThreadContext = nullptr; + DLT_FORCE_INLINE static void InitializePageCoordinator(ThreadPageCoordinator& pageCoord, size_t pageSize, uint8_t* baseAddress) + { + pageCoord.pageSize = pageSize; + pageCoord.virtualAddressBase = baseAddress; + pageCoord.commitedOffset = 0; + pageCoord.reservedCapacity = MemoryMap::VIRT_ZONE_SPACE_LENGTH; + } + + DLT_FORCE_INLINE static void InitializeQueue(const ThreadPageCoordinator& pageCoord, TaskQueue& queue, size_t offset, size_t memSize) + { + queue.size = memSize / TaskQueue::FIELD_SIZE; // entry count: memSize is split between the tasks array and the payloads array + queue.mask = queue.size - 1; // usable as an index mask only while size is a power of two + queue.top.store(0, std::memory_order_relaxed); + queue.bottom.store(0, std::memory_order_relaxed); + + // the queue zone is committed in full + uint8_t* pTarget = pageCoord.virtualAddressBase + offset; + void* p = delta::platform::Memory_Commit(pTarget, memSize); + assert(p != nullptr); + + uint8_t* tasksArrayPtr = pTarget; + uint8_t* payloadsArrayPtr = pTarget + queue.size * sizeof(task_t); // byte offset: payloads begin right after the queue.size task entries, so the two arrays do not overlap + queue.tasks = reinterpret_cast<task_t*>(tasksArrayPtr); + queue.payloads = reinterpret_cast<void**>(payloadsArrayPtr); + } + + DLT_FORCE_INLINE static void InitializeArena(const ThreadPageCoordinator& pageCoord, ThreadArena& arena, size_t offset, size_t baseline) + { + uint8_t* pTarget = pageCoord.virtualAddressBase + offset; + void* res = delta::platform::Memory_Commit(pTarget, baseline); + assert(res != nullptr); + + // TODO: Lock the committed range + + arena.backingMemory = 
pTarget; + arena.capacity = baseline; + arena.offset = 0; + } + void ThreadContext_Initialize(uint32_t workerCount, size_t pageSize) { uint32_t totalThreads = workerCount + 1; // include main thread @@ -41,30 +80,17 @@ namespace delta::core ctx.threadIx = i; ctx.threadId = 0; // to be set later - ctx.pageCoordinator.pageSize = pageSize; - ctx.pageCoordinator.virtualAddressBase = virtualRunwayCursor; - ctx.pageCoordinator.commitedOffset = 0; - ctx.pageCoordinator.reservedCapacity = ADDR_SLICE_PER_THREAD; - - virtualRunwayCursor += ADDR_SLICE_PER_THREAD; - - uint8_t* initialPageTarget = ctx.pageCoordinator.virtualAddressBase + ctx.pageCoordinator.commitedOffset; - void* initialPageMemory = delta::platform::Memory_Commit(initialPageTarget, pageSize); - assert(initialPageMemory != nullptr && "Failed to commit initial arena page!"); - - if (!delta::platform::Memory_Lock(initialPageTarget, pageSize)) - std::cout << "[DeltaEngine-Warning] Failed to lock memory resource: Thread-Local Arena " << i << "\n"; - - ctx.pageCoordinator.commitedOffset += pageSize; - ctx.transientArena.backingMemory = reinterpret_cast(initialPageTarget); - ctx.transientArena.capacity = pageSize; - ctx.transientArena.offset = 0; + InitializePageCoordinator(ctx.pageCoordinator, pageSize, virtualRunwayCursor); + InitializeQueue(ctx.pageCoordinator, ctx.taskQueue, MemoryMap::VIRT_ZONE_QUEUE_OFFSET, MemoryMap::VIRT_ZONE_QUEUE_SIZE); + InitializeArena(ctx.pageCoordinator, ctx.transientArena, MemoryMap::VIRT_ZONE_TA_OFFSET, MemoryMap::VIRT_ZONE_TA_BASELINE); + InitializeArena(ctx.pageCoordinator, ctx.componentPoolArena, MemoryMap::VIRT_ZONE_CPA_OFFSET, MemoryMap::VIRT_ZONE_CPA_BASELINE); + InitializeArena(ctx.pageCoordinator, ctx.sceneArena, MemoryMap::VIRT_ZONE_SA_OFFSET, MemoryMap::VIRT_ZONE_SA_BASELINE); delta::platform::Timer_Initialize(&ctx.perThreadTimer); + virtualRunwayCursor += ADDR_SLICE_PER_THREAD; } tl_CurrentThreadContext = &g_ThreadContexts[0]; - g_TotalLockedBytes.fetch_add(totalThreads * 
pageSize + alignedContextArraySize, std::memory_order_relaxed); } void ThreadContext_Shutdown() @@ -72,7 +98,7 @@ namespace delta::core delta::platform::Memory_Release(g_ThreadContexts); } - void* ThreadArena_Allocate(EngineArena* arena, size_t size, size_t alignment) + void* ThreadArena_Allocate(ThreadArena* arena, size_t size, size_t alignment) { uintptr_t currentAddress = reinterpret_cast(arena->backingMemory) + arena->offset; uintptr_t alignedAddress = ALIGN(currentAddress, alignment); @@ -101,7 +127,7 @@ namespace delta::core return ptr; } - void ThreadArena_Reset(EngineArena* arena) + void ThreadArena_Reset(ThreadArena* arena) { arena->offset = 0; } diff --git a/Engine/src/delta/core/ThreadContext.h b/Engine/src/delta/core/ThreadContext.h index d637b05..e808a29 100644 --- a/Engine/src/delta/core/ThreadContext.h +++ b/Engine/src/delta/core/ThreadContext.h @@ -9,6 +9,6 @@ namespace delta::core void ThreadContext_Shutdown(); // Engine Arena API - void* ThreadArena_Allocate(EngineArena* arena, size_t size, size_t alignment = 8); - void ThreadArena_Reset(EngineArena* arena); + void* ThreadArena_Allocate(ThreadArena* arena, size_t size, size_t alignment = 8); + void ThreadArena_Reset(ThreadArena* arena); } diff --git a/Engine/src/delta/core/engine.cpp b/Engine/src/delta/core/engine.cpp index 93ca542..b12fbbe 100644 --- a/Engine/src/delta/core/engine.cpp +++ b/Engine/src/delta/core/engine.cpp @@ -28,13 +28,7 @@ void delta::Engine::Initialize(Context& context) const auto* osInfo = delta::platform::getOSInfo(); const auto memStatus = delta::platform::getMemoryStatus(); - delta::core::MemoryConfig_Initialize(memStatus.physicalInstalled, osInfo->maxEngineWorkerCount); - if (!delta::platform::Memory_ElevateLockLimit(delta::core::g_MemoryConfig.globalLockCeiling)) - { - context.isRunning = false; - return; - } - + delta::core::MemoryConfig_Initialize(memStatus.physicalInstalled, osInfo->osPageSize, osInfo->maxEngineWorkerCount); 
delta::core::ThreadContext_Initialize(osInfo->maxEngineWorkerCount, osInfo->osPageSize); } diff --git a/Engine/src/delta/platform/os_win32.cpp b/Engine/src/delta/platform/os_win32.cpp index 007e195..fd3984e 100644 --- a/Engine/src/delta/platform/os_win32.cpp +++ b/Engine/src/delta/platform/os_win32.cpp @@ -20,6 +20,7 @@ #include "os_internal.h" #include #include +#include #define CHECK_CPUID_FLAG(register, flag) ((register & (1 << flag)) != 0) @@ -71,7 +72,7 @@ namespace delta::platform } } - inline static void fetchCpuidValues() + DLT_FORCE_INLINE static void fetchCpuidValues() { int cpuinfo[4]; __cpuid(cpuinfo, 0); @@ -100,6 +101,35 @@ namespace delta::platform memcpy(g_osInfo.cpuBrandString, BrandStringCall::UNSPECIFIED_VALUE, sizeof(BrandStringCall::UNSPECIFIED_VALUE)); } + DLT_FORCE_INLINE static bool SetProcessPrivileges() + { + HANDLE hToken = NULL; + + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) + return false; + + TOKEN_PRIVILEGES tp; + LUID luid; + if (!LookupPrivilegeValueA(NULL, SE_INC_WORKING_SET_NAME, &luid)) + { + CloseHandle(hToken); + return false; + } + + tp.PrivilegeCount = 1; + tp.Privileges[0].Luid = luid; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + BOOL result = AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), NULL, NULL); + DWORD error = GetLastError(); + + CloseHandle(hToken); + if (result == FALSE || error == ERROR_NOT_ALL_ASSIGNED) + return false; + + return true; + } + void Initialize() { memset(&g_osInfo, 0u, sizeof(OSInfo)); @@ -152,6 +182,10 @@ namespace delta::platform g_osInfo.maxEngineWorkerCount = physicalCores - 1; else g_osInfo.maxEngineWorkerCount = 1; + + bool privilegesSet = SetProcessPrivileges(); + if (!privilegesSet) + std::cout << "[DeltaEngine-Warning] Failed to elevate SE_INC_WORKING_SET_NAME privilege. 
You may want to run the game as an administrator.\n"; } void* Memory_Reserve(size_t reservationSize) @@ -182,7 +216,7 @@ namespace delta::platform return VirtualLock(mem, bytes); } - bool Memory_ElevateLockLimit(size_t maxBytesToLock) + bool Memory_ElevateLockLimit(size_t bytesToLock) { HANDLE hProcess = GetCurrentProcess(); size_t minWorkingSet = 0; @@ -191,8 +225,9 @@ namespace delta::platform if (GetProcessWorkingSetSize(hProcess, &minWorkingSet, &maxWorkingSet)) { static constexpr size_t SAFETY_BUFFER_SIZE = (1ull << 26); - size_t newMax = maxWorkingSet + maxBytesToLock + SAFETY_BUFFER_SIZE; - return SetProcessWorkingSetSize(hProcess, minWorkingSet, newMax); + size_t newMin = minWorkingSet + bytesToLock; + size_t newMax = maxWorkingSet + bytesToLock; + return SetProcessWorkingSetSizeEx(hProcess, newMin, newMax, QUOTA_LIMITS_HARDWS_MIN_DISABLE | QUOTA_LIMITS_HARDWS_MAX_ENABLE) == TRUE; } return false;