From 06ecbc964c91bbde283a86040d28baa80309657a Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <[tu-email@ejemplo.com]> Date: Sat, 11 Apr 2026 01:36:04 -0500 Subject: [PATCH 1/8] feat(dlss): add Ultra Performance mode support --- src/core/render/modules/world/dlss/dlss_module.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/render/modules/world/dlss/dlss_module.cpp b/src/core/render/modules/world/dlss/dlss_module.cpp index 3c318b0..eec0bb0 100644 --- a/src/core/render/modules/world/dlss/dlss_module.cpp +++ b/src/core/render/modules/world/dlss/dlss_module.cpp @@ -188,7 +188,9 @@ bool DLSSModule::setOrCreateOutputImages(std::vector &attributeKVs) { for (int i = 0; i < attributeCount; i++) { if (attributeKVs[2 * i] == "render_pipeline.module.dlss.attribute.mode") { - if (attributeKVs[2 * i + 1] == "render_pipeline.module.dlss.attribute.mode.performance") { + if (attributeKVs[2 * i + 1] == "render_pipeline.module.dlss.attribute.mode.ultra_performance") { + mode_ = NVSDK_NGX_PerfQuality_Value_UltraPerformance; + } else if (attributeKVs[2 * i + 1] == "render_pipeline.module.dlss.attribute.mode.performance") { mode_ = NVSDK_NGX_PerfQuality_Value_MaxPerf; } else if (attributeKVs[2 * i + 1] == "render_pipeline.module.dlss.attribute.mode.balanced") { mode_ = NVSDK_NGX_PerfQuality_Value_Balanced; From 38621af1947694455d1ee99a93c8b964dfb82b7f Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <[tu-email@ejemplo.com]> Date: Sat, 11 Apr 2026 02:52:22 -0500 Subject: [PATCH 2/8] feat: add DLSS Frame Generation (DLSS-G) support - Add dlssg_wrapper.hpp/cpp with DlssFG class wrapping NGX Frame Gen - Extend NgxContext with queryFrameGenAvailable() and initFrameGen() - Add FG attribute handling and initialization in DLSSModule - Integrate FG evaluation in render framework (double-present for interp frames) - Create interpolated frame images and blit pipeline --- .../render/modules/world/dlss/dlss_module.cpp | 32 ++ .../render/modules/world/dlss/dlss_module.hpp | 16 + .../modules/world/dlss/dlss_wrapper.cpp | 32 ++ .../modules/world/dlss/dlss_wrapper.hpp | 14 + .../modules/world/dlss/dlssg_wrapper.cpp | 212 +++++++++++++ .../modules/world/dlss/dlssg_wrapper.hpp | 86 ++++++ src/core/render/render_framework.cpp | 278 ++++++++++++++++++ src/core/render/render_framework.hpp | 12 + 8 files changed, 682 insertions(+) create mode 100644 src/core/render/modules/world/dlss/dlssg_wrapper.cpp create mode 100644 src/core/render/modules/world/dlss/dlssg_wrapper.hpp diff --git a/src/core/render/modules/world/dlss/dlss_module.cpp b/src/core/render/modules/world/dlss/dlss_module.cpp index eec0bb0..95140a9 100644 --- a/src/core/render/modules/world/dlss/dlss_module.cpp +++ b/src/core/render/modules/world/dlss/dlss_module.cpp @@ -46,6 +46,11 @@ void DLSSModule::deinitNGXContext() { } } +bool DLSSModule::isFrameGenAvailable() { + if (ngxContext_ == nullptr) return false; + return ngxContext_->queryFrameGenAvailable() == NVSDK_NGX_Result_Success; +} + DLSSModule::DLSSModule() {} void DLSSModule::init(std::shared_ptr framework, std::shared_ptr worldPipeline) { @@ -63,6 +68,7 @@ void DLSSModule::init(std::shared_ptr framework, std::shared_ptr &att } else if (attributeKVs[2 * i + 1] == "render_pipeline.module.dlss.attribute.mode.dlaa") { mode_ = NVSDK_NGX_PerfQuality_Value_DLAA; } + } else if (attributeKVs[2 * i] == "render_pipeline.module.dlss.attribute.frame_generation") { + frameGenEnabled_ = (attributeKVs[2 * i + 1] == "render_pipeline.module.dlss.attribute.frame_generation.on"); } } } @@ -216,6 +224,29 @@ void DLSSModule::build() { dlssRRInitInfo.quality = mode_; ngxContext_->initDlssRR(dlssRRInitInfo, framework->mainCommandPool(), dlss_); + // Initialize Frame Generation if enabled and available + if (frameGenEnabled_ && ngxContext_->queryFrameGenAvailable() == NVSDK_NGX_Result_Success) { + dlssFG_ = DlssFG::create(); + NgxContext::DlssFGInitInfo fgInitInfo{}; + fgInitInfo.width = outputWidth_; + fgInitInfo.height = outputHeight_; + fgInitInfo.backbufferFormat = framework->swapchain()->vkSurfaceFormat().format; + if (ngxContext_->initFrameGen(fgInitInfo, framework->mainCommandPool(), dlssFG_) != NVSDK_NGX_Result_Success) { + std::cerr << "[DLSS] Frame Generation initialization failed" << std::endl; + dlssFG_ = nullptr; + } else { + // Create interpolated frame images + for (uint32_t i = 0; i < size; i++) { + interpFrameImages_[i] = vk::DeviceLocalImage::create( + framework->device(), framework->vma(), false, outputWidth_, outputHeight_, 1, + framework->swapchain()->vkSurfaceFormat().format, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + } + std::cout << "[DLSS] Frame Generation initialized successfully" << std::endl; + } + } + contexts_.resize(size); for (int i = 0; i < size; i++) { @@ -233,6 +264,7 @@ void DLSSModule::bindTexture(std::shared_ptr sampler, int index) {} void DLSSModule::preClose() { + if (dlssFG_) { dlssFG_->deinit(); dlssFG_ = nullptr; } dlss_->deinit(); } diff --git a/src/core/render/modules/world/dlss/dlss_module.hpp b/src/core/render/modules/world/dlss/dlss_module.hpp index 2eede54..a6ad0d6 100644 --- a/src/core/render/modules/world/dlss/dlss_module.hpp +++ b/src/core/render/modules/world/dlss/dlss_module.hpp @@ -6,6 +6,7 @@ #include "core/vulkan/all_core_vulkan.hpp" #include "core/render/modules/world/dlss/dlss_wrapper.hpp" +#include "core/render/modules/world/dlss/dlssg_wrapper.hpp" #include "core/render/modules/world/world_module.hpp" class Framework; @@ -25,6 +26,8 @@ class DLSSModule : public WorldModule, public SharedObject { static bool initNGXContext(); static void deinitNGXContext(); + static std::shared_ptr ngxContext() { return ngxContext_; } + static bool isFrameGenAvailable(); DLSSModule(); @@ -48,6 +51,14 @@ class DLSSModule : public WorldModule, public SharedObject { void preClose() override; + bool isFrameGenEnabled() const { return frameGenEnabled_ && dlssFG_ != nullptr; } + std::shared_ptr dlssFG() { return dlssFG_; } + std::shared_ptr interpFrameImage(uint32_t frameIndex) { return interpFrameImages_[frameIndex]; } + std::shared_ptr motionVectorImage(uint32_t frameIndex) { return motionVectorImages_[frameIndex]; } + std::shared_ptr linearDepthImage(uint32_t frameIndex) { return linearDepthImages_[frameIndex]; } + uint32_t outputWidth() const { return outputWidth_; } + uint32_t outputHeight() const { return outputHeight_; } + private: static std::shared_ptr ngxContext_; @@ -66,6 +77,11 @@ class DLSSModule : public WorldModule, public SharedObject { NgxContext::SupportedSizes supportedSizes_{}; NVSDK_NGX_PerfQuality_Value mode_ = NVSDK_NGX_PerfQuality_Value_Balanced; + // frame generation + bool frameGenEnabled_ = false; + std::shared_ptr dlssFG_; + std::vector> interpFrameImages_; + // output std::vector> processedImages_; std::vector> upscaledFirstHitDepthImages_; diff --git a/src/core/render/modules/world/dlss/dlss_wrapper.cpp b/src/core/render/modules/world/dlss/dlss_wrapper.cpp index d1bf037..4f84438 100644 --- a/src/core/render/modules/world/dlss/dlss_wrapper.cpp +++ b/src/core/render/modules/world/dlss/dlss_wrapper.cpp @@ -38,6 +38,7 @@ */ #include "core/render/modules/world/dlss/dlss_wrapper.hpp" +#include "core/render/modules/world/dlss/dlssg_wrapper.hpp" // #include // #include @@ -48,6 +49,7 @@ #include #include #include +#include #include @@ -214,6 +216,36 @@ NVSDK_NGX_Result NgxContext::initDlssRR(const DlssRRInitInfo &initInfo, return NVSDK_NGX_Result_Success; } +NVSDK_NGX_Result NgxContext::queryFrameGenAvailable() { + assert(ngxParams_); + + int fgAvailable = 0; + NVSDK_NGX_Result res = + NGX_CHECK(ngxParams_->Get(NVSDK_NGX_Parameter_FrameGeneration_Available, &fgAvailable)); + if (NVSDK_NGX_FAILED(res) || !fgAvailable) { + LOGW << "Frame Generation not available on this hardware/platform" << std::endl; + return NVSDK_NGX_Result_FAIL_FeatureNotSupported; + } + + int needsUpdatedDriver = 0; + NVSDK_NGX_Result resDriver = + ngxParams_->Get(NVSDK_NGX_Parameter_FrameGeneration_NeedsUpdatedDriver, &needsUpdatedDriver); + if (NVSDK_NGX_SUCCEED(resDriver) && needsUpdatedDriver) { + LOGW << "Frame Generation requires a newer driver" << std::endl; + return NVSDK_NGX_Result_FAIL_OutOfDate; + } + + LOGI << "Frame Generation is available" << std::endl; + return NVSDK_NGX_Result_Success; +} + +NVSDK_NGX_Result NgxContext::initFrameGen(const DlssFGInitInfo &initInfo, + std::shared_ptr cmdPool, + std::shared_ptr dlssfg) { + NGX_RETURN_ON_FAIL(dlssfg->init(device_, cmdPool, ngxParams_, initInfo.width, initInfo.height, initInfo.backbufferFormat)); + return NVSDK_NGX_Result_Success; +} + NVSDK_NGX_Result NgxContext::getDlssRRRequiredInstanceExtensions(std::vector &extensions) { NVSDK_NGX_FeatureCommonInfo commonInfo = {}; diff --git a/src/core/render/modules/world/dlss/dlss_wrapper.hpp b/src/core/render/modules/world/dlss/dlss_wrapper.hpp index d4db637..bd9db05 100644 --- a/src/core/render/modules/world/dlss/dlss_wrapper.hpp +++ b/src/core/render/modules/world/dlss/dlss_wrapper.hpp @@ -43,6 +43,7 @@ #include "nvsdk_ngx_vk.h" #include "nvsdk_ngx_defs_dlssd.h" +#include "nvsdk_ngx_defs_dlssg.h" #include @@ -71,6 +72,7 @@ NVSDK_NGX_Result checkNgxResult(NVSDK_NGX_Result result, const char *func, int l /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class DlssRR; +class DlssFG; class NgxContext : public SharedObject { public: @@ -120,6 +122,18 @@ class NgxContext : public SharedObject { NVSDK_NGX_Result initDlssRR(const DlssRRInitInfo &initInfo, std::shared_ptr cmdPool, std::shared_ptr dlssrr); + // Check if DLSS Frame Generation is available + NVSDK_NGX_Result queryFrameGenAvailable(); + + struct DlssFGInitInfo { + uint32_t width = 0; + uint32_t height = 0; + VkFormat backbufferFormat = VK_FORMAT_B8G8R8A8_UNORM; + }; + // Initialize a DlssFG instance + NVSDK_NGX_Result + initFrameGen(const DlssFGInitInfo &initInfo, std::shared_ptr cmdPool, std::shared_ptr dlssfg); + // Append 'extensions' with the instance extensions that should be enabled for DLSS_RR static NVSDK_NGX_Result getDlssRRRequiredInstanceExtensions(std::vector &extensions); diff --git a/src/core/render/modules/world/dlss/dlssg_wrapper.cpp b/src/core/render/modules/world/dlss/dlssg_wrapper.cpp new file mode 100644 index 0000000..5d345a8 --- /dev/null +++ b/src/core/render/modules/world/dlss/dlssg_wrapper.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2026 Radiance + * + * This file is licensed under the GNU General Public License + * as published by the Free Software Foundation; either version 3 of the License, + * or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "core/render/modules/world/dlss/dlssg_wrapper.hpp" + +#include +#include + +#include +#include + +#include "core/vulkan/command.hpp" +#include "core/vulkan/device.hpp" +#include "core/vulkan/image.hpp" + +#define LOGE_FG std::cout << "[DLSS-FG Error] " +#define LOGI_FG std::cout << "[DLSS-FG Info] " + +#define NGX_RETURN_ON_FAIL_FG(x) \ + { \ + NVSDK_NGX_Result result = (x); \ + if (NVSDK_NGX_FAILED(result)) { \ + LOGE_FG << "NGX call failed at " << __func__ << ":" << __LINE__ << std::endl; \ + return result; \ + } \ + } + +NVSDK_NGX_Result DlssFG::init(std::shared_ptr device, + std::shared_ptr cmdPool, + NVSDK_NGX_Parameter *ngxParams, + uint32_t width, + uint32_t height, + VkFormat backbufferFormat) { + LOGI_FG << "Initializing DLSS Frame Generation (" << width << "x" << height << ")" << std::endl; + + assert(!m_dlssgHandle && "Cannot call init twice"); + + m_device = device; + m_ngxParams = ngxParams; + m_size = {width, height}; + + m_resources.fill({.Resource = {.ImageViewInfo = {}}}); + + NVSDK_NGX_DLSSG_Create_Params createParams{}; + createParams.Width = width; + createParams.Height = height; + createParams.NativeBackbufferFormat = static_cast(backbufferFormat); + createParams.RenderWidth = width; + createParams.RenderHeight = height; + createParams.DynamicResolutionScaling = false; + + const uint32_t creationNodeMask = 0x1; + const uint32_t visibilityNodeMask = 0x1; + + { + std::shared_ptr cmdBuffer = vk::CommandBuffer::create(device, cmdPool); + cmdBuffer->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + NGX_RETURN_ON_FAIL_FG(NGX_VK_CREATE_DLSSG(cmdBuffer->vkCommandBuffer(), creationNodeMask, visibilityNodeMask, + &m_dlssgHandle, ngxParams, &createParams)); + cmdBuffer->end()->submitMainQueueIndividual(device); + } + + LOGI_FG << "DLSS Frame Generation initialized successfully" << std::endl; + return NVSDK_NGX_Result_Success; +} + +void DlssFG::deinit() { + LOGI_FG << "Deinitializing DLSS Frame Generation" << std::endl; + if (m_dlssgHandle) { NVSDK_NGX_VULKAN_ReleaseFeature(m_dlssgHandle); } + m_dlssgHandle = nullptr; + m_device = nullptr; +} + +DlssFG::~DlssFG() { + assert(!m_dlssgHandle && "Must call deinit"); +} + +void DlssFG::setResource(FGResource resourceId, std::shared_ptr image, bool readWrite) { + assert(m_dlssgHandle); + + NVSDK_NGX_Resource_VK resource = NVSDK_NGX_Create_ImageView_Resource_VK( + image->vkImageView(), image->vkImage(), vk::wholeColorSubresourceRange, image->vkFormat(), + image->width(), image->height(), readWrite); + + m_resources[resourceId] = resource; +} + +void DlssFG::setResource(FGResource resourceId, std::shared_ptr image, bool readWrite) { + assert(m_dlssgHandle); + + NVSDK_NGX_Resource_VK resource = NVSDK_NGX_Create_ImageView_Resource_VK( + image->vkImageView(), image->vkImage(), vk::wholeColorSubresourceRange, image->vkFormat(), + image->width(), image->height(), readWrite); + + m_resources[resourceId] = resource; +} + +void DlssFG::resetResource(FGResource resourceId) { + m_resources[resourceId] = {}; +} + +NVSDK_NGX_Result DlssFG::evaluate(std::shared_ptr cmdBuffer, + const glm::mat4 &viewMatrix, + const glm::mat4 &projMatrix, + const glm::mat4 &prevViewMatrix, + const glm::mat4 &prevProjMatrix, + glm::vec2 jitter, + glm::vec3 cameraPos, + float cameraNear, + float cameraFar, + float cameraFOV, + float cameraAspectRatio, + bool reset) { + assert(m_dlssgHandle); + + auto getResource = [this](FGResource res) -> NVSDK_NGX_Resource_VK * { + return m_resources[res].Resource.ImageViewInfo.ImageView ? &m_resources[res] : nullptr; + }; + + NVSDK_NGX_VK_DLSSG_Eval_Params evalParams{}; + evalParams.pBackbuffer = getResource(RESOURCE_BACKBUFFER); + evalParams.pDepth = getResource(RESOURCE_DEPTH); + evalParams.pMVecs = getResource(RESOURCE_MVECS); + evalParams.pHudless = nullptr; + evalParams.pUI = nullptr; + evalParams.pNoPostProcessingColor = nullptr; + evalParams.pBidirectionalDistortionField = nullptr; + evalParams.pOutputInterpFrame = getResource(RESOURCE_OUTPUT_INTERP); + evalParams.pOutputRealFrame = nullptr; + evalParams.pOutputDisableInterpolation = nullptr; + + // Compute camera matrices for FG + // viewToClip = projection matrix + // clipToView = inverse projection + // clipToPrevClip = prevViewProj * inv(currentViewProj) + glm::mat4 viewToClip = projMatrix; + glm::mat4 clipToView = glm::inverse(projMatrix); + glm::mat4 currentViewProj = projMatrix * viewMatrix; + glm::mat4 prevViewProj = prevProjMatrix * prevViewMatrix; + glm::mat4 clipToPrevClip = prevViewProj * glm::inverse(currentViewProj); + glm::mat4 prevClipToClip = currentViewProj * glm::inverse(prevViewProj); + + NVSDK_NGX_DLSSG_Opt_Eval_Params optParams{}; + optParams.multiFrameCount = 1; + optParams.multiFrameIndex = 1; + + // GLM is column-major, DLSS-G expects row-major with left-multiply. + // Same trick as DLSS-RR: (M^T)^T = M, so supply original matrices directly. + std::memcpy(optParams.cameraViewToClip, glm::value_ptr(viewToClip), sizeof(float) * 16); + std::memcpy(optParams.clipToCameraView, glm::value_ptr(clipToView), sizeof(float) * 16); + std::memset(optParams.clipToLensClip, 0, sizeof(float) * 16); + // Identity for clipToLensClip (no lens distortion) + optParams.clipToLensClip[0][0] = 1.0f; + optParams.clipToLensClip[1][1] = 1.0f; + optParams.clipToLensClip[2][2] = 1.0f; + optParams.clipToLensClip[3][3] = 1.0f; + std::memcpy(optParams.clipToPrevClip, glm::value_ptr(clipToPrevClip), sizeof(float) * 16); + std::memcpy(optParams.prevClipToClip, glm::value_ptr(prevClipToClip), sizeof(float) * 16); + + optParams.jitterOffset[0] = -jitter.x; + optParams.jitterOffset[1] = -jitter.y; + optParams.mvecScale[0] = 1.0f; + optParams.mvecScale[1] = 1.0f; + optParams.cameraPinholeOffset[0] = 0.0f; + optParams.cameraPinholeOffset[1] = 0.0f; + + optParams.cameraPos[0] = cameraPos.x; + optParams.cameraPos[1] = cameraPos.y; + optParams.cameraPos[2] = cameraPos.z; + + // Extract camera vectors from view matrix + // View matrix rows (after transpose for column-major) give right, up, forward + glm::mat4 viewInv = glm::inverse(viewMatrix); + optParams.cameraRight[0] = viewInv[0][0]; + optParams.cameraRight[1] = viewInv[0][1]; + optParams.cameraRight[2] = viewInv[0][2]; + optParams.cameraUp[0] = viewInv[1][0]; + optParams.cameraUp[1] = viewInv[1][1]; + optParams.cameraUp[2] = viewInv[1][2]; + optParams.cameraFwd[0] = -viewInv[2][0]; + optParams.cameraFwd[1] = -viewInv[2][1]; + optParams.cameraFwd[2] = -viewInv[2][2]; + + optParams.cameraNear = cameraNear; + optParams.cameraFar = cameraFar; + optParams.cameraFOV = cameraFOV; + optParams.cameraAspectRatio = cameraAspectRatio; + + optParams.colorBuffersHDR = false; // post-tonemapping backbuffer is LDR/SDR + optParams.depthInverted = false; + optParams.cameraMotionIncluded = true; + optParams.reset = reset; + optParams.automodeOverrideReset = false; + optParams.notRenderingGameFrames = false; + optParams.orthoProjection = false; + optParams.motionVectorsInvalidValue = 0.0f; + optParams.motionVectorsDilated = false; + optParams.menuDetectionEnabled = false; + + NGX_RETURN_ON_FAIL_FG( + NGX_VK_EVALUATE_DLSSG(cmdBuffer->vkCommandBuffer(), m_dlssgHandle, m_ngxParams, &evalParams, &optParams)); + + return NVSDK_NGX_Result_Success; +} diff --git a/src/core/render/modules/world/dlss/dlssg_wrapper.hpp b/src/core/render/modules/world/dlss/dlssg_wrapper.hpp new file mode 100644 index 0000000..82ffa40 --- /dev/null +++ b/src/core/render/modules/world/dlss/dlssg_wrapper.hpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2026 Radiance + * + * This file is licensed under the GNU General Public License + * as published by the Free Software Foundation; either version 3 of the License, + * or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include "common/shared.hpp" +#include "core/all_extern.hpp" + +#include "nvsdk_ngx_vk.h" +#include "nvsdk_ngx_defs_dlssg.h" + +#include +#include + +namespace vk { +class Device; +class CommandPool; +class CommandBuffer; +class DeviceLocalImage; +class SwapchainImage; +class Image; +}; // namespace vk + +class NgxContext; + +class DlssFG : public SharedObject { + public: + DlssFG() = default; + ~DlssFG(); + + enum FGResource { + RESOURCE_BACKBUFFER = 0, + RESOURCE_DEPTH, + RESOURCE_MVECS, + RESOURCE_OUTPUT_INTERP, + + RESOURCE_NUM + }; + + void setResource(FGResource resourceId, std::shared_ptr image, bool readWrite = false); + void setResource(FGResource resourceId, std::shared_ptr image, bool readWrite = false); + void resetResource(FGResource resourceId); + + NVSDK_NGX_Result evaluate(std::shared_ptr cmdBuffer, + const glm::mat4 &viewMatrix, + const glm::mat4 &projMatrix, + const glm::mat4 &prevViewMatrix, + const glm::mat4 &prevProjMatrix, + glm::vec2 jitter, + glm::vec3 cameraPos, + float cameraNear, + float cameraFar, + float cameraFOV, + float cameraAspectRatio, + bool reset = false); + + void deinit(); + + private: + friend class NgxContext; + NVSDK_NGX_Result init(std::shared_ptr device, + std::shared_ptr cmdPool, + NVSDK_NGX_Parameter *ngxParams, + uint32_t width, + uint32_t height, + VkFormat backbufferFormat); + + DlssFG(const DlssFG &) = delete; + DlssFG(const DlssFG &&) = delete; + DlssFG &operator=(const DlssFG &) = delete; + DlssFG &operator=(const DlssFG &&) = delete; + + std::shared_ptr m_device; + NVSDK_NGX_Parameter *m_ngxParams = nullptr; + NVSDK_NGX_Handle *m_dlssgHandle = nullptr; + VkExtent2D m_size; + std::array m_resources; +}; diff --git a/src/core/render/render_framework.cpp b/src/core/render/render_framework.cpp index 19a8d5f..e79e0c2 100644 --- a/src/core/render/render_framework.cpp +++ b/src/core/render/render_framework.cpp @@ -5,6 +5,7 @@ #include "core/render/chunks.hpp" #include "core/render/entities.hpp" #include "core/render/modules/ui_module.hpp" +#include "core/render/modules/world/dlss/dlss_module.hpp" #include "core/render/pipeline.hpp" #include "core/render/renderer.hpp" #include "core/render/textures.hpp" @@ -144,6 +145,142 @@ void FrameworkContext::fuseFinal() { swapchainImage->imageLayout() = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; } +void FrameworkContext::evaluateFrameGeneration() { + auto f = framework.lock(); + if (!f->isRunning()) return; + + // Find the DLSSModule in the world pipeline + auto worldPipeline = f->pipeline_->worldPipeline(); + if (!worldPipeline) return; + + std::shared_ptr dlssModule; + for (auto &mod : worldPipeline->worldModules()) { + dlssModule = std::dynamic_pointer_cast(mod); + if (dlssModule) break; + } + if (!dlssModule || !dlssModule->isFrameGenEnabled()) { + f->frameGenActive_ = false; + return; + } + + auto dlssFG = dlssModule->dlssFG(); + auto interpFrame = dlssModule->interpFrameImage(frameIndex); + auto depthImage = dlssModule->linearDepthImage(frameIndex); + auto mvecsImage = dlssModule->motionVectorImage(frameIndex); + + if (!dlssFG || !interpFrame || !depthImage || !mvecsImage) { + f->frameGenActive_ = false; + return; + } + + auto mainQueueIndex = physicalDevice->mainQueueIndex(); + + // Transition swapchain from PRESENT_SRC to GENERAL for FG read + // Transition interp frame to GENERAL for FG write + fuseCommandBuffer->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, + .oldLayout = swapchainImage->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = swapchainImage, + .subresourceRange = vk::wholeColorSubresourceRange, + }, + { + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT, + .oldLayout = interpFrame->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpFrame, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + swapchainImage->imageLayout() = VK_IMAGE_LAYOUT_GENERAL; + interpFrame->imageLayout() = VK_IMAGE_LAYOUT_GENERAL; + + // Set FG resources + dlssFG->setResource(DlssFG::RESOURCE_BACKBUFFER, swapchainImage, false); + dlssFG->setResource(DlssFG::RESOURCE_DEPTH, depthImage, false); + dlssFG->setResource(DlssFG::RESOURCE_MVECS, mvecsImage, false); + dlssFG->setResource(DlssFG::RESOURCE_OUTPUT_INTERP, interpFrame, true); + + // Get camera data from WorldUBO + auto worldUBOBuffer = Renderer::instance().buffers()->worldUniformBuffer(); + auto worldUBO = static_cast(worldUBOBuffer->mappedPtr()); + if (worldUBO == nullptr) { + f->frameGenActive_ = false; + return; + } + + glm::mat4 viewMatrix = worldUBO->cameraViewMat; + glm::mat4 projMatrix = worldUBO->cameraProjMat; + glm::vec2 jitter = worldUBO->cameraJitter; + glm::vec3 cameraPos = glm::vec3(worldUBO->cameraPos); + + // Derive near/far/fov/aspect from projection matrix + float cameraNear = projMatrix[3][2] / (projMatrix[2][2] - 1.0f); + float cameraFar = projMatrix[3][2] / (projMatrix[2][2] + 1.0f); + float cameraFOV = 2.0f * std::atan(1.0f / projMatrix[1][1]); + float cameraAspectRatio = projMatrix[1][1] / projMatrix[0][0]; + + // Use stored previous matrices (identity for first frame triggers reset) + bool reset = !f->hasPrevMatrices_; + glm::mat4 prevView = f->hasPrevMatrices_ ? f->prevViewMatrix_ : viewMatrix; + glm::mat4 prevProj = f->hasPrevMatrices_ ? f->prevProjMatrix_ : projMatrix; + + NVSDK_NGX_Result result = dlssFG->evaluate( + fuseCommandBuffer, viewMatrix, projMatrix, prevView, prevProj, + jitter, cameraPos, cameraNear, cameraFar, cameraFOV, cameraAspectRatio, reset); + + // Store current matrices for next frame + f->prevViewMatrix_ = viewMatrix; + f->prevProjMatrix_ = projMatrix; + f->hasPrevMatrices_ = true; + + if (NVSDK_NGX_SUCCEED(result)) { + f->frameGenActive_ = true; + } else { + f->frameGenActive_ = false; + std::cerr << "[DLSS-FG] Frame Generation evaluate failed" << std::endl; + } + + // Transition swapchain back to PRESENT_SRC_KHR + fuseCommandBuffer->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, + .oldLayout = swapchainImage->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = swapchainImage, + .subresourceRange = vk::wholeColorSubresourceRange, + }, + { + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, + .oldLayout = interpFrame->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpFrame, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + swapchainImage->imageLayout() = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + interpFrame->imageLayout() = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; +} + Framework::Framework() {} void Framework::init(GLFWwindow *window) { @@ -174,6 +311,12 @@ void Framework::init(GLFWwindow *window) { for (int i = 0; i < imageCount; i++) { contexts_.push_back(FrameworkContext::create(shared_from_this(), i)); } + // Frame Generation resources + for (int i = 0; i < imageCount; i++) { + interpBlitCommandBuffers_.emplace_back(vk::CommandBuffer::create(device_, mainCommandPool_)); + interpProcessedSemaphores_.push_back(vk::Semaphore::create(device_)); + } + pipeline_ = Pipeline::create(shared_from_this()); } @@ -273,6 +416,9 @@ void Framework::submitCommand() { currentContext_->fuseFinal(); + // Evaluate DLSS Frame Generation (if enabled) after fuseFinal produces the composited backbuffer + currentContext_->evaluateFrameGeneration(); + currentContext_->uploadCommandBuffer->end(); currentContext_->worldCommandBuffer->end(); currentContext_->overlayCommandBuffer->end(); @@ -326,6 +472,128 @@ void Framework::present() { waitDeviceIdle(); exit(EXIT_FAILURE); } + + // Double-present: present interpolated frame if Frame Generation produced one + if (frameGenActive_) { + // Find the DLSS module to get the interpolated frame + std::shared_ptr dlssModule; + auto worldPipeline = pipeline_->worldPipeline(); + if (worldPipeline) { + for (auto &mod : worldPipeline->worldModules()) { + dlssModule = std::dynamic_pointer_cast(mod); + if (dlssModule) break; + } + } + + if (dlssModule && dlssModule->isFrameGenEnabled()) { + auto interpFrame = dlssModule->interpFrameImage(currentContext_->frameIndex); + if (interpFrame) { + // Acquire a new swapchain image for the interpolated frame + std::shared_ptr interpAcquireSem = acquireSemaphore(); + uint32_t interpIndex; + VkResult acquireResult = vkAcquireNextImageKHR( + device_->vkDevice(), swapchain_->vkSwapchain(), UINT64_MAX, + interpAcquireSem->vkSemaphore(), VK_NULL_HANDLE, &interpIndex); + + if (acquireResult == VK_SUCCESS || acquireResult == VK_SUBOPTIMAL_KHR) { + // Wait for the target image's fence + auto targetFence = commandFinishedFences_[interpIndex]; + vkWaitForFences(device_->vkDevice(), 1, &targetFence->vkFence(), true, UINT64_MAX); + + auto interpBlitCmd = interpBlitCommandBuffers_[interpIndex]; + auto interpSwapImage = swapchain_->swapchainImages()[interpIndex]; + auto mainQueueIndex = physicalDevice_->mainQueueIndex(); + + interpBlitCmd->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + + // Transition swapchain image to TRANSFER_DST + interpBlitCmd->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, + .srcAccessMask = 0, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .oldLayout = interpSwapImage->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpSwapImage, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + + // Blit interpolated frame → swapchain image + VkImageBlit blit{}; + blit.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.srcOffsets[0] = {0, 0, 0}; + blit.srcOffsets[1] = {static_cast(interpFrame->width()), + static_cast(interpFrame->height()), 1}; + blit.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.dstOffsets[0] = {0, 0, 0}; + blit.dstOffsets[1] = {static_cast(interpSwapImage->width()), + static_cast(interpSwapImage->height()), 1}; + vkCmdBlitImage(interpBlitCmd->vkCommandBuffer(), + interpFrame->vkImage(), interpFrame->imageLayout(), + interpSwapImage->vkImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &blit, VK_FILTER_LINEAR); + + // Transition swapchain image to PRESENT_SRC + interpBlitCmd->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpSwapImage, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + interpSwapImage->imageLayout() = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + interpBlitCmd->end(); + + // Submit blit + VkSemaphore waitSem = interpAcquireSem->vkSemaphore(); + VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + VkSemaphore signalSem = interpProcessedSemaphores_[interpIndex]->vkSemaphore(); + + VkSubmitInfo interpSubmit = {}; + interpSubmit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + interpSubmit.waitSemaphoreCount = 1; + interpSubmit.pWaitSemaphores = &waitSem; + interpSubmit.pWaitDstStageMask = &waitStage; + interpSubmit.commandBufferCount = 1; + interpSubmit.pCommandBuffers = &interpBlitCmd->vkCommandBuffer(); + interpSubmit.signalSemaphoreCount = 1; + interpSubmit.pSignalSemaphores = &signalSem; + + vkResetFences(device_->vkDevice(), 1, &targetFence->vkFence()); + vkQueueSubmit(device_->mainVkQueue(), 1, &interpSubmit, targetFence->vkFence()); + + // Present interpolated frame + VkPresentInfoKHR interpPresentInfo = {}; + interpPresentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + interpPresentInfo.waitSemaphoreCount = 1; + interpPresentInfo.pWaitSemaphores = &signalSem; + interpPresentInfo.swapchainCount = 1; + interpPresentInfo.pSwapchains = &swapchain_->vkSwapchain(); + interpPresentInfo.pImageIndices = &interpIndex; + + VkResult interpResult = vkQueuePresentKHR(device_->mainVkQueue(), &interpPresentInfo); + if (interpResult == VK_ERROR_OUT_OF_DATE_KHR || interpResult == VK_SUBOPTIMAL_KHR) { + // Swapchain needs recreation, will be handled next frame + } + + recycleSemaphore(interpAcquireSem); + } else { + recycleSemaphore(interpAcquireSem); + } + } + } + frameGenActive_ = false; + } } void Framework::recreate() { @@ -356,6 +624,10 @@ void Framework::recreate() { fuseCommandBuffers_.clear(); commandFinishedFences_.clear(); commandProcessedSemaphores_.clear(); + interpBlitCommandBuffers_.clear(); + interpProcessedSemaphores_.clear(); + frameGenActive_ = false; + hasPrevMatrices_ = false; swapchain_->reconstruct(); @@ -377,6 +649,12 @@ void Framework::recreate() { for (int i = 0; i < size; i++) { contexts_.push_back(FrameworkContext::create(shared_from_this(), i)); } + // Recreate Frame Generation resources + for (int i = 0; i < size; i++) { + interpBlitCommandBuffers_.emplace_back(vk::CommandBuffer::create(device_, mainCommandPool_)); + interpProcessedSemaphores_.push_back(vk::Semaphore::create(device_)); + } + pipeline_->recreate(shared_from_this()); Renderer::instance().textures()->bindAllTextures(); diff --git a/src/core/render/render_framework.hpp b/src/core/render/render_framework.hpp index 082c0df..c5d8b52 100644 --- a/src/core/render/render_framework.hpp +++ b/src/core/render/render_framework.hpp @@ -4,6 +4,7 @@ #include "common/singleton.hpp" #include "core/all_extern.hpp" #include "core/render/modules/world/dlss/dlss_wrapper.hpp" +#include "core/render/modules/world/dlss/dlssg_wrapper.hpp" #include "core/render/pipeline.hpp" #include "core/vulkan/all_core_vulkan.hpp" @@ -56,6 +57,7 @@ struct FrameworkContext : public SharedObject { ~FrameworkContext(); void fuseFinal(); + void evaluateFrameGeneration(); }; class Framework : public SharedObject { @@ -137,6 +139,16 @@ class Framework : public SharedObject { bool running_ = true; + // Frame Generation state + bool frameGenActive_ = false; + glm::mat4 prevViewMatrix_ = glm::mat4(1.0f); + glm::mat4 prevProjMatrix_ = glm::mat4(1.0f); + bool hasPrevMatrices_ = false; + + // FG double-present resources + std::vector> interpBlitCommandBuffers_; + std::vector> interpProcessedSemaphores_; + std::shared_ptr gc_; }; From aad5e442208d04f8e41b01edd833b5a0367eb9f8 Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <[tu-email@ejemplo.com]> Date: Sat, 11 Apr 2026 16:56:51 -0500 Subject: [PATCH 3/8] feat: DLSS Multi-Frame Generation multiplier support (x2/x3/x4) - dlssg_wrapper: evaluate() now accepts multiFrameCount and multiFrameIndex params - dlss_wrapper: added queryMaxMultiFrameCount() capability query - dlss_module: changed from bool to uint32_t frameGenMultiFrameCount_, 2D interp images - dlss_module: parse off/x2/x3/x4 enum values, clamp to hardware max - render_framework: multi-frame evaluate loop and multi-present in present() --- .../render/modules/world/dlss/dlss_module.cpp | 40 ++++- .../render/modules/world/dlss/dlss_module.hpp | 11 +- .../modules/world/dlss/dlss_wrapper.cpp | 18 ++ .../modules/world/dlss/dlss_wrapper.hpp | 4 + .../modules/world/dlss/dlssg_wrapper.cpp | 6 +- .../modules/world/dlss/dlssg_wrapper.hpp | 2 + src/core/render/render_framework.cpp | 167 +++++++++++------- src/core/render/render_framework.hpp | 2 + 8 files changed, 174 insertions(+), 76 deletions(-) diff --git a/src/core/render/modules/world/dlss/dlss_module.cpp b/src/core/render/modules/world/dlss/dlss_module.cpp index 95140a9..2610e4f 100644 --- a/src/core/render/modules/world/dlss/dlss_module.cpp +++ b/src/core/render/modules/world/dlss/dlss_module.cpp @@ -206,7 +206,16 @@ void DLSSModule::setAttributes(int attributeCount, std::vector &att mode_ = NVSDK_NGX_PerfQuality_Value_DLAA; } } else if (attributeKVs[2 * i] == "render_pipeline.module.dlss.attribute.frame_generation") { - frameGenEnabled_ = (attributeKVs[2 * i + 1] == "render_pipeline.module.dlss.attribute.frame_generation.on"); + const auto &val = attributeKVs[2 * i + 1]; + if (val == "render_pipeline.module.dlss.attribute.frame_generation.off") { + frameGenMultiFrameCount_ = 0; + } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x2") { + frameGenMultiFrameCount_ = 1; + } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x3") { + frameGenMultiFrameCount_ = 2; + } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x4") { + frameGenMultiFrameCount_ = 3; + } } } } @@ -225,7 +234,15 @@ void DLSSModule::build() { ngxContext_->initDlssRR(dlssRRInitInfo, framework->mainCommandPool(), dlss_); // Initialize Frame Generation if enabled and available - if (frameGenEnabled_ && ngxContext_->queryFrameGenAvailable() == NVSDK_NGX_Result_Success) { + if (frameGenMultiFrameCount_ > 0 && ngxContext_->queryFrameGenAvailable() == NVSDK_NGX_Result_Success) { + // Clamp requested count to hardware max + uint32_t maxCount = ngxContext_->queryMaxMultiFrameCount(); + if (frameGenMultiFrameCount_ > maxCount) { + std::cout << "[DLSS] Requested multi-frame count " << frameGenMultiFrameCount_ + << " exceeds hardware max " << maxCount << ", clamping" << std::endl; + frameGenMultiFrameCount_ = maxCount; + } + dlssFG_ = DlssFG::create(); NgxContext::DlssFGInitInfo fgInitInfo{}; fgInitInfo.width = outputWidth_; @@ -234,16 +251,21 @@ void DLSSModule::build() { if (ngxContext_->initFrameGen(fgInitInfo, framework->mainCommandPool(), dlssFG_) != NVSDK_NGX_Result_Success) { std::cerr << "[DLSS] Frame Generation initialization failed" << std::endl; dlssFG_ = nullptr; + frameGenMultiFrameCount_ = 0; } else { - // Create interpolated frame images + // Create interpolated frame images: one per multi-frame index per swapchain image for (uint32_t i = 0; i < size; i++) { - interpFrameImages_[i] = vk::DeviceLocalImage::create( - framework->device(), framework->vma(), false, outputWidth_, outputHeight_, 1, - framework->swapchain()->vkSurfaceFormat().format, - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT); + interpFrameImages_[i].resize(frameGenMultiFrameCount_); + for (uint32_t j = 0; j < frameGenMultiFrameCount_; j++) { + interpFrameImages_[i][j] = vk::DeviceLocalImage::create( + framework->device(), framework->vma(), false, outputWidth_, outputHeight_, 1, + framework->swapchain()->vkSurfaceFormat().format, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + } } - std::cout << "[DLSS] Frame Generation initialized successfully" << std::endl; + std::cout << "[DLSS] Frame Generation initialized (multi-frame count: " << frameGenMultiFrameCount_ + << ", multiplier: " << (frameGenMultiFrameCount_ + 1) << "x)" << std::endl; } } diff --git a/src/core/render/modules/world/dlss/dlss_module.hpp b/src/core/render/modules/world/dlss/dlss_module.hpp index a6ad0d6..45c95f4 100644 --- a/src/core/render/modules/world/dlss/dlss_module.hpp +++ b/src/core/render/modules/world/dlss/dlss_module.hpp @@ -51,9 +51,10 @@ class DLSSModule : public WorldModule, public SharedObject { void preClose() override; - bool isFrameGenEnabled() const { return frameGenEnabled_ && dlssFG_ != nullptr; } + bool isFrameGenEnabled() const { return frameGenMultiFrameCount_ > 0 && dlssFG_ != nullptr; } + uint32_t frameGenMultiFrameCount() const { return frameGenMultiFrameCount_; } std::shared_ptr dlssFG() { return dlssFG_; } - std::shared_ptr interpFrameImage(uint32_t frameIndex) { return interpFrameImages_[frameIndex]; } + std::shared_ptr interpFrameImage(uint32_t frameIndex, uint32_t interpIndex) { return interpFrameImages_[frameIndex][interpIndex]; } std::shared_ptr motionVectorImage(uint32_t frameIndex) { return motionVectorImages_[frameIndex]; } std::shared_ptr linearDepthImage(uint32_t frameIndex) { return linearDepthImages_[frameIndex]; } uint32_t outputWidth() const { return outputWidth_; } @@ -78,9 +79,11 @@ class DLSSModule : public WorldModule, public SharedObject { NVSDK_NGX_PerfQuality_Value mode_ = NVSDK_NGX_PerfQuality_Value_Balanced; // frame generation - bool frameGenEnabled_ = false; + // 0 = disabled, 1 = 2x, 2 = 3x, 3 = 4x, etc. + uint32_t frameGenMultiFrameCount_ = 0; std::shared_ptr dlssFG_; - std::vector> interpFrameImages_; + // interpFrameImages_[swapchainIndex][interpIndex] — one per interpolated frame + std::vector>> interpFrameImages_; // output std::vector> processedImages_; diff --git a/src/core/render/modules/world/dlss/dlss_wrapper.cpp b/src/core/render/modules/world/dlss/dlss_wrapper.cpp index 4f84438..0202668 100644 --- a/src/core/render/modules/world/dlss/dlss_wrapper.cpp +++ b/src/core/render/modules/world/dlss/dlss_wrapper.cpp @@ -239,6 +239,24 @@ NVSDK_NGX_Result NgxContext::queryFrameGenAvailable() { return NVSDK_NGX_Result_Success; } +uint32_t NgxContext::queryMaxMultiFrameCount() { + assert(ngxParams_); + unsigned int maxCount = 0; + // Use device capability parameters to query MFG support + NVSDK_NGX_Parameter *capParams = nullptr; + NVSDK_NGX_Result res = NVSDK_NGX_VULKAN_GetCapabilityParameters(&capParams); + if (NVSDK_NGX_SUCCEED(res) && capParams) { + capParams->Get(NVSDK_NGX_DLSSG_Parameter_MultiFrameCountMax, &maxCount); + NVSDK_NGX_VULKAN_DestroyParameters(capParams); + } + if (maxCount <= 1) { + // Also try the regular ngxParams_ as fallback + ngxParams_->Get(NVSDK_NGX_DLSSG_Parameter_MultiFrameCountMax, &maxCount); + } + LOGI << "Max multi-frame count: " << maxCount << " (0 or 1 = 2x only)" << std::endl; + return maxCount <= 1 ? 1 : maxCount; +} + NVSDK_NGX_Result NgxContext::initFrameGen(const DlssFGInitInfo &initInfo, std::shared_ptr cmdPool, std::shared_ptr dlssfg) { diff --git a/src/core/render/modules/world/dlss/dlss_wrapper.hpp b/src/core/render/modules/world/dlss/dlss_wrapper.hpp index bd9db05..7fa35e7 100644 --- a/src/core/render/modules/world/dlss/dlss_wrapper.hpp +++ b/src/core/render/modules/world/dlss/dlss_wrapper.hpp @@ -125,6 +125,10 @@ class NgxContext : public SharedObject { // Check if DLSS Frame Generation is available NVSDK_NGX_Result queryFrameGenAvailable(); + // Query maximum supported multi-frame generation count (1 = 2x only, 3 = up to 4x, etc.) + // Returns 1 if MFG is not supported (plain 2x FG only) + uint32_t queryMaxMultiFrameCount(); + struct DlssFGInitInfo { uint32_t width = 0; uint32_t height = 0; diff --git a/src/core/render/modules/world/dlss/dlssg_wrapper.cpp b/src/core/render/modules/world/dlss/dlssg_wrapper.cpp index 5d345a8..5d37cf0 100644 --- a/src/core/render/modules/world/dlss/dlssg_wrapper.cpp +++ b/src/core/render/modules/world/dlss/dlssg_wrapper.cpp @@ -118,6 +118,8 @@ NVSDK_NGX_Result DlssFG::evaluate(std::shared_ptr cmdBuffer, float cameraFar, float cameraFOV, float cameraAspectRatio, + uint32_t multiFrameCount, + uint32_t multiFrameIndex, bool reset) { assert(m_dlssgHandle); @@ -149,8 +151,8 @@ NVSDK_NGX_Result DlssFG::evaluate(std::shared_ptr cmdBuffer, glm::mat4 prevClipToClip = currentViewProj * glm::inverse(prevViewProj); NVSDK_NGX_DLSSG_Opt_Eval_Params optParams{}; - optParams.multiFrameCount = 1; - optParams.multiFrameIndex = 1; + optParams.multiFrameCount = multiFrameCount; + optParams.multiFrameIndex = multiFrameIndex; // GLM is column-major, DLSS-G expects row-major with left-multiply. // Same trick as DLSS-RR: (M^T)^T = M, so supply original matrices directly. diff --git a/src/core/render/modules/world/dlss/dlssg_wrapper.hpp b/src/core/render/modules/world/dlss/dlssg_wrapper.hpp index 82ffa40..8855ab9 100644 --- a/src/core/render/modules/world/dlss/dlssg_wrapper.hpp +++ b/src/core/render/modules/world/dlss/dlssg_wrapper.hpp @@ -60,6 +60,8 @@ class DlssFG : public SharedObject { float cameraFar, float cameraFOV, float cameraAspectRatio, + uint32_t multiFrameCount = 1, + uint32_t multiFrameIndex = 1, bool reset = false); void deinit(); diff --git a/src/core/render/render_framework.cpp b/src/core/render/render_framework.cpp index e79e0c2..8ddec8e 100644 --- a/src/core/render/render_framework.cpp +++ b/src/core/render/render_framework.cpp @@ -160,62 +160,29 @@ void FrameworkContext::evaluateFrameGeneration() { } if (!dlssModule || !dlssModule->isFrameGenEnabled()) { f->frameGenActive_ = false; + f->frameGenInterpCount_ = 0; return; } auto dlssFG = dlssModule->dlssFG(); - auto interpFrame = dlssModule->interpFrameImage(frameIndex); auto depthImage = dlssModule->linearDepthImage(frameIndex); auto mvecsImage = dlssModule->motionVectorImage(frameIndex); + uint32_t multiFrameCount = dlssModule->frameGenMultiFrameCount(); - if (!dlssFG || !interpFrame || !depthImage || !mvecsImage) { + if (!dlssFG || !depthImage || !mvecsImage || multiFrameCount == 0) { f->frameGenActive_ = false; + f->frameGenInterpCount_ = 0; return; } auto mainQueueIndex = physicalDevice->mainQueueIndex(); - // Transition swapchain from PRESENT_SRC to GENERAL for FG read - // Transition interp frame to GENERAL for FG write - fuseCommandBuffer->barriersBufferImage( - {}, {{ - .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, - .oldLayout = swapchainImage->imageLayout(), - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = mainQueueIndex, - .dstQueueFamilyIndex = mainQueueIndex, - .image = swapchainImage, - .subresourceRange = vk::wholeColorSubresourceRange, - }, - { - .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT, - .oldLayout = interpFrame->imageLayout(), - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = mainQueueIndex, - .dstQueueFamilyIndex = mainQueueIndex, - .image = interpFrame, - .subresourceRange = vk::wholeColorSubresourceRange, - }}); - swapchainImage->imageLayout() = VK_IMAGE_LAYOUT_GENERAL; - interpFrame->imageLayout() = VK_IMAGE_LAYOUT_GENERAL; - - // Set FG resources - dlssFG->setResource(DlssFG::RESOURCE_BACKBUFFER, swapchainImage, false); - dlssFG->setResource(DlssFG::RESOURCE_DEPTH, depthImage, false); - dlssFG->setResource(DlssFG::RESOURCE_MVECS, mvecsImage, false); - dlssFG->setResource(DlssFG::RESOURCE_OUTPUT_INTERP, interpFrame, true); - // Get camera data from WorldUBO auto worldUBOBuffer = Renderer::instance().buffers()->worldUniformBuffer(); auto worldUBO = static_cast(worldUBOBuffer->mappedPtr()); if (worldUBO == nullptr) { f->frameGenActive_ = false; + f->frameGenInterpCount_ = 0; return; } @@ -235,20 +202,106 @@ void FrameworkContext::evaluateFrameGeneration() { glm::mat4 prevView = f->hasPrevMatrices_ ? f->prevViewMatrix_ : viewMatrix; glm::mat4 prevProj = f->hasPrevMatrices_ ? f->prevProjMatrix_ : projMatrix; - NVSDK_NGX_Result result = dlssFG->evaluate( - fuseCommandBuffer, viewMatrix, projMatrix, prevView, prevProj, - jitter, cameraPos, cameraNear, cameraFar, cameraFOV, cameraAspectRatio, reset); + // Transition swapchain from PRESENT_SRC to GENERAL for FG read + fuseCommandBuffer->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, + .oldLayout = swapchainImage->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = swapchainImage, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + swapchainImage->imageLayout() = VK_IMAGE_LAYOUT_GENERAL; + + bool allSucceeded = true; + + // Loop over all intermediate frames (multiFrameIndex: 1..multiFrameCount) + for (uint32_t mfi = 1; mfi <= multiFrameCount; mfi++) { + auto interpFrame = dlssModule->interpFrameImage(frameIndex, mfi - 1); + if (!interpFrame) { allSucceeded = false; break; } + + // Transition interp frame to GENERAL for FG write + fuseCommandBuffer->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT | VK_ACCESS_2_MEMORY_WRITE_BIT, + .oldLayout = interpFrame->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpFrame, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + interpFrame->imageLayout() = VK_IMAGE_LAYOUT_GENERAL; + + // Set FG resources — output changes per iteration + dlssFG->setResource(DlssFG::RESOURCE_BACKBUFFER, swapchainImage, false); + dlssFG->setResource(DlssFG::RESOURCE_DEPTH, depthImage, false); + dlssFG->setResource(DlssFG::RESOURCE_MVECS, mvecsImage, false); + dlssFG->setResource(DlssFG::RESOURCE_OUTPUT_INTERP, interpFrame, true); + + NVSDK_NGX_Result result = dlssFG->evaluate( + fuseCommandBuffer, viewMatrix, projMatrix, prevView, prevProj, + jitter, cameraPos, cameraNear, cameraFar, cameraFOV, cameraAspectRatio, + multiFrameCount, mfi, reset); + + if (NVSDK_NGX_FAILED(result)) { + std::cerr << "[DLSS-FG] Frame Generation evaluate failed for multiFrameIndex " << mfi << std::endl; + allSucceeded = false; + // Transition this interp frame back anyway + fuseCommandBuffer->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, + .oldLayout = interpFrame->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpFrame, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + interpFrame->imageLayout() = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + break; + } + + // Transition interp frame to TRANSFER_SRC for later blit + fuseCommandBuffer->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, + .oldLayout = interpFrame->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpFrame, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + interpFrame->imageLayout() = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + } // Store current matrices for next frame f->prevViewMatrix_ = viewMatrix; f->prevProjMatrix_ = projMatrix; f->hasPrevMatrices_ = true; + f->backbufferFrameID_++; - if (NVSDK_NGX_SUCCEED(result)) { + if (allSucceeded) { f->frameGenActive_ = true; + f->frameGenInterpCount_ = multiFrameCount; } else { f->frameGenActive_ = false; - std::cerr << "[DLSS-FG] Frame Generation evaluate failed" << std::endl; + f->frameGenInterpCount_ = 0; } // Transition swapchain back to PRESENT_SRC_KHR @@ -264,21 +317,8 @@ void FrameworkContext::evaluateFrameGeneration() { .dstQueueFamilyIndex = mainQueueIndex, .image = swapchainImage, .subresourceRange = vk::wholeColorSubresourceRange, - }, - { - .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT, - .oldLayout = interpFrame->imageLayout(), - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .srcQueueFamilyIndex = mainQueueIndex, - .dstQueueFamilyIndex = mainQueueIndex, - .image = interpFrame, - .subresourceRange = vk::wholeColorSubresourceRange, }}); swapchainImage->imageLayout() = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - interpFrame->imageLayout() = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; } Framework::Framework() {} @@ -473,9 +513,9 @@ void Framework::present() { exit(EXIT_FAILURE); } - // Double-present: present interpolated frame if Frame Generation produced one - if (frameGenActive_) { - // Find the DLSS module to get the interpolated frame + // Multi-present: present interpolated frame(s) if Frame Generation produced them + if (frameGenActive_ && frameGenInterpCount_ > 0) { + // Find the DLSS module to get the interpolated frames std::shared_ptr dlssModule; auto worldPipeline = pipeline_->worldPipeline(); if (worldPipeline) { @@ -486,8 +526,10 @@ void Framework::present() { } if (dlssModule && dlssModule->isFrameGenEnabled()) { - auto interpFrame = dlssModule->interpFrameImage(currentContext_->frameIndex); - if (interpFrame) { + for (uint32_t interpIdx = 0; interpIdx < frameGenInterpCount_; interpIdx++) { + auto interpFrame = dlssModule->interpFrameImage(currentContext_->frameIndex, interpIdx); + if (!interpFrame) break; + // Acquire a new swapchain image for the interpolated frame std::shared_ptr interpAcquireSem = acquireSemaphore(); uint32_t interpIndex; @@ -589,10 +631,12 @@ void Framework::present() { recycleSemaphore(interpAcquireSem); } else { recycleSemaphore(interpAcquireSem); + break; // Can't acquire more images } } } frameGenActive_ = false; + frameGenInterpCount_ = 0; } } @@ -627,6 +671,7 @@ void Framework::recreate() { interpBlitCommandBuffers_.clear(); interpProcessedSemaphores_.clear(); frameGenActive_ = false; + frameGenInterpCount_ = 0; hasPrevMatrices_ = false; swapchain_->reconstruct(); diff --git a/src/core/render/render_framework.hpp b/src/core/render/render_framework.hpp index c5d8b52..1fbf97c 100644 --- a/src/core/render/render_framework.hpp +++ b/src/core/render/render_framework.hpp @@ -141,6 +141,8 @@ class Framework : public SharedObject { // Frame Generation state bool frameGenActive_ = false; + uint32_t frameGenInterpCount_ = 0; // number of interp frames generated this frame + uint64_t backbufferFrameID_ = 0; glm::mat4 prevViewMatrix_ = glm::mat4(1.0f); glm::mat4 prevProjMatrix_ = glm::mat4(1.0f); bool hasPrevMatrices_ = false; From a42b4db552d04f6e948be5ba1342ce9a3bdc8a75 Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <[tu-email@ejemplo.com]> Date: Sun, 12 Apr 2026 00:54:25 -0500 Subject: [PATCH 4/8] refactor: replace async FG presentation thread with pipelined synchronous approach - Remove interpPresentThreadFunc() async thread and all threading infrastructure (mutexes, condition variables, atomics, dedicated command pool) - Implement pipelined synchronous approach: store interp frames from frame N, present them at the START of frame N+1's present() when GPU fence is already signaled - Add PendingInterpPresent struct and presentPendingInterpolatedFrames() method - Fix crash on world entry with Frame Generation enabled (ACCESS_VIOLATION) - Fix watchdog timeout from render thread stuck in present() - Near-zero wait for interp frame fences since a full render frame elapses --- .github/copilot-instructions.md | 66 +++++++ src/core/render/render_framework.cpp | 260 +++++++++++++++------------ src/core/render/render_framework.hpp | 13 +- 3 files changed, 227 insertions(+), 112 deletions(-) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..a3acdce --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,66 @@ +# MCVR — Project Guidelines + +C++23 Vulkan rendering engine for Minecraft, used as a native backend for the [Radiance](../Radiance) Fabric mod. Outputs a shared library (`core.dll`/`libcore.so`) + compiled SPIR-V shaders consumed via JNI. + +## Architecture + +| Component | Path | Purpose | +|-----------|------|---------| +| JNI middleware | `src/core/middleware/` | Java↔C++ bridge (`RendererProxy`, `BufferProxy`, `TextureProxy`, `ChunkProxy`, `EntityProxy`) | +| Vulkan abstraction | `src/core/vulkan/` | Low-level Vulkan wrappers (Instance, Device, Swapchain, Pipeline, AS, SBT, etc.) | +| Render framework | `src/core/render/` | High-level rendering: `Renderer` singleton, `Framework`, `Pipeline`, `Chunks`, `Entities`, `Textures`, `Buffers` | +| Render modules | `src/core/render/modules/` | Pluggable pipeline stages: `ray_tracing`, `dlss`, `fsr_upscaler`, `xess_upscaler`, `nrd`, `svgf`, `temporal_accumulation`, `tone_mapping`, `post_render`, `overlay` | +| Shaders | `src/shader/` | GLSL → SPIR-V (Vulkan 1.4 target via `glslangValidator`), includes ray tracing shaders | +| Common | `src/common/` | Shared utilities: `singleton.hpp`, `shared.hpp` (ref-counted objects), `mapping.hpp` | +| External deps | `extern/` | Git submodules: glfw, volk, vma, glm, vulkan_headers, stb, DLSS, nrd, FidelityFX-SDK, sharc, minizip-ng, json, xess | + +### Key patterns +- **SharedObject**: Ref-counted base for Vulkan resource wrappers +- **Singleton**: `Renderer` is the global entry point +- **Module system**: Each render module registers via `Pipeline::worldModuleConstructors` static map with input/output image formats + +## Build and Test + +### Prerequisites +- CMake 3.15+, Visual Studio 2026 (or 2022), Vulkan SDK (with `glslangValidator` + `shaderc`), JDK 21 (for JNI headers) +- Radiance repo at `../Radiance` with JNI headers generated (`.\gradlew.bat compileJava`) + +### Configure + Build +```powershell +git submodule update --init --recursive +cmake -S . -B build -G "Visual Studio 18 2026" -A x64 ` + -DJAVA_PROJECT_ROOT_DIR=C:/Users/gabri/Documents/CodingRepos/Radiance ` + -DMCVR_ENABLE_NRD=ON -DMCVR_ENABLE_FFX_UPSCALER=ON -DMCVR_ENABLE_XESS=ON -DUSE_AMD=OFF +cmake --build build --config Release +cmake --install build --config Release +``` + +### CMake options +| Option | Default | Effect | +|--------|---------|--------| +| `JAVA_PROJECT_ROOT_DIR` | **required** | Path to Radiance repo (for JNI headers + install destination) | +| `MCVR_ENABLE_NRD` | ON | NVIDIA Ray Tracing Denoiser | +| `MCVR_ENABLE_FFX_UPSCALER` | ON | AMD FidelityFX FSR3 upscaler | +| `MCVR_ENABLE_XESS` | ON | Intel XeSS upscaler | +| `USE_AMD` | ON | AMD-specific optimizations (set OFF for NVIDIA) | + +### Install output +- `core.dll` / `core.lib` → `Radiance/src/main/resources/` +- `shaders/**/*.spv` → `Radiance/src/main/resources/shaders/` +- `internal.zip` (ray tracing GLSL sources) → `Radiance/src/main/resources/shaders/world/ray_tracing/` + +## Conventions + +- C++23 standard, MSVC on Windows +- `VK_NO_PROTOTYPES` defined — Vulkan loaded dynamically via volk +- GLFW function pointers rebound from Java's LWJGL DLL at runtime (no static GLFW linking) +- Shader compilation: GLSL → SPIR-V via `glslangValidator --target-env vulkan1.4` +- NRD shaders compiled via ShaderMake (downloaded during CMake configure) +- `NOMINMAX` and `WIN32_LEAN_AND_MEAN` defined on MSVC + +## Pitfalls + +- First CMake configure downloads DLSS libs, ShaderMake, DXC, and NRD dependencies — requires internet and can be slow +- `JAVA_PROJECT_ROOT_DIR` must use forward slashes even on Windows (CMake normalizes it) +- JNI headers must be regenerated (`gradlew compileJava` in Radiance) whenever native method signatures change +- The `glslangValidator` must be on PATH or found via `VULKAN_SDK` env var diff --git a/src/core/render/render_framework.cpp b/src/core/render/render_framework.cpp index 8ddec8e..d369c97 100644 --- a/src/core/render/render_framework.cpp +++ b/src/core/render/render_framework.cpp @@ -179,6 +179,11 @@ void FrameworkContext::evaluateFrameGeneration() { // Get camera data from WorldUBO auto worldUBOBuffer = Renderer::instance().buffers()->worldUniformBuffer(); + if (!worldUBOBuffer) { + f->frameGenActive_ = false; + f->frameGenInterpCount_ = 0; + return; + } auto worldUBO = static_cast(worldUBOBuffer->mappedPtr()); if (worldUBO == nullptr) { f->frameGenActive_ = false; @@ -351,10 +356,11 @@ void Framework::init(GLFWwindow *window) { for (int i = 0; i < imageCount; i++) { contexts_.push_back(FrameworkContext::create(shared_from_this(), i)); } - // Frame Generation resources + // Frame Generation pipelined-present resources for (int i = 0; i < imageCount; i++) { interpBlitCommandBuffers_.emplace_back(vk::CommandBuffer::create(device_, mainCommandPool_)); interpProcessedSemaphores_.push_back(vk::Semaphore::create(device_)); + interpBlitFences_.push_back(vk::Fence::create(device_, true)); } pipeline_ = Pipeline::create(shared_from_this()); @@ -492,6 +498,10 @@ void Framework::submitCommand() { void Framework::present() { if (!running_) return; + // Present pending interpolated frames from PREVIOUS render frame (pipelined) + // By now the GPU fence for that frame is guaranteed signaled, so waits are near-instant + presentPendingInterpolatedFrames(); + VkPresentInfoKHR presentInfo = {}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; presentInfo.waitSemaphoreCount = 1; @@ -513,9 +523,8 @@ void Framework::present() { exit(EXIT_FAILURE); } - // Multi-present: present interpolated frame(s) if Frame Generation produced them + // Store current frame's interpolated frames for pipelined presentation next frame if (frameGenActive_ && frameGenInterpCount_ > 0) { - // Find the DLSS module to get the interpolated frames std::shared_ptr dlssModule; auto worldPipeline = pipeline_->worldPipeline(); if (worldPipeline) { @@ -526,120 +535,143 @@ void Framework::present() { } if (dlssModule && dlssModule->isFrameGenEnabled()) { - for (uint32_t interpIdx = 0; interpIdx < frameGenInterpCount_; interpIdx++) { - auto interpFrame = dlssModule->interpFrameImage(currentContext_->frameIndex, interpIdx); - if (!interpFrame) break; - - // Acquire a new swapchain image for the interpolated frame - std::shared_ptr interpAcquireSem = acquireSemaphore(); - uint32_t interpIndex; - VkResult acquireResult = vkAcquireNextImageKHR( - device_->vkDevice(), swapchain_->vkSwapchain(), UINT64_MAX, - interpAcquireSem->vkSemaphore(), VK_NULL_HANDLE, &interpIndex); - - if (acquireResult == VK_SUCCESS || acquireResult == VK_SUBOPTIMAL_KHR) { - // Wait for the target image's fence - auto targetFence = commandFinishedFences_[interpIndex]; - vkWaitForFences(device_->vkDevice(), 1, &targetFence->vkFence(), true, UINT64_MAX); - - auto interpBlitCmd = interpBlitCommandBuffers_[interpIndex]; - auto interpSwapImage = swapchain_->swapchainImages()[interpIndex]; - auto mainQueueIndex = physicalDevice_->mainQueueIndex(); - - interpBlitCmd->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - - // Transition swapchain image to TRANSFER_DST - interpBlitCmd->barriersBufferImage( - {}, {{ - .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, - .srcAccessMask = 0, - .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, - .oldLayout = interpSwapImage->imageLayout(), - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .srcQueueFamilyIndex = mainQueueIndex, - .dstQueueFamilyIndex = mainQueueIndex, - .image = interpSwapImage, - .subresourceRange = vk::wholeColorSubresourceRange, - }}); - - // Blit interpolated frame → swapchain image - VkImageBlit blit{}; - blit.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - blit.srcOffsets[0] = {0, 0, 0}; - blit.srcOffsets[1] = {static_cast(interpFrame->width()), - static_cast(interpFrame->height()), 1}; - blit.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - blit.dstOffsets[0] = {0, 0, 0}; - blit.dstOffsets[1] = {static_cast(interpSwapImage->width()), - static_cast(interpSwapImage->height()), 1}; - vkCmdBlitImage(interpBlitCmd->vkCommandBuffer(), - interpFrame->vkImage(), interpFrame->imageLayout(), - interpSwapImage->vkImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, &blit, VK_FILTER_LINEAR); - - // Transition swapchain image to PRESENT_SRC - interpBlitCmd->barriersBufferImage( - {}, {{ - .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - .srcQueueFamilyIndex = mainQueueIndex, - .dstQueueFamilyIndex = mainQueueIndex, - .image = interpSwapImage, - .subresourceRange = vk::wholeColorSubresourceRange, - }}); - interpSwapImage->imageLayout() = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - - interpBlitCmd->end(); - - // Submit blit - VkSemaphore waitSem = interpAcquireSem->vkSemaphore(); - VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - VkSemaphore signalSem = interpProcessedSemaphores_[interpIndex]->vkSemaphore(); - - VkSubmitInfo interpSubmit = {}; - interpSubmit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - interpSubmit.waitSemaphoreCount = 1; - interpSubmit.pWaitSemaphores = &waitSem; - interpSubmit.pWaitDstStageMask = &waitStage; - interpSubmit.commandBufferCount = 1; - interpSubmit.pCommandBuffers = &interpBlitCmd->vkCommandBuffer(); - interpSubmit.signalSemaphoreCount = 1; - interpSubmit.pSignalSemaphores = &signalSem; - - vkResetFences(device_->vkDevice(), 1, &targetFence->vkFence()); - vkQueueSubmit(device_->mainVkQueue(), 1, &interpSubmit, targetFence->vkFence()); - - // Present interpolated frame - VkPresentInfoKHR interpPresentInfo = {}; - interpPresentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - interpPresentInfo.waitSemaphoreCount = 1; - interpPresentInfo.pWaitSemaphores = &signalSem; - interpPresentInfo.swapchainCount = 1; - interpPresentInfo.pSwapchains = &swapchain_->vkSwapchain(); - interpPresentInfo.pImageIndices = &interpIndex; - - VkResult interpResult = vkQueuePresentKHR(device_->mainVkQueue(), &interpPresentInfo); - if (interpResult == VK_ERROR_OUT_OF_DATE_KHR || interpResult == VK_SUBOPTIMAL_KHR) { - // Swapchain needs recreation, will be handled next frame - } - - recycleSemaphore(interpAcquireSem); - } else { - recycleSemaphore(interpAcquireSem); - break; // Can't acquire more images - } + pendingInterp_.interpFrames.clear(); + pendingInterp_.sourceFrameIndex = currentContext_->frameIndex; + for (uint32_t i = 0; i < frameGenInterpCount_; i++) { + auto img = dlssModule->interpFrameImage(currentContext_->frameIndex, i); + if (!img) break; + pendingInterp_.interpFrames.push_back(img); } + hasPendingInterp_ = !pendingInterp_.interpFrames.empty(); + } else { + hasPendingInterp_ = false; } + frameGenActive_ = false; frameGenInterpCount_ = 0; } } +void Framework::presentPendingInterpolatedFrames() { + if (!hasPendingInterp_ || pendingInterp_.interpFrames.empty()) return; + hasPendingInterp_ = false; + + // Wait for the source frame's GPU commands (including FG evaluate) to complete + auto sourceFence = commandFinishedFences_[pendingInterp_.sourceFrameIndex]; + vkWaitForFences(device_->vkDevice(), 1, &sourceFence->vkFence(), true, UINT64_MAX); + + for (size_t interpIdx = 0; interpIdx < pendingInterp_.interpFrames.size(); interpIdx++) { + auto interpFrame = pendingInterp_.interpFrames[interpIdx]; + + // Acquire a new swapchain image for the interpolated frame + std::shared_ptr interpAcquireSem = acquireSemaphore(); + uint32_t interpIndex; + VkResult acquireResult = vkAcquireNextImageKHR( + device_->vkDevice(), swapchain_->vkSwapchain(), UINT64_MAX, + interpAcquireSem->vkSemaphore(), VK_NULL_HANDLE, &interpIndex); + + if (acquireResult != VK_SUCCESS && acquireResult != VK_SUBOPTIMAL_KHR) { + recycleSemaphore(interpAcquireSem); + break; + } + + // Wait for the interp blit fence on this swapchain index + auto blitFence = interpBlitFences_[interpIndex]; + vkWaitForFences(device_->vkDevice(), 1, &blitFence->vkFence(), true, UINT64_MAX); + + auto interpBlitCmd = interpBlitCommandBuffers_[interpIndex]; + auto interpSwapImage = swapchain_->swapchainImages()[interpIndex]; + auto mainQueueIndex = physicalDevice_->mainQueueIndex(); + + interpBlitCmd->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + + // Transition swapchain image to TRANSFER_DST + interpBlitCmd->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, + .srcAccessMask = 0, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .oldLayout = interpSwapImage->imageLayout(), + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpSwapImage, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + + // Blit interpolated frame → swapchain image + VkImageBlit blit{}; + blit.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.srcOffsets[0] = {0, 0, 0}; + blit.srcOffsets[1] = {static_cast(interpFrame->width()), + static_cast(interpFrame->height()), 1}; + blit.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.dstOffsets[0] = {0, 0, 0}; + blit.dstOffsets[1] = {static_cast(interpSwapImage->width()), + static_cast(interpSwapImage->height()), 1}; + vkCmdBlitImage(interpBlitCmd->vkCommandBuffer(), + interpFrame->vkImage(), interpFrame->imageLayout(), + interpSwapImage->vkImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &blit, VK_FILTER_LINEAR); + + // Transition swapchain image to PRESENT_SRC + interpBlitCmd->barriersBufferImage( + {}, {{ + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .srcQueueFamilyIndex = mainQueueIndex, + .dstQueueFamilyIndex = mainQueueIndex, + .image = interpSwapImage, + .subresourceRange = vk::wholeColorSubresourceRange, + }}); + interpSwapImage->imageLayout() = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + interpBlitCmd->end(); + + // Submit blit + VkSemaphore waitSem = interpAcquireSem->vkSemaphore(); + VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + VkSemaphore signalSem = interpProcessedSemaphores_[interpIndex]->vkSemaphore(); + + VkSubmitInfo interpSubmit = {}; + interpSubmit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + interpSubmit.waitSemaphoreCount = 1; + interpSubmit.pWaitSemaphores = &waitSem; + interpSubmit.pWaitDstStageMask = &waitStage; + interpSubmit.commandBufferCount = 1; + interpSubmit.pCommandBuffers = &interpBlitCmd->vkCommandBuffer(); + interpSubmit.signalSemaphoreCount = 1; + interpSubmit.pSignalSemaphores = &signalSem; + + vkResetFences(device_->vkDevice(), 1, &blitFence->vkFence()); + vkQueueSubmit(device_->mainVkQueue(), 1, &interpSubmit, blitFence->vkFence()); + + // Present interpolated frame + VkPresentInfoKHR interpPresentInfo = {}; + interpPresentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + interpPresentInfo.waitSemaphoreCount = 1; + interpPresentInfo.pWaitSemaphores = &signalSem; + interpPresentInfo.swapchainCount = 1; + interpPresentInfo.pSwapchains = &swapchain_->vkSwapchain(); + interpPresentInfo.pImageIndices = &interpIndex; + + VkResult interpResult = vkQueuePresentKHR(device_->mainVkQueue(), &interpPresentInfo); + if (interpResult == VK_ERROR_OUT_OF_DATE_KHR || interpResult == VK_SUBOPTIMAL_KHR) { + recycleSemaphore(interpAcquireSem); + break; // Swapchain needs recreation, will be handled in present() + } + + recycleSemaphore(interpAcquireSem); + } + + pendingInterp_.interpFrames.clear(); +} + void Framework::recreate() { if (!running_) return; @@ -649,6 +681,10 @@ void Framework::recreate() { vk::Window::framebufferResized = false; pipeline_->needRecreate = false; + // Discard any pending interpolated frames + hasPendingInterp_ = false; + pendingInterp_.interpFrames.clear(); + waitRenderQueueIdle(); int width = 0, height = 0; @@ -670,6 +706,7 @@ void Framework::recreate() { commandProcessedSemaphores_.clear(); interpBlitCommandBuffers_.clear(); interpProcessedSemaphores_.clear(); + interpBlitFences_.clear(); frameGenActive_ = false; frameGenInterpCount_ = 0; hasPrevMatrices_ = false; @@ -694,10 +731,11 @@ void Framework::recreate() { for (int i = 0; i < size; i++) { contexts_.push_back(FrameworkContext::create(shared_from_this(), i)); } - // Recreate Frame Generation resources + // Recreate Frame Generation pipelined-present resources for (int i = 0; i < size; i++) { interpBlitCommandBuffers_.emplace_back(vk::CommandBuffer::create(device_, mainCommandPool_)); interpProcessedSemaphores_.push_back(vk::Semaphore::create(device_)); + interpBlitFences_.push_back(vk::Fence::create(device_, true)); } pipeline_->recreate(shared_from_this()); diff --git a/src/core/render/render_framework.hpp b/src/core/render/render_framework.hpp index 1fbf97c..72029c1 100644 --- a/src/core/render/render_framework.hpp +++ b/src/core/render/render_framework.hpp @@ -147,9 +147,20 @@ class Framework : public SharedObject { glm::mat4 prevProjMatrix_ = glm::mat4(1.0f); bool hasPrevMatrices_ = false; - // FG double-present resources + // FG pipelined-present resources std::vector> interpBlitCommandBuffers_; std::vector> interpProcessedSemaphores_; + std::vector> interpBlitFences_; + + // Pending interp frames from previous render frame (pipelined presentation) + struct PendingInterpPresent { + std::vector> interpFrames; + uint32_t sourceFrameIndex = 0; // swapchain index that produced these + }; + PendingInterpPresent pendingInterp_; + bool hasPendingInterp_ = false; + + void presentPendingInterpolatedFrames(); std::shared_ptr gc_; }; From 0f7d1f64b7ed7d8dfcb084ad8151f7db85714290 Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <[tu-email@ejemplo.com]> Date: Sun, 12 Apr 2026 01:05:36 -0500 Subject: [PATCH 5/8] feat: add x5, x6, and auto FG multiplier modes in DLSS module - Parse x5 (multiFrameCount=4) and x6 (multiFrameCount=5) attribute values - Auto mode uses UINT32_MAX sentinel, resolved to hardware max in build() - Logs auto mode selection with resolved count --- .../render/modules/world/dlss/dlss_module.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/core/render/modules/world/dlss/dlss_module.cpp b/src/core/render/modules/world/dlss/dlss_module.cpp index 2610e4f..d1b11a4 100644 --- a/src/core/render/modules/world/dlss/dlss_module.cpp +++ b/src/core/render/modules/world/dlss/dlss_module.cpp @@ -215,6 +215,13 @@ void DLSSModule::setAttributes(int attributeCount, std::vector &att frameGenMultiFrameCount_ = 2; } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x4") { frameGenMultiFrameCount_ = 3; + } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x5") { + frameGenMultiFrameCount_ = 4; + } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x6") { + frameGenMultiFrameCount_ = 5; + } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.auto") { + // Sentinel value: will be resolved to hardware max in build() + frameGenMultiFrameCount_ = UINT32_MAX; } } } @@ -235,9 +242,14 @@ void DLSSModule::build() { // Initialize Frame Generation if enabled and available if (frameGenMultiFrameCount_ > 0 && ngxContext_->queryFrameGenAvailable() == NVSDK_NGX_Result_Success) { - // Clamp requested count to hardware max + // Clamp requested count to hardware max (also resolves auto mode) uint32_t maxCount = ngxContext_->queryMaxMultiFrameCount(); - if (frameGenMultiFrameCount_ > maxCount) { + if (frameGenMultiFrameCount_ >= UINT32_MAX) { + // Auto mode: use hardware maximum + std::cout << "[DLSS] Auto Frame Generation: using hardware max " << maxCount + << " (" << (maxCount + 1) << "x)" << std::endl; + frameGenMultiFrameCount_ = maxCount; + } else if (frameGenMultiFrameCount_ > maxCount) { std::cout << "[DLSS] Requested multi-frame count " << frameGenMultiFrameCount_ << " exceeds hardware max " << maxCount << ", clamping" << std::endl; frameGenMultiFrameCount_ = maxCount; From 329c31c7881cef96f8f368692b2210ecb3a38458 Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <[tu-email@ejemplo.com]> Date: Sun, 12 Apr 2026 01:42:32 -0500 Subject: [PATCH 6/8] feat: replace auto with dynamic frame generation mode - Add glfwGetPrimaryMonitor/glfwGetVideoMode GLFW bindings - Query monitor refresh rate at init for dynamic FG target - Dynamic mode: per-frame multiFrameCount = ceil(targetHz / baseFps) - 1 - Allocate interp images to hardware max, use only what's needed - Reset frameGenDynamic_ when switching to non-dynamic modes --- src/core/all_extern.hpp | 8 ++++++ src/core/glfw_bind.cpp | 2 ++ ...ance_client_proxy_vulkan_RendererProxy.cpp | 2 ++ .../render/modules/world/dlss/dlss_module.cpp | 27 ++++++++++++++----- .../render/modules/world/dlss/dlss_module.hpp | 4 +++ src/core/render/render_framework.cpp | 23 ++++++++++++++++ src/core/render/render_framework.hpp | 6 +++++ 7 files changed, 66 insertions(+), 6 deletions(-) diff --git a/src/core/all_extern.hpp b/src/core/all_extern.hpp index 7b79938..ae97a91 100644 --- a/src/core/all_extern.hpp +++ b/src/core/all_extern.hpp @@ -40,6 +40,8 @@ typedef void (*PFN_glfwSetWindowTitle)(GLFWwindow *, const char *); typedef void (*PFN_glfwSetFramebufferSizeCallback)(GLFWwindow *, GLFWframebuffersizefun); typedef void (*PFN_glfwGetFramebufferSize)(GLFWwindow *, int *, int *); typedef void (*PFN_glfwWaitEvents)(void); +typedef GLFWmonitor *(*PFN_glfwGetPrimaryMonitor)(void); +typedef const GLFWvidmode *(*PFN_glfwGetVideoMode)(GLFWmonitor *); extern PFN_glfwInit p_glfwInit; extern PFN_glfwTerminate p_glfwTerminate; @@ -50,6 +52,8 @@ extern PFN_glfwSetWindowTitle p_glfwSetWindowTitle; extern PFN_glfwSetFramebufferSizeCallback p_glfwSetFramebufferSizeCallback; extern PFN_glfwGetFramebufferSize p_glfwGetFramebufferSize; extern PFN_glfwWaitEvents p_glfwWaitEvents; +extern PFN_glfwGetPrimaryMonitor p_glfwGetPrimaryMonitor; +extern PFN_glfwGetVideoMode p_glfwGetVideoMode; # define GLFW_Init p_glfwInit # define GLFW_Terminate p_glfwTerminate @@ -60,6 +64,8 @@ extern PFN_glfwWaitEvents p_glfwWaitEvents; # define GLFW_SetFramebufferSizeCallback p_glfwSetFramebufferSizeCallback # define GLFW_GetFramebufferSize p_glfwGetFramebufferSize # define GLFW_WaitEvents p_glfwWaitEvents +# define GLFW_GetPrimaryMonitor p_glfwGetPrimaryMonitor +# define GLFW_GetVideoMode p_glfwGetVideoMode #else # define GLFW_Init glfwInit # define GLFW_Terminate glfwTerminate @@ -70,6 +76,8 @@ extern PFN_glfwWaitEvents p_glfwWaitEvents; # define GLFW_SetFramebufferSizeCallback glfwSetFramebufferSizeCallback # define GLFW_GetFramebufferSize glfwGetFramebufferSize # define GLFW_WaitEvents glfwWaitEvents +# define GLFW_GetPrimaryMonitor glfwGetPrimaryMonitor +# define GLFW_GetVideoMode glfwGetVideoMode #endif #include diff --git a/src/core/glfw_bind.cpp b/src/core/glfw_bind.cpp index d6c91f1..014749c 100644 --- a/src/core/glfw_bind.cpp +++ b/src/core/glfw_bind.cpp @@ -12,4 +12,6 @@ PFN_glfwSetWindowTitle p_glfwSetWindowTitle = nullptr; PFN_glfwSetFramebufferSizeCallback p_glfwSetFramebufferSizeCallback = nullptr; PFN_glfwGetFramebufferSize p_glfwGetFramebufferSize = nullptr; PFN_glfwWaitEvents p_glfwWaitEvents = nullptr; +PFN_glfwGetPrimaryMonitor p_glfwGetPrimaryMonitor = nullptr; +PFN_glfwGetVideoMode p_glfwGetVideoMode = nullptr; #endif \ No newline at end of file diff --git a/src/core/middleware/com_radiance_client_proxy_vulkan_RendererProxy.cpp b/src/core/middleware/com_radiance_client_proxy_vulkan_RendererProxy.cpp index 1e9d6c0..64bb2e8 100644 --- a/src/core/middleware/com_radiance_client_proxy_vulkan_RendererProxy.cpp +++ b/src/core/middleware/com_radiance_client_proxy_vulkan_RendererProxy.cpp @@ -89,6 +89,8 @@ static void bind_symbols(DYNLIB_HANDLE h) { reinterpret_cast(gp("glfwSetFramebufferSizeCallback")); p_glfwGetFramebufferSize = reinterpret_cast(gp("glfwGetFramebufferSize")); p_glfwWaitEvents = reinterpret_cast(gp("glfwWaitEvents")); + p_glfwGetPrimaryMonitor = reinterpret_cast(gp("glfwGetPrimaryMonitor")); + p_glfwGetVideoMode = reinterpret_cast(gp("glfwGetVideoMode")); } static std::u16string JStringToU16(JNIEnv* env, jstring jstr) { diff --git a/src/core/render/modules/world/dlss/dlss_module.cpp b/src/core/render/modules/world/dlss/dlss_module.cpp index d1b11a4..0b93075 100644 --- a/src/core/render/modules/world/dlss/dlss_module.cpp +++ b/src/core/render/modules/world/dlss/dlss_module.cpp @@ -1,5 +1,6 @@ #include "core/render/modules/world/dlss/dlss_module.hpp" +#include #include "core/render/buffers.hpp" #include "core/render/pipeline.hpp" #include "core/render/render_framework.hpp" @@ -207,6 +208,7 @@ void DLSSModule::setAttributes(int attributeCount, std::vector &att } } else if (attributeKVs[2 * i] == "render_pipeline.module.dlss.attribute.frame_generation") { const auto &val = attributeKVs[2 * i + 1]; + frameGenDynamic_ = false; if (val == "render_pipeline.module.dlss.attribute.frame_generation.off") { frameGenMultiFrameCount_ = 0; } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x2") { @@ -219,7 +221,8 @@ void DLSSModule::setAttributes(int attributeCount, std::vector &att frameGenMultiFrameCount_ = 4; } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.x6") { frameGenMultiFrameCount_ = 5; - } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.auto") { + } else if (val == "render_pipeline.module.dlss.attribute.frame_generation.dynamic") { + frameGenDynamic_ = true; // Sentinel value: will be resolved to hardware max in build() frameGenMultiFrameCount_ = UINT32_MAX; } @@ -242,13 +245,14 @@ void DLSSModule::build() { // Initialize Frame Generation if enabled and available if (frameGenMultiFrameCount_ > 0 && ngxContext_->queryFrameGenAvailable() == NVSDK_NGX_Result_Success) { - // Clamp requested count to hardware max (also resolves auto mode) + // Clamp requested count to hardware max (also resolves dynamic/sentinel mode) uint32_t maxCount = ngxContext_->queryMaxMultiFrameCount(); - if (frameGenMultiFrameCount_ >= UINT32_MAX) { - // Auto mode: use hardware maximum - std::cout << "[DLSS] Auto Frame Generation: using hardware max " << maxCount - << " (" << (maxCount + 1) << "x)" << std::endl; + frameGenMaxMultiFrameCount_ = maxCount; + if (frameGenDynamic_ || frameGenMultiFrameCount_ >= UINT32_MAX) { + // Dynamic mode: allocate to hardware max, actual count adjusted per-frame frameGenMultiFrameCount_ = maxCount; + std::cout << "[DLSS] Dynamic Frame Generation: hardware max " << maxCount + << " (" << (maxCount + 1) << "x)" << std::endl; } else if (frameGenMultiFrameCount_ > maxCount) { std::cout << "[DLSS] Requested multi-frame count " << frameGenMultiFrameCount_ << " exceeds hardware max " << maxCount << ", clamping" << std::endl; @@ -293,6 +297,17 @@ std::vector> &DLSSModule::contexts() { return contexts_; } +void DLSSModule::updateDynamicFrameCount(double frameTimeSec, int targetFps) { + if (!frameGenDynamic_ || frameGenMaxMultiFrameCount_ == 0) return; + + double baseFps = 1.0 / frameTimeSec; + // Total frames needed to hit target = ceil(targetFps / baseFps) + // Interp frames = total - 1 (the real frame counts as one) + uint32_t totalNeeded = static_cast(std::ceil(static_cast(targetFps) / baseFps)); + uint32_t interpNeeded = (totalNeeded > 1) ? (totalNeeded - 1) : 1; + frameGenMultiFrameCount_ = std::min(interpNeeded, frameGenMaxMultiFrameCount_); +} + void DLSSModule::bindTexture(std::shared_ptr sampler, std::shared_ptr image, int index) {} diff --git a/src/core/render/modules/world/dlss/dlss_module.hpp b/src/core/render/modules/world/dlss/dlss_module.hpp index 45c95f4..9465183 100644 --- a/src/core/render/modules/world/dlss/dlss_module.hpp +++ b/src/core/render/modules/world/dlss/dlss_module.hpp @@ -53,6 +53,8 @@ class DLSSModule : public WorldModule, public SharedObject { bool isFrameGenEnabled() const { return frameGenMultiFrameCount_ > 0 && dlssFG_ != nullptr; } uint32_t frameGenMultiFrameCount() const { return frameGenMultiFrameCount_; } + bool isFrameGenDynamic() const { return frameGenDynamic_; } + void updateDynamicFrameCount(double frameTimeSec, int targetFps); std::shared_ptr dlssFG() { return dlssFG_; } std::shared_ptr interpFrameImage(uint32_t frameIndex, uint32_t interpIndex) { return interpFrameImages_[frameIndex][interpIndex]; } std::shared_ptr motionVectorImage(uint32_t frameIndex) { return motionVectorImages_[frameIndex]; } @@ -81,6 +83,8 @@ class DLSSModule : public WorldModule, public SharedObject { // frame generation // 0 = disabled, 1 = 2x, 2 = 3x, 3 = 4x, etc. uint32_t frameGenMultiFrameCount_ = 0; + uint32_t frameGenMaxMultiFrameCount_ = 0; + bool frameGenDynamic_ = false; std::shared_ptr dlssFG_; // interpFrameImages_[swapchainIndex][interpIndex] — one per interpolated frame std::vector>> interpFrameImages_; diff --git a/src/core/render/render_framework.cpp b/src/core/render/render_framework.cpp index d369c97..341c210 100644 --- a/src/core/render/render_framework.cpp +++ b/src/core/render/render_framework.cpp @@ -167,6 +167,20 @@ void FrameworkContext::evaluateFrameGeneration() { auto dlssFG = dlssModule->dlssFG(); auto depthImage = dlssModule->linearDepthImage(frameIndex); auto mvecsImage = dlssModule->motionVectorImage(frameIndex); + + // Update dynamic frame count based on measured frame time + if (dlssModule->isFrameGenDynamic()) { + auto now = std::chrono::high_resolution_clock::now(); + if (f->hasLastBaseFrameTime_) { + double frameTimeSec = std::chrono::duration(now - f->lastBaseFrameTime_).count(); + if (frameTimeSec > 0.0) { + dlssModule->updateDynamicFrameCount(frameTimeSec, f->monitorRefreshRate_); + } + } + f->lastBaseFrameTime_ = now; + f->hasLastBaseFrameTime_ = true; + } + uint32_t multiFrameCount = dlssModule->frameGenMultiFrameCount(); if (!dlssFG || !depthImage || !mvecsImage || multiFrameCount == 0) { @@ -363,6 +377,15 @@ void Framework::init(GLFWwindow *window) { interpBlitFences_.push_back(vk::Fence::create(device_, true)); } + // Query monitor refresh rate for dynamic FG + GLFWmonitor *monitor = GLFW_GetPrimaryMonitor(); + if (monitor) { + const GLFWvidmode *mode = GLFW_GetVideoMode(monitor); + if (mode) monitorRefreshRate_ = mode->refreshRate; + } + if (monitorRefreshRate_ <= 0) monitorRefreshRate_ = 60; + std::cout << "[Framework] Monitor refresh rate: " << monitorRefreshRate_ << " Hz" << std::endl; + pipeline_ = Pipeline::create(shared_from_this()); } diff --git a/src/core/render/render_framework.hpp b/src/core/render/render_framework.hpp index 72029c1..c75037d 100644 --- a/src/core/render/render_framework.hpp +++ b/src/core/render/render_framework.hpp @@ -10,6 +10,7 @@ #include #include +#include class Framework; class UIModule; @@ -147,6 +148,11 @@ class Framework : public SharedObject { glm::mat4 prevProjMatrix_ = glm::mat4(1.0f); bool hasPrevMatrices_ = false; + // Dynamic FG state + int monitorRefreshRate_ = 0; + std::chrono::high_resolution_clock::time_point lastBaseFrameTime_{}; + bool hasLastBaseFrameTime_ = false; + // FG pipelined-present resources std::vector> interpBlitCommandBuffers_; std::vector> interpProcessedSemaphores_; From ce73ced9d27c1565a6d62abc7096d32fb32ba3d2 Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <[tu-email@ejemplo.com]> Date: Sun, 12 Apr 2026 02:13:03 -0500 Subject: [PATCH 7/8] fix: present interp frames same-frame to fix FG indicator flickering - Replace pipelined presentation (deferred to next frame) with same-frame - Present interpolated frames immediately after real frame in present() - Remove PendingInterpPresent struct and related state - Rename presentPendingInterpolatedFrames -> presentInterpolatedFrames --- src/core/render/render_framework.cpp | 65 ++++++++++------------------ src/core/render/render_framework.hpp | 10 +---- 2 files changed, 23 insertions(+), 52 deletions(-) diff --git a/src/core/render/render_framework.cpp b/src/core/render/render_framework.cpp index 341c210..536462c 100644 --- a/src/core/render/render_framework.cpp +++ b/src/core/render/render_framework.cpp @@ -521,10 +521,6 @@ void Framework::submitCommand() { void Framework::present() { if (!running_) return; - // Present pending interpolated frames from PREVIOUS render frame (pipelined) - // By now the GPU fence for that frame is guaranteed signaled, so waits are near-instant - presentPendingInterpolatedFrames(); - VkPresentInfoKHR presentInfo = {}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; presentInfo.waitSemaphoreCount = 1; @@ -546,45 +542,34 @@ void Framework::present() { exit(EXIT_FAILURE); } - // Store current frame's interpolated frames for pipelined presentation next frame + // Present interpolated frames immediately after the real frame (same-frame presentation) + // This keeps the DLSS-G SDK's internal present tracking consistent so the FG indicator stays on if (frameGenActive_ && frameGenInterpCount_ > 0) { - std::shared_ptr dlssModule; - auto worldPipeline = pipeline_->worldPipeline(); - if (worldPipeline) { - for (auto &mod : worldPipeline->worldModules()) { - dlssModule = std::dynamic_pointer_cast(mod); - if (dlssModule) break; - } - } - - if (dlssModule && dlssModule->isFrameGenEnabled()) { - pendingInterp_.interpFrames.clear(); - pendingInterp_.sourceFrameIndex = currentContext_->frameIndex; - for (uint32_t i = 0; i < frameGenInterpCount_; i++) { - auto img = dlssModule->interpFrameImage(currentContext_->frameIndex, i); - if (!img) break; - pendingInterp_.interpFrames.push_back(img); - } - hasPendingInterp_ = !pendingInterp_.interpFrames.empty(); - } else { - hasPendingInterp_ = false; - } - - frameGenActive_ = false; - frameGenInterpCount_ = 0; + presentInterpolatedFrames(); } + + frameGenActive_ = false; + frameGenInterpCount_ = 0; } -void Framework::presentPendingInterpolatedFrames() { - if (!hasPendingInterp_ || pendingInterp_.interpFrames.empty()) return; - hasPendingInterp_ = false; +void Framework::presentInterpolatedFrames() { + // Wait for GPU to finish current frame's commands (includes FG evaluate writing interp images) + auto fence = commandFinishedFences_[currentContextIndex_]; + vkWaitForFences(device_->vkDevice(), 1, &fence->vkFence(), true, UINT64_MAX); - // Wait for the source frame's GPU commands (including FG evaluate) to complete - auto sourceFence = commandFinishedFences_[pendingInterp_.sourceFrameIndex]; - vkWaitForFences(device_->vkDevice(), 1, &sourceFence->vkFence(), true, UINT64_MAX); + std::shared_ptr dlssModule; + auto worldPipeline = pipeline_->worldPipeline(); + if (worldPipeline) { + for (auto &mod : worldPipeline->worldModules()) { + dlssModule = std::dynamic_pointer_cast(mod); + if (dlssModule) break; + } + } + if (!dlssModule || !dlssModule->isFrameGenEnabled()) return; - for (size_t interpIdx = 0; interpIdx < pendingInterp_.interpFrames.size(); interpIdx++) { - auto interpFrame = pendingInterp_.interpFrames[interpIdx]; + for (uint32_t interpIdx = 0; interpIdx < frameGenInterpCount_; interpIdx++) { + auto interpFrame = dlssModule->interpFrameImage(currentContextIndex_, interpIdx); + if (!interpFrame) break; // Acquire a new swapchain image for the interpolated frame std::shared_ptr interpAcquireSem = acquireSemaphore(); @@ -691,8 +676,6 @@ void Framework::presentPendingInterpolatedFrames() { recycleSemaphore(interpAcquireSem); } - - pendingInterp_.interpFrames.clear(); } void Framework::recreate() { @@ -704,10 +687,6 @@ void Framework::recreate() { vk::Window::framebufferResized = false; pipeline_->needRecreate = false; - // Discard any pending interpolated frames - hasPendingInterp_ = false; - pendingInterp_.interpFrames.clear(); - waitRenderQueueIdle(); int width = 0, height = 0; diff --git a/src/core/render/render_framework.hpp b/src/core/render/render_framework.hpp index c75037d..0f61c44 100644 --- a/src/core/render/render_framework.hpp +++ b/src/core/render/render_framework.hpp @@ -158,15 +158,7 @@ class Framework : public SharedObject { std::vector> interpProcessedSemaphores_; std::vector> interpBlitFences_; - // Pending interp frames from previous render frame (pipelined presentation) - struct PendingInterpPresent { - std::vector> interpFrames; - uint32_t sourceFrameIndex = 0; // swapchain index that produced these - }; - PendingInterpPresent pendingInterp_; - bool hasPendingInterp_ = false; - - void presentPendingInterpolatedFrames(); + void presentInterpolatedFrames(); std::shared_ptr gc_; }; From cf494d840d91aa5c148a38682b8f17cbecb07e23 Mon Sep 17 00:00:00 2001 From: Gabrieli2806 <88515074+Gabrieli2806@users.noreply.github.com> Date: Sun, 12 Apr 2026 02:33:15 -0500 Subject: [PATCH 8/8] Delete .github/copilot-instructions.md --- .github/copilot-instructions.md | 66 --------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index a3acdce..0000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,66 +0,0 @@ -# MCVR — Project Guidelines - -C++23 Vulkan rendering engine for Minecraft, used as a native backend for the [Radiance](../Radiance) Fabric mod. Outputs a shared library (`core.dll`/`libcore.so`) + compiled SPIR-V shaders consumed via JNI. - -## Architecture - -| Component | Path | Purpose | -|-----------|------|---------| -| JNI middleware | `src/core/middleware/` | Java↔C++ bridge (`RendererProxy`, `BufferProxy`, `TextureProxy`, `ChunkProxy`, `EntityProxy`) | -| Vulkan abstraction | `src/core/vulkan/` | Low-level Vulkan wrappers (Instance, Device, Swapchain, Pipeline, AS, SBT, etc.) | -| Render framework | `src/core/render/` | High-level rendering: `Renderer` singleton, `Framework`, `Pipeline`, `Chunks`, `Entities`, `Textures`, `Buffers` | -| Render modules | `src/core/render/modules/` | Pluggable pipeline stages: `ray_tracing`, `dlss`, `fsr_upscaler`, `xess_upscaler`, `nrd`, `svgf`, `temporal_accumulation`, `tone_mapping`, `post_render`, `overlay` | -| Shaders | `src/shader/` | GLSL → SPIR-V (Vulkan 1.4 target via `glslangValidator`), includes ray tracing shaders | -| Common | `src/common/` | Shared utilities: `singleton.hpp`, `shared.hpp` (ref-counted objects), `mapping.hpp` | -| External deps | `extern/` | Git submodules: glfw, volk, vma, glm, vulkan_headers, stb, DLSS, nrd, FidelityFX-SDK, sharc, minizip-ng, json, xess | - -### Key patterns -- **SharedObject**: Ref-counted base for Vulkan resource wrappers -- **Singleton**: `Renderer` is the global entry point -- **Module system**: Each render module registers via `Pipeline::worldModuleConstructors` static map with input/output image formats - -## Build and Test - -### Prerequisites -- CMake 3.15+, Visual Studio 2026 (or 2022), Vulkan SDK (with `glslangValidator` + `shaderc`), JDK 21 (for JNI headers) -- Radiance repo at `../Radiance` with JNI headers generated (`.\gradlew.bat compileJava`) - -### Configure + Build -```powershell -git submodule update --init --recursive -cmake -S . -B build -G "Visual Studio 18 2026" -A x64 ` - -DJAVA_PROJECT_ROOT_DIR=C:/Users/gabri/Documents/CodingRepos/Radiance ` - -DMCVR_ENABLE_NRD=ON -DMCVR_ENABLE_FFX_UPSCALER=ON -DMCVR_ENABLE_XESS=ON -DUSE_AMD=OFF -cmake --build build --config Release -cmake --install build --config Release -``` - -### CMake options -| Option | Default | Effect | -|--------|---------|--------| -| `JAVA_PROJECT_ROOT_DIR` | **required** | Path to Radiance repo (for JNI headers + install destination) | -| `MCVR_ENABLE_NRD` | ON | NVIDIA Ray Tracing Denoiser | -| `MCVR_ENABLE_FFX_UPSCALER` | ON | AMD FidelityFX FSR3 upscaler | -| `MCVR_ENABLE_XESS` | ON | Intel XeSS upscaler | -| `USE_AMD` | ON | AMD-specific optimizations (set OFF for NVIDIA) | - -### Install output -- `core.dll` / `core.lib` → `Radiance/src/main/resources/` -- `shaders/**/*.spv` → `Radiance/src/main/resources/shaders/` -- `internal.zip` (ray tracing GLSL sources) → `Radiance/src/main/resources/shaders/world/ray_tracing/` - -## Conventions - -- C++23 standard, MSVC on Windows -- `VK_NO_PROTOTYPES` defined — Vulkan loaded dynamically via volk -- GLFW function pointers rebound from Java's LWJGL DLL at runtime (no static GLFW linking) -- Shader compilation: GLSL → SPIR-V via `glslangValidator --target-env vulkan1.4` -- NRD shaders compiled via ShaderMake (downloaded during CMake configure) -- `NOMINMAX` and `WIN32_LEAN_AND_MEAN` defined on MSVC - -## Pitfalls - -- First CMake configure downloads DLSS libs, ShaderMake, DXC, and NRD dependencies — requires internet and can be slow -- `JAVA_PROJECT_ROOT_DIR` must use forward slashes even on Windows (CMake normalizes it) -- JNI headers must be regenerated (`gradlew compileJava` in Radiance) whenever native method signatures change -- The `glslangValidator` must be on PATH or found via `VULKAN_SDK` env var