diff --git a/.gitmodules b/.gitmodules index a69e4ee4cbf..33a1a4ed1af 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,7 +40,7 @@ [submodule "third-party/nv-codec-headers"] path = third-party/nv-codec-headers url = https://github.com/FFmpeg/nv-codec-headers.git - branch = sdk/12.0 + branch = master [submodule "third-party/nvapi"] path = third-party/nvapi url = https://github.com/NVIDIA/nvapi.git diff --git a/docs/configuration.md b/docs/configuration.md index 97f08576cd1..82a986c900d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2347,6 +2347,46 @@ editing the `conf` file in a text editor. Use the examples as reference. +### nvenc_split_encode + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Description + Split the encoding of each video frame over multiple NVENC hardware units. + Significantly reduces encoding latency with a marginal compression efficiency penalty. + This option is ignored if your GPU has a single NVENC unit. + @note{This option only applies when using NVENC [encoder](#encoderhttpslocalhost47990configencoder) with HEVC or AV1.} + @note{Applies to Windows only.} +
Default@code{} + driver_decides + @endcode
Example@code{} + nvenc_split_encode = driver_decides + @endcode
ChoicesdisabledDisabled
driver_decidesThe NVIDIA driver will automatically enable split frame encoding when the following conditions are met: 2+ NVENC units, resolution is at least 4K, and the preset is P1-P4.
enabledEnabled
+ ### nvenc_latency_over_power diff --git a/packaging/linux/flatpak/modules/ffmpeg.json b/packaging/linux/flatpak/modules/ffmpeg.json index daca27e4d0f..f4f15a341db 100644 --- a/packaging/linux/flatpak/modules/ffmpeg.json +++ b/packaging/linux/flatpak/modules/ffmpeg.json @@ -8,8 +8,8 @@ "sources": [ { "type": "file", - "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.221.143859/Linux-x86_64-ffmpeg.tar.gz", - "sha256": "cebf7a069bf144808896befe8d0d9d2d1e1d9eb1c9ac44e6906b72c6150a216a", + "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.323.141148/Linux-x86_64-ffmpeg.tar.gz", + "sha256": "66319706a94d1607492e6ebc51060918fce51197d589cac313de8c532143a184", "dest-filename": "ffmpeg.tar.gz", "only-arches": [ "x86_64" @@ -23,8 +23,8 @@ }, { "type": "file", - "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.221.143859/Linux-aarch64-ffmpeg.tar.gz", - "sha256": "6ba08d00f70d913f57ff0df8decaca6c3787b798e163a1cb2f086cb86ff7986d", + "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.323.141148/Linux-aarch64-ffmpeg.tar.gz", + "sha256": "c955e6dba2cf62b4b3c954e0da378db47233fa7bef09ab9c86b4656d2c08378c", "dest-filename": "ffmpeg.tar.gz", "only-arches": [ "aarch64" diff --git a/src/config.cpp b/src/config.cpp index 47475a04b4d..f08934c34ff 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -64,6 +64,21 @@ namespace config { return nvenc::nvenc_two_pass::quarter_resolution; } + nvenc::nvenc_split_frame_encoding split_encode_from_view(const std::string_view &preset) { + using enum nvenc::nvenc_split_frame_encoding; + if (preset == "disabled") { + return disabled; + } + if (preset == "driver_decides") { + return driver_decides; + } + if (preset == "enabled") { + return force_enabled; + } + BOOST_LOG(warning) << "config: unknown nvenc_split_encode value: " << preset; + return driver_decides; + } + } // namespace nv namespace amd { @@ -1083,6 +1098,7 @@ namespace config { bool_f(vars, 
"nvenc_spatial_aq", video.nv.adaptive_quantization); generic_f(vars, "nvenc_twopass", video.nv.two_pass, nv::twopass_from_view); bool_f(vars, "nvenc_h264_cavlc", video.nv.h264_cavlc); + generic_f(vars, "nvenc_split_encode", video.nv.split_frame_encoding, nv::split_encode_from_view); bool_f(vars, "nvenc_realtime_hags", video.nv_realtime_hags); bool_f(vars, "nvenc_opengl_vulkan_on_dxgi", video.nv_opengl_vulkan_on_dxgi); bool_f(vars, "nvenc_latency_over_power", video.nv_sunshine_high_power_mode); diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp index 59c9781a2bf..0d33523a3b2 100644 --- a/src/nvenc/nvenc_base.cpp +++ b/src/nvenc/nvenc_base.cpp @@ -20,7 +20,7 @@ // - NV_ENC_*_VER definitions where the value inside NVENCAPI_STRUCT_VERSION() was increased // - Incompatible struct changes in nvEncodeAPI.h (fields removed, semantics changed, etc.) // - Test both old and new drivers with all supported codecs -#if NVENCAPI_VERSION != MAKE_NVENC_VER(12U, 0U) +#if NVENCAPI_VERSION != MAKE_NVENC_VER(13U, 0U) #error Check and update NVENC code for backwards compatibility! #endif @@ -98,10 +98,6 @@ namespace nvenc { } bool nvenc_base::create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format) { - // Pick the minimum NvEncode API version required to support the specified codec - // to maximize driver compatibility. AV1 was introduced in SDK v12.0. - minimum_api_version = (client_config.videoFormat <= 1) ? 
MAKE_NVENC_VER(11U, 0U) : MAKE_NVENC_VER(12U, 0U); - if (!nvenc && !init_library()) { return false; } @@ -118,10 +114,10 @@ namespace nvenc { encoder_params.buffer_format = buffer_format; encoder_params.rfi = true; - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = {min_struct_version(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER)}; + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = {NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER}; session_params.device = device; session_params.deviceType = device_type; - session_params.apiVersion = minimum_api_version; + session_params.apiVersion = NVENCAPI_VERSION; if (nvenc_failed(nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder))) { BOOST_LOG(error) << "NvEnc: NvEncOpenEncodeSessionEx() failed: " << last_nvenc_error_string; return false; @@ -139,7 +135,7 @@ namespace nvenc { return false; } - NV_ENC_INITIALIZE_PARAMS init_params = {min_struct_version(NV_ENC_INITIALIZE_PARAMS_VER)}; + NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER}; switch (client_config.videoFormat) { case 0: @@ -173,10 +169,14 @@ namespace nvenc { } auto get_encoder_cap = [&](NV_ENC_CAPS cap) { - NV_ENC_CAPS_PARAM param = {min_struct_version(NV_ENC_CAPS_PARAM_VER), cap}; + NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER}; + param.capsToQuery = cap; int value = 0; - nvenc->nvEncGetEncodeCaps(encoder, init_params.encodeGUID, ¶m, &value); - return value; + int ret = nvenc->nvEncGetEncodeCaps(encoder, init_params.encodeGUID, ¶m, &value); + if (ret == NV_ENC_SUCCESS) { + return value; + } + return 0; }; auto buffer_is_10bit = [&]() { @@ -231,7 +231,18 @@ namespace nvenc { init_params.frameRateDen = fps.den; } - NV_ENC_PRESET_CONFIG preset_config = {min_struct_version(NV_ENC_PRESET_CONFIG_VER), {min_struct_version(NV_ENC_CONFIG_VER, 7, 8)}}; + if (client_config.videoFormat > 0 && get_encoder_cap(NV_ENC_CAPS_NUM_ENCODER_ENGINES) > 1) { + // SFE supports HEVC/AV1 if you have more than 1 nvenc block + using enum 
nvenc_split_frame_encoding; + init_params.splitEncodeMode = config.split_frame_encoding == disabled ? NV_ENC_SPLIT_DISABLE_MODE : + config.split_frame_encoding == force_enabled ? NV_ENC_SPLIT_AUTO_FORCED_MODE : + NV_ENC_SPLIT_AUTO_MODE; + } + + NV_ENC_PRESET_CONFIG preset_config = { + .version = NV_ENC_PRESET_CONFIG_VER, + .presetCfg = {.version = NV_ENC_CONFIG_VER}, + }; if (nvenc_failed(nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config))) { BOOST_LOG(error) << "NvEnc: NvEncGetEncodePresetConfigEx() failed: " << last_nvenc_error_string; return false; @@ -333,7 +344,8 @@ namespace nvenc { auto &format_config = enc_config.encodeCodecConfig.hevcConfig; set_h264_hevc_common_format_config(format_config); if (buffer_is_10bit()) { - format_config.pixelBitDepthMinus8 = 2; + format_config.inputBitDepth = NV_ENC_BIT_DEPTH_10; + format_config.outputBitDepth = NV_ENC_BIT_DEPTH_10; } set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numRefL0, 5); set_minqp_if_enabled(config.min_qp_hevc); @@ -366,8 +378,8 @@ namespace nvenc { } format_config.enableBitstreamPadding = config.insert_filler_data; if (buffer_is_10bit()) { - format_config.inputPixelBitDepthMinus8 = 2; - format_config.pixelBitDepthMinus8 = 2; + format_config.inputBitDepth = NV_ENC_BIT_DEPTH_10; + format_config.outputBitDepth = NV_ENC_BIT_DEPTH_10; } format_config.colorPrimaries = colorspace.primaries; format_config.transferCharacteristics = colorspace.tranfer_function; @@ -395,7 +407,7 @@ namespace nvenc { } if (async_event_handle) { - NV_ENC_EVENT_PARAMS event_params = {min_struct_version(NV_ENC_EVENT_PARAMS_VER)}; + NV_ENC_EVENT_PARAMS event_params = {NV_ENC_EVENT_PARAMS_VER}; event_params.completionEvent = async_event_handle; if (nvenc_failed(nvenc->nvEncRegisterAsyncEvent(encoder, &event_params))) { BOOST_LOG(error) << "NvEnc: NvEncRegisterAsyncEvent() failed: " << last_nvenc_error_string; @@ -403,7 +415,7 @@ namespace 
nvenc { } } - NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = {min_struct_version(NV_ENC_CREATE_BITSTREAM_BUFFER_VER)}; + NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = {NV_ENC_CREATE_BITSTREAM_BUFFER_VER}; if (nvenc_failed(nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer))) { BOOST_LOG(error) << "NvEnc: NvEncCreateBitstreamBuffer() failed: " << last_nvenc_error_string; return false; @@ -455,6 +467,13 @@ namespace nvenc { if (config.insert_filler_data) { extra += " filler-data"; } + if (client_config.videoFormat > 0 && get_encoder_cap(NV_ENC_CAPS_NUM_ENCODER_ENGINES) > 1) { + if (init_params.splitEncodeMode == NV_ENC_SPLIT_AUTO_MODE) { + extra += " sfe-auto"; + } else if (init_params.splitEncodeMode == NV_ENC_SPLIT_AUTO_FORCED_MODE) { + extra += " sfe"; + } + } BOOST_LOG(info) << "NvEnc: created encoder " << video_format_string << quality_preset_string_from_guid(init_params.presetGUID) << extra; } @@ -472,7 +491,7 @@ namespace nvenc { output_bitstream = nullptr; } if (encoder && async_event_handle) { - NV_ENC_EVENT_PARAMS event_params = {min_struct_version(NV_ENC_EVENT_PARAMS_VER)}; + NV_ENC_EVENT_PARAMS event_params = {NV_ENC_EVENT_PARAMS_VER}; event_params.completionEvent = async_event_handle; if (nvenc_failed(nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params))) { BOOST_LOG(error) << "NvEnc: NvEncUnregisterAsyncEvent() failed: " << last_nvenc_error_string; @@ -508,7 +527,7 @@ namespace nvenc { return {}; } - NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = {min_struct_version(NV_ENC_MAP_INPUT_RESOURCE_VER)}; + NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = {NV_ENC_MAP_INPUT_RESOURCE_VER}; mapped_input_buffer.registeredResource = registered_input_buffer; if (nvenc_failed(nvenc->nvEncMapInputResource(encoder, &mapped_input_buffer))) { @@ -521,7 +540,7 @@ namespace nvenc { } }); - NV_ENC_PIC_PARAMS pic_params = {min_struct_version(NV_ENC_PIC_PARAMS_VER, 4, 6)}; + NV_ENC_PIC_PARAMS pic_params = {NV_ENC_PIC_PARAMS_VER}; 
pic_params.inputWidth = encoder_params.width; pic_params.inputHeight = encoder_params.height; pic_params.encodePicFlags = force_idr ? NV_ENC_PIC_FLAG_FORCEIDR : 0; @@ -537,7 +556,7 @@ namespace nvenc { return {}; } - NV_ENC_LOCK_BITSTREAM lock_bitstream = {min_struct_version(NV_ENC_LOCK_BITSTREAM_VER, 1, 2)}; + NV_ENC_LOCK_BITSTREAM lock_bitstream = {NV_ENC_LOCK_BITSTREAM_VER}; lock_bitstream.outputBitstream = output_bitstream; lock_bitstream.doNotWait = async_event_handle ? 1 : 0; @@ -584,8 +603,7 @@ namespace nvenc { return false; } - if (first_frame >= encoder_state.last_rfi_range.first && - last_frame <= encoder_state.last_rfi_range.second) { + if (first_frame >= encoder_state.last_rfi_range.first && last_frame <= encoder_state.last_rfi_range.second) { BOOST_LOG(debug) << "NvEnc: rfi request " << first_frame << "-" << last_frame << " already done"; return true; } @@ -671,19 +689,4 @@ namespace nvenc { return false; } - uint32_t nvenc_base::min_struct_version(uint32_t version, uint32_t v11_struct_version, uint32_t v12_struct_version) { - assert(minimum_api_version); - - // Mask off and replace the original NVENCAPI_VERSION - version &= ~NVENCAPI_VERSION; - version |= minimum_api_version; - - // If there's a struct version override, apply that too - if (v11_struct_version || v12_struct_version) { - version &= ~(0xFFu << 16); - version |= (((minimum_api_version & 0xFF) >= 12) ? v12_struct_version : v11_struct_version) << 16; - } - - return version; - } } // namespace nvenc diff --git a/src/nvenc/nvenc_base.h b/src/nvenc/nvenc_base.h index a4615a84259..bb2cc3f1ef7 100644 --- a/src/nvenc/nvenc_base.h +++ b/src/nvenc/nvenc_base.h @@ -106,16 +106,6 @@ namespace nvenc { bool nvenc_failed(NVENCSTATUS status); - /** - * @brief This function returns the corresponding struct version for the minimum API required by the codec. - * @details Reducing the struct versions maximizes driver compatibility by avoiding needless API breaks. 
- * @param version The raw structure version from `NVENCAPI_STRUCT_VERSION()`. - * @param v11_struct_version Optionally specifies the struct version to use with v11 SDK major versions. - * @param v12_struct_version Optionally specifies the struct version to use with v12 SDK major versions. - * @return A suitable struct version for the active codec. - */ - uint32_t min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0); - const NV_ENC_DEVICE_TYPE device_type; void *encoder = nullptr; @@ -142,7 +132,6 @@ namespace nvenc { private: NV_ENC_OUTPUT_PTR output_bitstream = nullptr; - uint32_t minimum_api_version = 0; struct { uint64_t last_encoded_frame_index = 0; diff --git a/src/nvenc/nvenc_config.h b/src/nvenc/nvenc_config.h index 824397e8577..b2143456a5c 100644 --- a/src/nvenc/nvenc_config.h +++ b/src/nvenc/nvenc_config.h @@ -12,6 +12,12 @@ namespace nvenc { full_resolution, ///< Better overall statistics, slower and uses more extra vram }; + enum class nvenc_split_frame_encoding { + disabled, ///< Disable + driver_decides, ///< Let driver decide + force_enabled, ///< Force-enable + }; + /** * @brief NVENC encoder configuration. 
*/ @@ -48,6 +54,9 @@ namespace nvenc { // Add filler data to encoded frames to stay at target bitrate, mainly for testing bool insert_filler_data = false; + + // Enable split-frame encoding if the gpu has multiple NVENC hardware clusters + nvenc_split_frame_encoding split_frame_encoding = nvenc_split_frame_encoding::driver_decides; }; } // namespace nvenc diff --git a/src/nvenc/nvenc_d3d11.cpp b/src/nvenc/nvenc_d3d11.cpp index 1b749f925e1..f3c7af174e6 100644 --- a/src/nvenc/nvenc_d3d11.cpp +++ b/src/nvenc/nvenc_d3d11.cpp @@ -39,7 +39,7 @@ namespace nvenc { if ((dll = LoadLibraryEx(dll_name, nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32))) { if (auto create_instance = (decltype(NvEncodeAPICreateInstance) *) GetProcAddress(dll, "NvEncodeAPICreateInstance")) { auto new_nvenc = std::make_unique(); - new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER); + new_nvenc->version = NV_ENCODE_API_FUNCTION_LIST_VER; if (nvenc_failed(create_instance(new_nvenc.get()))) { BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string; } else { diff --git a/src/nvenc/nvenc_d3d11_native.cpp b/src/nvenc/nvenc_d3d11_native.cpp index afc665788f3..02d1b364309 100644 --- a/src/nvenc/nvenc_d3d11_native.cpp +++ b/src/nvenc/nvenc_d3d11_native.cpp @@ -51,7 +51,7 @@ namespace nvenc { } if (!registered_input_buffer) { - NV_ENC_REGISTER_RESOURCE register_resource = {min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4)}; + NV_ENC_REGISTER_RESOURCE register_resource = {NV_ENC_REGISTER_RESOURCE_VER}; register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; register_resource.width = encoder_params.width; register_resource.height = encoder_params.height; diff --git a/src/nvenc/nvenc_d3d11_on_cuda.cpp b/src/nvenc/nvenc_d3d11_on_cuda.cpp index b915b32921c..44123ed9e65 100644 --- a/src/nvenc/nvenc_d3d11_on_cuda.cpp +++ b/src/nvenc/nvenc_d3d11_on_cuda.cpp @@ -169,7 +169,7 @@ namespace nvenc { } if (!registered_input_buffer) { - 
NV_ENC_REGISTER_RESOURCE register_resource = {min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4)}; + NV_ENC_REGISTER_RESOURCE register_resource = {NV_ENC_REGISTER_RESOURCE_VER}; register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; register_resource.width = encoder_params.width; register_resource.height = encoder_params.height; diff --git a/src_assets/common/assets/web/config.html b/src_assets/common/assets/web/config.html index b391e3e7470..9bef73b6e99 100644 --- a/src_assets/common/assets/web/config.html +++ b/src_assets/common/assets/web/config.html @@ -289,6 +289,7 @@

{{ $t('config.configuration') }}

"nvenc_spatial_aq": "disabled", "nvenc_vbv_increase": 0, "nvenc_realtime_hags": "enabled", + "nvenc_split_encode": "driver_decides", "nvenc_latency_over_power": "enabled", "nvenc_opengl_vulkan_on_dxgi": "enabled", "nvenc_h264_cavlc": "disabled", diff --git a/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue b/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue index a6bd2a00275..37873c1ddd3 100644 --- a/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue +++ b/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue @@ -27,6 +27,17 @@ const config = ref(props.config)
{{ $t('config.nvenc_preset_desc') }}
+ +
+ + +
{{ $t('config.nvenc_split_encode_desc') }}
+
+
diff --git a/src_assets/common/assets/web/public/assets/locale/en.json b/src_assets/common/assets/web/public/assets/locale/en.json index a647f446b90..77549bf2ef1 100644 --- a/src_assets/common/assets/web/public/assets/locale/en.json +++ b/src_assets/common/assets/web/public/assets/locale/en.json @@ -301,6 +301,9 @@ "nvenc_realtime_hags_desc": "Currently NVIDIA drivers may freeze in encoder when HAGS is enabled, realtime priority is used and VRAM utilization is close to maximum. Disabling this option lowers the priority to high, sidestepping the freeze at the cost of reduced capture performance when the GPU is heavily loaded.", "nvenc_spatial_aq": "Spatial AQ", "nvenc_spatial_aq_desc": "Assign higher QP values to flat regions of the video. Recommended to enable when streaming at lower bitrates.", + "nvenc_split_encode": "Split frame encoding", + "nvenc_split_encode_desc": "Split the encoding of each video frame over multiple NVENC hardware units. Significantly reduces host processing latency with a marginal compression efficiency penalty. The default option enables SFE if the following conditions are met: there are 2+ NVENC units, the stream is 4K resolution or higher, and the preset is P1-P4. Set this to Enabled to use SFE at lower resolutions or higher presets.", + "nvenc_split_encode_driver_decides_def": "Driver decides (default)", "nvenc_twopass": "Two-pass mode", "nvenc_twopass_desc": "Adds preliminary encoding pass. This allows to detect more motion vectors, better distribute bitrate across the frame and more strictly adhere to bitrate limits. 
Disabling it is not recommended since this can lead to occasional bitrate overshoot and subsequent packet loss.", "nvenc_twopass_disabled": "Disabled (fastest, not recommended)", diff --git a/third-party/build-deps b/third-party/build-deps index 91ac60f3a05..c08f69db104 160000 --- a/third-party/build-deps +++ b/third-party/build-deps @@ -1 +1 @@ -Subproject commit 91ac60f3a051819d7d9975b8757730a9471ee8de +Subproject commit c08f69db10450bd06cf79045e79b9179c99bae70 diff --git a/third-party/nv-codec-headers b/third-party/nv-codec-headers index fe32761e7a8..e844e5b26f4 160000 --- a/third-party/nv-codec-headers +++ b/third-party/nv-codec-headers @@ -1 +1 @@ -Subproject commit fe32761e7a8bc79fcf560f356bf3898271bf4d56 +Subproject commit e844e5b26f46bb77479f063029595293aa8f812d