From d422a545fc33b57b57981ed5713655738e4d5a3e Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Mon, 3 Mar 2025 16:52:09 +0100 Subject: [PATCH 01/14] working on upgrade to v12 with av1 --- nvenc_bindings.zig | 139 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 122 insertions(+), 17 deletions(-) diff --git a/nvenc_bindings.zig b/nvenc_bindings.zig index c64dbf5..8463592 100644 --- a/nvenc_bindings.zig +++ b/nvenc_bindings.zig @@ -41,24 +41,12 @@ pub const h264_profile_main_guid = guid(0x60b5c1d4, 0x67fe, 0x4790, .{ 0x94, 0xd pub const h264_profile_high_guid = guid(0xe7cbc309, 0x4f7a, 0x4b89, .{ 0xaf, 0x2a, 0xd5, 0x37, 0xc9, 0x2b, 0xe3, 0x10 }); pub const h264_profile_high_444_guid = guid(0x7ac663cb, 0xa598, 0x4960, .{ 0xb8, 0x44, 0x33, 0x9b, 0x26, 0x1a, 0x7d, 0x52 }); pub const h264_profile_stereo_guid = guid(0x40847bf5, 0x33f7, 0x4601, .{ 0x90, 0x84, 0xe8, 0xfe, 0x3c, 0x1d, 0xb8, 0xb7 }); -pub const h264_profile_svc_temporal_scalabilty = guid(0xce788d20, 0xaaa9, 0x4318, .{ 0x92, 0xbb, 0xac, 0x7e, 0x85, 0x8c, 0x8d, 0x36 }); pub const h264_profile_progressive_high_guid = guid(0xb405afac, 0xf32b, 0x417b, .{ 0x89, 0xc4, 0x9a, 0xbe, 0xed, 0x3e, 0x59, 0x78 }); pub const h264_profile_constrained_high_guid = guid(0xaec1bd87, 0xe85b, 0x48f2, .{ 0x84, 0xc3, 0x98, 0xbc, 0xa6, 0x28, 0x50, 0x72 }); pub const hevc_profile_main_guid = guid(0xb514c39a, 0xb55b, 0x40fa, .{ 0x87, 0x8f, 0xf1, 0x25, 0x3b, 0x4d, 0xfd, 0xec }); pub const hevc_profile_main10_guid = guid(0xfa4d2b6c, 0x3a5b, 0x411a, .{ 0x80, 0x18, 0x0a, 0x3f, 0x5e, 0x3c, 0x9b, 0xe5 }); pub const hevc_profile_frext_guid = guid(0x51ec32b5, 0x1b4c, 0x453c, .{ 0x9c, 0xbd, 0xb6, 0x16, 0xbd, 0x62, 0x13, 0x41 }); - -// The old presets were deprecated starting SDK version 10.0. We remove them -// entirely since using them is not supported on modern driver versions. 
-// pub const preset_default_guid = guid(0xb2dfb705, 0x4ebd, 0x4c49, .{ 0x9b, 0x5f, 0x24, 0xa7, 0x77, 0xd3, 0xe5, 0x87 }); -// pub const preset_hp_guid = guid(0x60e4c59f, 0xe846, 0x4484, .{ 0xa5, 0x6d, 0xcd, 0x45, 0xbe, 0x9f, 0xdd, 0xf6 }); -// pub const preset_hq_guid = guid(0x34dba71d, 0xa77b, 0x4b8f, .{ 0x9c, 0x3e, 0xb6, 0xd5, 0xda, 0x24, 0xc0, 0x12 }); -// pub const preset_bd_guid = guid(0x82e3e450, 0xbdbb, 0x4e40, .{ 0x98, 0x9c, 0x82, 0xa9, 0xd, 0xf9, 0xef, 0x32 }); -// pub const preset_low_latency_default_guid = guid(0x49df21c5, 0x6dfa, 0x4feb, .{ 0x97, 0x87, 0x6a, 0xcc, 0x9e, 0xff, 0xb7, 0x26 }); -// pub const preset_low_latency_hq_guid = guid(0xc5f733b9, 0xea97, 0x4cf9, .{ 0xbe, 0xc2, 0xbf, 0x78, 0xa7, 0x4f, 0xd1, 0x5 }); -// pub const preset_low_latency_hp_guid = guid(0x67082a44, 0x4bad, 0x48fa, .{ 0x98, 0xea, 0x93, 0x5, 0x6d, 0x15, 0xa, 0x58 }); -// pub const preset_lossless_default_guid = guid(0xd5bfb716, 0xc604, 0x44e7, .{ 0x9b, 0xb8, 0xde, 0xa5, 0x51, 0xf, 0xc3, 0xac }); -// pub const preset_lossless_hp_guid = guid(0x149998e7, 0x2364, 0x411d, .{ 0x82, 0xef, 0x17, 0x98, 0x88, 0x9, 0x34, 0x9 }); +pub const av1_profile_main_guid = guid(0x5f2a39f5, 0xf14e, 0x4f95, .{ 0x9a, 0x9e, 0xb7, 0x6d, 0x56, 0x8f, 0xcf, 0x97 }); pub const preset_p1 = guid(0xfc0a8d3e, 0x45f8, 0x4cf8, .{ 0x80, 0xc7, 0x29, 0x88, 0x71, 0x59, 0x0e, 0xbf }); pub const preset_p2 = guid(0xf581cfb8, 0x88d6, 0x4381, .{ 0x93, 0xf0, 0xdf, 0x13, 0xf9, 0xc2, 0x7d, 0xab }); @@ -70,6 +58,15 @@ pub const preset_p7 = guid(0x84848c12, 0x6f71, 0x4c13, .{ 0x93, 0x1b, 0x53, 0xe2 pub const infinite_goplength: u32 = 0xffffffff; +pub const AV1PartSize = enum(c_uint) { + autoselect = 0, + @"4x4" = 1, + @"8x8" = 2, + @"16x16" = 3, + @"32x32" = 4, + @"64x64" = 5, +}; + pub const BFrameRefMode = enum(c_uint) { disabled = 0, each = 1, @@ -104,6 +101,14 @@ pub const DeviceType = enum(c_uint) { opengl = 2, }; +pub const DisplayPicStruct = enum(c_uint) { + frame = 0, + field_top_bottom = 1, + field_bottom_top = 2, + 
frame_doubling = 3, + frame_tripling = 4, +}; + pub const H264AdaptiveTransformMode = enum(c_uint) { autoselect = 0, disable = 1, @@ -181,8 +186,39 @@ pub const Level = enum(c_uint) { hevc_6 = 180, hevc_61 = 183, hevc_62 = 186, - hevc_main = 0, - hevc_high = 1, + + tier_hevc_main = 0, + tier_hevc_high = 1, + + av1_2 = 0, + av1_21 = 1, + av1_22 = 2, + av1_23 = 3, + av1_3 = 4, + av1_31 = 5, + av1_32 = 6, + av1_33 = 7, + av1_4 = 8, + av1_41 = 9, + av1_42 = 10, + av1_43 = 11, + av1_5 = 12, + av1_51 = 13, + av1_52 = 14, + av1_53 = 15, + av1_6 = 16, + av1_61 = 17, + av1_62 = 18, + av1_63 = 19, + av1_7 = 20, + av1_71 = 21, + av1_72 = 22, + av1_73 = 23, + + av1_autoselect = 0, + + tier_av1_0 = 0, + tier_av1_1 = 1, }; pub const MemoryHeap = enum(c_uint) { @@ -306,6 +342,69 @@ pub const TuningInfo = enum(c_uint) { lossless = 4, }; +pub const VuiVideoFormat = enum(c_uint) { + component = 0, + pal = 1, + ntsc = 2, + secam = 3, + mac = 4, + unspecified = 5, +}; + +pub const VuiColorPrimaries = enum(c_uint) { + undefined = 0, + bt709 = 1, + unspecified = 2, + reserved = 3, + bt470m = 4, + bt470bg = 5, + smpte170m = 6, + smpte240m = 7, + film = 8, + bt2020 = 9, + smpte428 = 10, + smpte431 = 11, + smpte432 = 12, + jedec_p22 = 22, +}; + +pub const VuiTransferCharacteristic = enum(c_uint) { + undefined = 0, + bt709 = 1, + unspecified = 2, + reserved = 3, + bt470m = 4, + bt470bg = 5, + smpte170m = 6, + smpte240m = 7, + linear = 8, + log = 9, + log_sqrt = 10, + iec61966_2_4 = 11, + bt1361_ecg = 12, + srgb = 13, + bt2020_10 = 14, + bt2020_12 = 15, + smpte2084 = 16, + smpte428 = 17, + arib_std_b67 = 18, +}; + +pub const VuiMatrixCoeffs = enum(c_uint) { + rgb = 0, + bt709 = 1, + unspecified = 2, + reserved = 3, + fcc = 4, + bt470bg = 5, + smpte170m = 6, + smpte240m = 7, + ycgco = 8, + bt2020_ncl = 9, + bt2020_cl = 10, + smpte2085 = 11, +}; + pub const InputPtr = ?*opaque {}; pub const OutputPtr = ?*opaque {}; pub const RegisteredPtr = ?*opaque {}; @@ -723,10 +822,16 @@ pub const RcParams 
= extern struct { targetQualityLSB: u8, lookaheadDepth: u16, lowDelayKeyFrameScale: u8, - _reserved1: [3]u8, + yDcQPIndexOffset: u8, + uDcQPIndexOffset: u8, + vDcQPIndexOffset: u8, qpMapMode: QPMapMode, multiPass: MultiPass, - _reserved: [6]u32, + alphaLayerBitrateRatio: u32, + cbQPIndexOffset: u8, + crQPIndexOffset: u8, + _reserved: u16, + _reserved2: [4]u32, }; pub const RegisterResource = extern struct { From 0b3bf4d0a1efdc9e20bbd92968d77d2f5b0da1cf Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Mon, 3 Mar 2025 18:20:26 +0100 Subject: [PATCH 02/14] updated nvenc bindings --- nvenc_bindings.zig | 202 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 179 insertions(+), 23 deletions(-) diff --git a/nvenc_bindings.zig b/nvenc_bindings.zig index 8463592..4288374 100644 --- a/nvenc_bindings.zig +++ b/nvenc_bindings.zig @@ -13,16 +13,16 @@ pub const create_bitstream_buffer_ver = struct_version(1); pub const create_mv_buffer_ver = struct_version(1); pub const rc_params_ver = struct_version(1); pub const pic_params_mvc_ver = struct_version(1); -pub const config_ver = struct_version(7) | (1 << 31); +pub const config_ver = struct_version(8) | (1 << 31); pub const initialize_params_ver = struct_version(5) | (1 << 31); pub const reconfigure_params_ver = struct_version(1) | (1 << 31); pub const preset_config_ver = struct_version(4) | (1 << 31); -pub const pic_params_ver = struct_version(4) | (1 << 31); +pub const pic_params_ver = struct_version(6) | (1 << 31); pub const meonly_params_ver = struct_version(3); -pub const lock_bitstream_ver = struct_version(1); +pub const lock_bitstream_ver = struct_version(2); pub const lock_input_buffer_ver = struct_version(1); pub const map_input_resource_ver = struct_version(4); -pub const register_resource_ver = struct_version(3); +pub const register_resource_ver = struct_version(4); pub const stat_ver = struct_version(1); pub const sequence_param_payload_ver = struct_version(1); pub const event_params_ver = 
struct_version(1); @@ -58,6 +58,8 @@ pub const preset_p7 = guid(0x84848c12, 0x6f71, 0x4c13, .{ 0x93, 0x1b, 0x53, 0xe2 pub const infinite_goplength: u32 = 0xffffffff; +pub const max_num_clock_ts = 3; + pub const AV1PartSize = enum(c_uint) { autoselect = 0, @"4x4" = 1, @@ -342,7 +344,7 @@ pub const TuningInfo = enum(c_uint) { lossless = 4, }; -pub const VuiVideoFormat = enum(c_uint) { +pub const VuiVideoFormat = enum(u32) { component = 0, pal = 1, ntsc = 2, @@ -351,7 +353,7 @@ pub const VuiVideoFormat = enum(c_uint) { unspecified = 5, }; -pub const VuiColorPrimaries = enum(c_uint) { +pub const VuiColorPrimaries = enum(u32) { undefined = 0, bt709 = 1, unspecified = 2, @@ -368,7 +370,7 @@ pub const VuiColorPrimaries = enum(c_uint) { jedec_p22 = 22, }; -pub const VuiTransferCharacteristic = enum(c_uint) { +pub const VuiTransferCharacteristic = enum(u32) { undefined = 0, bt709 = 1, unspecified = 2, @@ -390,7 +392,7 @@ pub const VuiTransferCharacteristic = enum(c_uint) { arib_std_b67 = 18, }; -pub const VuiMatrixCoeffs = enum(c_uint) { +pub const VuiMatrixCoeffs = enum(u32) { rgb = 0, bt709 = 1, unspecified = 2, @@ -410,9 +412,26 @@ pub const OutputPtr = ?*opaque {}; pub const RegisteredPtr = ?*opaque {}; pub const CustreamPtr = ?*opaque {}; +pub const AV1OBUPayload = SeiPayload; + +pub const ClockTimestampSet = extern struct { + bitfields: packed struct { + countingType: bool, + discontinuityFlag: bool, + cntDroppedFrames: bool, + nFrames: u8, + secondsValue: u6, + minutesValue: u6, + hoursValue: u5, + reserved2: u4, + }, + timeOffset: u32, +}; + pub const CodecConfig = extern union { h264Config: ConfigH264, hevcConfig: ConfigHEVC, + av1Config: ConfigAV1, h264MeOnlyConfig: ConfigH264MeOnly, hevcMeOnlyConfig: ConfigHEVCMeOnly, _reserved: [320]u32, @@ -421,6 +440,7 @@ pub const CodecConfig = extern union { pub const CodecPicParams = extern union { h264PicParams: PicParamsH264, hevcPicParams: PicParamsHEVC, + av1PicParams: PicParamsAV1, _reserved: [256]u32, }; @@ -438,9 
+458,52 @@ pub const Config = extern struct { _reserved2: [64]?*anyopaque, }; +pub const ConfigAV1 = extern struct { + level: u32, + tier: u32, + minPartSize: AV1PartSize, + maxPartSize: AV1PartSize, + bitfields: packed struct { + outputAnnexBFormat: bool, + enableTimingInfo: bool, + enableDecoderModelInfo: bool, + enableFrameIdNumbers: bool, + disableSeqHdr: bool, + repeatSeqHdr: bool, + enableIntraRefresh: bool, + chromaFormatIDC: u2, + enableBitstreamPadding: bool, + enableCustomTileConfig: bool, + enableFilmGrainParams: bool, + inputPixelBitDepthMinus8: u3, + pixelBitDepthMinus8: u3, + _reserved: u14, + }, + idrPeriod: u32, + intraRefreshPeriod: u32, + intraRefreshCnt: u32, + maxNumRefFramesInDPB: u32, + numTileColumns: u32, + numTileRows: u32, + tileWidths: [*c]u32, + tileHeights: [*c]u32, + maxTemporalLayersMinus1: u32, + colorPrimaries: VuiColorPrimaries, + transferCharacteristics: VuiTransferCharacteristic, + matrixCoefficients: VuiMatrixCoeffs, + colorRange: u32, + chromaSamplePosition: u32, + useBFramesAsRef: BFrameRefMode, + filmGrainParams: ?*FilmGrainParamsAV1, + numFwdRefs: NumRefFrames, + numBwdRefs: NumRefFrames, + _reserved1: [235]u32, + _reserved2: [62]?*anyopaque, +}; + pub const ConfigH264 = extern struct { bitfields: packed struct { - _reserved1: bool, + enableTemporalSVC: bool, enableStereoMVC: bool, hierarchicalPFrames: bool, hierarchicalBFrames: bool, @@ -458,7 +521,11 @@ pub const ConfigH264 = extern struct { qpPrimeYZeroTransformBypassFlag: bool, useConstrainedIntraPred: bool, enableFillerDataInsertion: bool, - _reserved2: u14, + disableSVCPrefixNalu: bool, + enableScalabilityInfoSEI: bool, + singleSliceIntraRefresh: bool, + enableTimeCode: bool, + _reserved: u10, }, level: u32, idrPeriod: u32, @@ -499,17 +566,20 @@ pub const ConfigH264VuiParameters = extern struct { overscanInfoPresentFlag: u32, overscanInfo: u32, videoSignalTypePresentFlag: u32, - videoFormat: u32, + videoFormat: VuiVideoFormat, videoFullRangeFlag: u32, 
colourDescriptionPresentFlag: u32, - colourPrimaries: u32, - transferCharacteristics: u32, - colourMatrix: u32, + colourPrimaries: VuiColorPrimaries, + transferCharacteristics: VuiTransferCharacteristic, + colourMatrix: VuiMatrixCoeffs, chromaSampleLocationFlag: u32, chromaSampleLocationTop: u32, chromaSampleLocationBot: u32, bitstreamRestrictionFlag: u32, - _reserved: [15]u32, + timingInfoPresentFlag: u32, + numUnitInTicks: u32, + timeScale: u32, + _reserved: [12]u32, }; pub const ConfigHEVCVuiParameters = extern struct { @@ -548,7 +618,11 @@ pub const ConfigHEVC = extern struct { pixelBitDepthMinus8: u3, enableFillerDataInsetion: bool, enableConstrainedEncoding: bool, - _reserved: u16, + enableAlphaLayerEncoding: bool, + singleSliceIntraRefresh: bool, + outputRecoveryPointSEI: bool, + outputTimeCodeSEI: bool, + reserved: u12, }, idrPeriod: u32, intraRefreshPeriod: u32, @@ -603,11 +677,50 @@ pub const ExternalMeHint = extern struct { _reserved: i32, }; +pub const ExternalMeSbHint = extern struct { + _reserved: i16, + _reserved1: i16, + _reserved2: i16, +}; + pub const ExternalMeHintCountsPerBlocktype = extern struct { _reserved: u32, _reserved1: [3]u32, }; +pub const FilmGrainParamsAV1 = extern struct { + bitfields: packed struct { + applyGrain: bool, + chromaScalingFromLuma: bool, + overlapFlag: bool, + clipToRestrictedRange: bool, + grainScalingMinus8: u2, + arCoeffLag: u2, + numYPoints: u4, + numCbPoints: u4, + numCrPoints: u4, + arCoeffShiftMinus6: u2, + grainScaleShift: u2, + _reserved: u8, + }, + pointYValue: u8[14], + pointYScaling: u8[14], + pointCbValue: u8[10], + pointCbScaling: u8[10], + pointCrValue: u8[10], + pointCrScaling: u8[10], + arCoeffsYPlus128: u8[24], + arCoeffsCbPlus128: u8[25], + arCoeffsCrPlus128: u8[25], + _reserved: u8[2], + cbMult: u8, + cbLumaMult: u8, + cbOffset: u16, + crMult: u8, + crLumaMult: u8, + crOffset: u16, +}; + pub const GUID = extern struct { Data1: u32, Data2: u16, @@ -635,7 +748,8 @@ pub const InitializeParams = extern 
struct { maxEncodeHeight: u32, maxMEHintCountsPerBlock: [2]ExternalMeHintCountsPerBlocktype, tuningInfo: TuningInfo, - _reserved: [288]u32, + bufferFormat: BufferFormat, + _reserved: [287]u32, _reserved2: [64]?*anyopaque, }; @@ -662,12 +776,14 @@ pub const LockBitstream = extern struct { frameSatd: u32, ltrFrameIdx: u32, ltrFrameBitmap: u32, - _reserved: [13]u32, + temporalId: u32, + _reserved: [12]u32, intraMBCount: u32, interMBCount: u32, averageMVX: i32, averageMVY: i32, - _reserved1: [219]u32, + alphaLayerSizeInBytes: u32, + _reserved1: [218]u32, _reserved2: [64]?*anyopaque, }; @@ -726,8 +842,41 @@ pub const PicParams = extern struct { qpDeltaMapSize: u32, _reservedBitFields: u32, meHintRefPicDist: [2]u16, - _reserved3: [286]u32, - _reserved4: [60]?*anyopaque, + alphaBuffer: InputPtr, + meExternalSbHints: [*c]ExternalMeSbHint, + meSbHintsCount: u32, + _reserved3: [285]u32, + _reserved4: [58]?*anyopaque, +}; + +pub const PicParamsAV1 = extern struct { + displayPOCSyntax: u32, + refPicFlag: u32, + temporalId: u32, + forceIntraRefreshWithFrameCnt: u32, + bitfields: packed struct { + goldenFrameFlag: bool, + arfFrameFlag: bool, + arf2FrameFlag: bool, + bwdFrameFlag: bool, + overlayFrameFlag: bool, + showExistingFrameFlag: bool, + errorResilientModeFlag: bool, + tileConfigUpdate: bool, + enableCustomTileConfig: bool, + filmGrainParamsUpdate: bool, + reservedBitFields: u22, + }, + numTileColumns: u32, + numTileRows: u32, + tileWidths: [*c]u32, + tileHeights: [*c]u32, + obuPayloadArrayCnt: u32, + _reserved: u32, + obuPayloadArray: *?AV1OBUPayload, + filmGrainParams: *?FilmGrainParamsAV1, + _reserved1: u32[247], + _reserved2: ?*anyopaque[61], }; pub const PicParamsH264 = extern struct { @@ -783,7 +932,8 @@ pub const PicParamsHEVC = extern struct { seiPayloadArrayCnt: u32, _reserved: u32, seiPayloadArray: [*c]SeiPayload, - _reserved2: [244]u32, + timeCode: TimeCode, + _reserved2: [237]u32, _reserved3: [61]?*anyopaque, }; @@ -845,8 +995,9 @@ pub const RegisterResource = 
extern struct { registeredResource: RegisteredPtr, bufferFormat: BufferFormat, bufferUsage: BufferUsage, + pInputFencePointer: ?*anyopaque, _reserved1: [247]u32, - _reserved2: [62]?*anyopaque, + _reserved2: [61]?*anyopaque, }; pub const SeiPayload = extern struct { @@ -866,6 +1017,11 @@ pub const SequenceParamPayload = extern struct { _reserved2: [64]?*anyopaque, }; +pub const TimeCode = extern struct { + displayPicStruct: DisplayPicStruct, + clockTimestamp: [max_num_clock_ts]ClockTimestampSet, +}; + pub const ApiFunctionList = extern struct { version: u32, reserved: u32, From 7d0edc4ed6270b80f5529ac617bbbf094dab6777 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Mon, 3 Mar 2025 18:25:14 +0100 Subject: [PATCH 03/14] now working on nvcuvid.h --- nvdec_bindings.zig | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/nvdec_bindings.zig b/nvdec_bindings.zig index 015cdf0..63d146d 100644 --- a/nvdec_bindings.zig +++ b/nvdec_bindings.zig @@ -289,12 +289,18 @@ pub const ParserParams = extern struct { ulClockRate: c_uint, ulErrorThreshold: c_uint, ulMaxDisplayDelay: c_uint, - uReserved1: [5]c_uint, + bitfields: packed struct { + bAnnexb: bool, + uReserved: u31, + }, + uReserved1: [4]c_uint, pUserData: ?*anyopaque, pfnSequenceCallback: ?*const fn (?*anyopaque, ?*VideoFormat) callconv(.C) c_int, pfnDecodePicture: ?*const fn (?*anyopaque, ?*PicParams) callconv(.C) c_int, pfnDisplayPicture: ?*const fn (?*anyopaque, ?*ParserDispInfo) callconv(.C) c_int, - pvReserved2: [7]?*anyopaque, + pfnGetOperatingPoint: ?*const fn (?*anyopaque, ?*CUVIDOPERATINGPOINTINFO) callconv(.C) c_int, + pfnGetSEIMsg: ?*const fn (?*anyopaque, ?*CUVIDSEIMESSAGEINFO) callconv(.C) c_int, + pvReserved2: [5]?*anyopaque, pExtVideoInfo: ?*VideoFormatEx, }; @@ -674,7 +680,7 @@ pub const VideoFormat = extern struct { pub const VideoFormatEx = extern struct { format: VideoFormat, - raw_seqhdr_data: [1024]u8, + raw_seqhdr_data: [1024]u8, // TODO }; pub var cuvidGetDecoderCaps: 
?*const fn (pdc: ?*DecodeCaps) Result = null; From c9664667a3a3d6a4fe65cfb9d93a6a707846b190 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 11:16:22 +0100 Subject: [PATCH 04/14] nvcuvid.h --- nvdec_bindings.zig | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/nvdec_bindings.zig b/nvdec_bindings.zig index 63d146d..8594150 100644 --- a/nvdec_bindings.zig +++ b/nvdec_bindings.zig @@ -169,6 +169,12 @@ pub const create_flags = struct { pub const prefer_CUVID: c_uint = 4; }; +pub const AV1SeqHdr = extern struct { + max_width: c_uint, + max_height: c_uint, + _reserved: [1016]u8, +}; + pub const CreateInfo = extern struct { ulWidth: c_ulong, ulHeight: c_ulong, @@ -298,8 +304,8 @@ pub const ParserParams = extern struct { pfnSequenceCallback: ?*const fn (?*anyopaque, ?*VideoFormat) callconv(.C) c_int, pfnDecodePicture: ?*const fn (?*anyopaque, ?*PicParams) callconv(.C) c_int, pfnDisplayPicture: ?*const fn (?*anyopaque, ?*ParserDispInfo) callconv(.C) c_int, - pfnGetOperatingPoint: ?*const fn (?*anyopaque, ?*CUVIDOPERATINGPOINTINFO) callconv(.C) c_int, - pfnGetSEIMsg: ?*const fn (?*anyopaque, ?*CUVIDSEIMESSAGEINFO) callconv(.C) c_int, + pfnGetOperatingPoint: ?*const fn (?*anyopaque, ?*OperatingPointInfo) callconv(.C) c_int, + pfnGetSEIMsg: ?*const fn (?*anyopaque, ?*SEIMessageInfo) callconv(.C) c_int, pvReserved2: [5]?*anyopaque, pExtVideoInfo: ?*VideoFormatEx, }; @@ -620,6 +626,18 @@ pub const VP9PicParams = extern struct { reserved128Bits: [4]c_uint, }; +pub const OperatingPointInfo = extern struct { + codec: VideoCodec, + data: extern union { + av1: extern struct { + operating_points_cnt: u8, + reserved24_bits: [3]u8, + operating_points_idc: [32]c_ushort, + }, + _CodecReserved: [1024]u8, + }, +}; + pub const ProcParams = extern struct { progressive_frame: c_int, second_field: c_int, @@ -638,6 +656,19 @@ pub const ProcParams = extern struct { _Reserved2: [2]?*anyopaque, }; +pub const 
SEIMessage = extern struct { + sei_message_type: u8, + reserved: [3]u8, + sei_message_size: c_uint, +}; + +pub const SEIMessageInfo = extern struct { + pSEIData: ?*anyopaque, + pSEIMessage: ?*SEIMessage, + sei_message_count: c_uint, + picIdx: c_uint, +}; + pub const SourceDataPacket = extern struct { flags: c_ulong, payload_size: c_ulong, @@ -680,7 +711,10 @@ pub const VideoFormat = extern struct { pub const VideoFormatEx = extern struct { format: VideoFormat, - raw_seqhdr_data: [1024]u8, // TODO + data: extern union { + av1: AV1SeqHdr, + raw_seqhdr_data: [1024]u8, + }, }; pub var cuvidGetDecoderCaps: ?*const fn (pdc: ?*DecodeCaps) Result = null; From 2244000c8fdc8e887ccfe9328330baa353053295 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 11:42:16 +0100 Subject: [PATCH 05/14] cuviddec.h --- nvdec_bindings.zig | 173 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 168 insertions(+), 5 deletions(-) diff --git a/nvdec_bindings.zig b/nvdec_bindings.zig index 8594150..9892623 100644 --- a/nvdec_bindings.zig +++ b/nvdec_bindings.zig @@ -127,7 +127,8 @@ pub const VideoCodec = enum(c_uint) { hevc = 8, vp8 = 9, vp9 = 10, - numcodecs = 11, + av1 = 11, + numcodecs = 12, yuv420 = 1230591318, yv12 = 1498820914, nv12 = 1314271538, @@ -205,7 +206,8 @@ pub const CreateInfo = extern struct { right: c_short, bottom: c_short, }, - _Reserved2: [5]c_ulong, + enableHistogram: c_ulong, + _Reserved2: [4]c_ulong, }; pub const DecodeCaps = extern struct { @@ -214,14 +216,17 @@ pub const DecodeCaps = extern struct { nBitDepthMinus8: c_uint, _reserved1: [3]c_uint, bIsSupported: u8, - _reserved2: u8, + nNumNVDECs: u8, nOutputFormatMask: c_ushort, nMaxWidth: c_uint, nMaxHeight: c_uint, nMaxMBCount: c_uint, nMinWidth: c_ushort, nMinHeight: c_ushort, - _reserved3: [11]c_uint, + bIsHistogramSupported: u8, + nCounterBitDepth: u8, + nMaxHistogramBins: c_ushort, + _reserved3: [10]c_uint, }; pub const GetDecodeStatus = extern struct { @@ -333,10 +338,167 @@ pub 
const PicParams = extern struct { hevc: HEVCPicParams, vp8: VP8PicParams, vp9: VP9PicParams, + av1: AV1PicParams, _CodecReserved: [1024]c_uint, }, }; +pub const AV1PicParams = extern struct { + width: c_uint, + height: c_uint, + frame_offset: c_uint, + decodePicIdx: c_int, + sequence_header: packed struct { + profile: u3, + use_128x128_superblock: bool, + subsampling_x: bool, + subsampling_y: bool, + mono_chrome: bool, + bit_depth_minus8: u4, + enable_filter_intra: bool, + enable_intra_edge_filter: bool, + enable_interintra_compound: bool, + enable_masked_compound: bool, + enable_dual_filter: bool, + enable_order_hint: bool, + order_hint_bits_minus1: u3, + enable_jnt_comp: bool, + enable_superres: bool, + enable_cdef: bool, + enable_restoration: bool, + enable_fgs: bool, + _reserved0_7bits: u7, + }, + frame_header: packed struct { + frame_type: u2, + show_frame: bool, + disable_cdf_update: bool, + allow_screen_content_tools: bool, + force_integer_mv: bool, + coded_denom: u3, + allow_intrabc: bool, + allow_high_precision_mv: bool, + interp_filter: u3, + switchable_motion_mode: bool, + use_ref_frame_mvs: bool, + disable_frame_end_update_cdf: bool, + delta_q_present: bool, + delta_q_res: u2, + using_qmatrix: bool, + coded_lossless: bool, + use_superres: bool, + tx_mode: u2, + reference_mode: bool, + allow_warped_motion: bool, + reduced_tx_set: bool, + skip_mode: bool, + _reserved1_3bits: u3, + }, + tiling_info: packed struct { + num_tile_cols: u8, + num_tile_rows: u8, + context_update_tile_id: u16, + }, + tile_widths: [64]c_ushort, + tile_heights: [64]c_ushort, + cdef_bitfields: packed struct { + cdef_damping_minus_3: u2, + cdef_bits: u2, + _reserved2_4bits: u4, + }, + cdef_y_strength: [8]u8, + cdef_uv_strength: [8]u8, + SkipModeFrames: packed struct { + SkipModeFrame0: u4, + SkipModeFrame1: u4, + }, + base_qindex: u8, + qp_y_dc_delta_q: i8, + qp_u_dc_delta_q: i8, + qp_v_dc_delta_q: i8, + qp_u_ac_delta_q: i8, + qp_v_ac_delta_q: i8, + qm_y: u8, + qm_u: u8, + qm_v: u8, 
+ segmentation_bitfields: packed struct { + segmentation_enabled: bool, + segmentation_update_map: bool, + segmentation_update_data: bool, + segmentation_temporal_update: bool, + _reserved3_4bits: u84, + }, + segmentation_feature_data: [8][8]c_short, + segmentation_feature_mask: [8]u8, + loop_filter_level: [2]u8, + loop_filter_level_u: u8, + loop_filter_level_v: u8, + loop_filter_sharpness: u8, + loop_filter_ref_deltas: [8]i8, + loop_filter_mode_deltas: [2]i8, + loop_filter_bitfields: packed struct { + loop_filter_delta_enabled: bool, + loop_filter_delta_update: bool, + delta_lf_present: bool, + delta_lf_res: u2, + delta_lf_multi: bool, + _reserved4_2bits: u2, + }, + lr_unit_size: [3]u8, + lr_type: [3]u8, + primary_ref_frame: u8, + ref_frame_map: [8]u8, + reference_frames_bitfields: packed struct { + temporal_layer_id: u4, + spatial_layer_id: u4, + }, + _reserved5_32bits: [4]u8, + ref_frame: [7]extern struct { + width: c_uint, + height: c_uint, + index: u8, + _reserved24Bits: [3]u8, + }, + global_motion: [7]extern struct { + bitfields: packed struct { + invalid: bool, + wmtype: u2, + _reserved5Bits: u5, + }, + _reserved24Bits: [3]u8, + wmmat: [6]c_int, + }, + film_grain_params_bitfields: packed struct { + apply_grain: bool, + overlap_flag: bool, + scaling_shift_minus8: u2, + chroma_scaling_from_luma: bool, + ar_coeff_lag: u2, + ar_coeff_shift_minus6: u2, + grain_scale_shift: u2, + clip_to_restricted_range: bool, + _reserved6_4bits: u4, + }, + num_y_points: u8, + scaling_points_y: [14][2]u8, + num_cb_points: u8, + scaling_points_cb: [10][2]u8, + num_cr_points: u8, + scaling_points_cr: [10][2]u8, + _reserved7_8bits: u8, + random_seed: c_ushort, + ar_coeffs_y: [24]c_short, + ar_coeffs_cb: [25]c_short, + ar_coeffs_cr: [25]c_short, + cb_mult: u8, + cb_luma_mult: u8, + cb_offset: c_short, + cr_mult: u8, + cr_luma_mult: u8, + cr_offset: c_short, + _reserved: [7]c_int, +}; + pub const H264PicParams = extern struct { log2_max_frame_num_minus4: c_int, pic_order_cnt_type: 
c_int, @@ -653,7 +815,8 @@ pub const ProcParams = extern struct { _Reserved1: c_uint, output_stream: cuda_bindings.Stream, _Reserved: [46]c_uint, - _Reserved2: [2]?*anyopaque, + histogram_dptr: ?*c_ulonglong, + _Reserved2: [1]?*anyopaque, }; pub const SEIMessage = extern struct { From 79dc7b8a5a32b1c8e5fc719a7e4186b62a1baca2 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 11:43:41 +0100 Subject: [PATCH 06/14] docs: update to 12.0 --- README.md | 6 +++--- build.zig.zon | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index adc78f1..11d2301 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ ffmpeg. ### Video Codec SDK Compatibility Matrix -The Zig wrapper is based on the headers of SDK version 10.0. Since all headers +The Zig wrapper is based on the headers of SDK version 12.0. Since all headers are compatible, this ensures compatibility with the corresponding SDK version, as well as CUDA and driver versions and above. 
@@ -87,10 +87,10 @@ For your convenience, find the full compatibility matrix below: | 13.0 | 570.0 / 570.0 | 11.0 | | 12.2 | 551.76 / 550.54.14 | 11.0 | | 12.1 | 531.61 / 530.41.03 | 11.0 | -| 12.0 | 522.25 / 520.56.06 | 11.0 | +| **12.0** | **522.25 / 520.56.06** | **11.0** | | 11.1 | 471.41 / 470.57.02 | 11.0 | | 11.0 | 456.71 / 455.27 | 11.0 | -| **10.0** | **445.87 / 450.51** | **10.1** | +| 10.0 | 445.87 / 450.51 | 10.1 | | 9.1 | 436.15 / 435.21 | 10.0 | | 9.0 | 418.81 / 418.30 | 10.0 | | 8.2 | 397.93 / 396.24 | 8.0 | diff --git a/build.zig.zon b/build.zig.zon index be12b7c..cd0a53c 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,6 +1,6 @@ .{ .name = "nvidia-video-codec-sdk", - .version = "10.0.26", + .version = "12.0.16", .dependencies = .{}, .paths = .{ "LICENSE-APACHE", From d05e18c24682b1888a5ce582eda93725a9136c0f Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 15:22:59 +0100 Subject: [PATCH 07/14] finished up rebase-nvidia-v12 with some testing --- nvdec_bindings.zig | 46 +++++------ nvenc_bindings.zig | 35 ++++---- tools/.gitignore | 174 ++++++++++++++++++++++++++++++++++++++++ tools/check_bindings.py | 171 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 385 insertions(+), 41 deletions(-) create mode 100644 tools/.gitignore create mode 100644 tools/check_bindings.py diff --git a/nvdec_bindings.zig b/nvdec_bindings.zig index 9892623..02f5354 100644 --- a/nvdec_bindings.zig +++ b/nvdec_bindings.zig @@ -176,7 +176,26 @@ pub const AV1SeqHdr = extern struct { _reserved: [1016]u8, }; -pub const CreateInfo = extern struct { +pub const DecodeCaps = extern struct { + eCodecType: VideoCodec, + eChromaFormat: VideoChromaFormat, + nBitDepthMinus8: c_uint, + _reserved1: [3]c_uint, + bIsSupported: u8, + nNumNVDECs: u8, + nOutputFormatMask: c_ushort, + nMaxWidth: c_uint, + nMaxHeight: c_uint, + nMaxMBCount: c_uint, + nMinWidth: c_ushort, + nMinHeight: c_ushort, + bIsHistogramSupported: u8, + nCounterBitDepth: u8, + 
nMaxHistogramBins: c_ushort, + _reserved3: [10]c_uint, +}; + +pub const DecodeCreateInfo = extern struct { ulWidth: c_ulong, ulHeight: c_ulong, ulNumDecodeSurfaces: c_ulong, @@ -210,25 +229,6 @@ pub const CreateInfo = extern struct { _Reserved2: [4]c_ulong, }; -pub const DecodeCaps = extern struct { - eCodecType: VideoCodec, - eChromaFormat: VideoChromaFormat, - nBitDepthMinus8: c_uint, - _reserved1: [3]c_uint, - bIsSupported: u8, - nNumNVDECs: u8, - nOutputFormatMask: c_ushort, - nMaxWidth: c_uint, - nMaxHeight: c_uint, - nMaxMBCount: c_uint, - nMinWidth: c_ushort, - nMinHeight: c_ushort, - bIsHistogramSupported: u8, - nCounterBitDepth: u8, - nMaxHistogramBins: c_ushort, - _reserved3: [10]c_uint, -}; - pub const GetDecodeStatus = extern struct { decodeStatus: DecodeStatus, reserved: [31]c_uint, @@ -426,7 +426,7 @@ pub const AV1PicParams = extern struct { segmentation_update_map: bool, segmentation_update_data: bool, segmentation_temporal_update: bool, - _reserved3_4bits: u84, + _reserved3_4bits: u4, }, segmentation_feature_data: [8][8]c_short, segmentation_feature_mask: [8]u8, @@ -881,7 +881,7 @@ pub const VideoFormatEx = extern struct { }; pub var cuvidGetDecoderCaps: ?*const fn (pdc: ?*DecodeCaps) Result = null; -pub var cuvidCreateDecoder: ?*const fn (phDecoder: ?*VideoDecoder, pdci: ?*CreateInfo) Result = null; +pub var cuvidCreateDecoder: ?*const fn (phDecoder: ?*VideoDecoder, pdci: ?*DecodeCreateInfo) Result = null; pub var cuvidDestroyDecoder: ?*const fn (hDecoder: VideoDecoder) Result = null; pub var cuvidDecodePicture: ?*const fn (hDecoder: VideoDecoder, pPicParams: ?*PicParams) Result = null; pub var cuvidGetDecodeStatus: ?*const fn (hDecoder: VideoDecoder, nPicIdx: c_int, pDecodeStatus: ?*GetDecodeStatus) Result = null; @@ -906,7 +906,7 @@ pub fn load() !void { else => @panic("unsupported operating system"), }; cuvidGetDecoderCaps = nvcuvid.lookup(*const fn (pdc: ?*DecodeCaps) Result, "cuvidGetDecoderCaps") orelse @panic("cuvid library invalid"); - 
cuvidCreateDecoder = nvcuvid.lookup(*const fn (phDecoder: ?*VideoDecoder, pdci: ?*CreateInfo) Result, "cuvidCreateDecoder") orelse @panic("cuvid library invalid"); + cuvidCreateDecoder = nvcuvid.lookup(*const fn (phDecoder: ?*VideoDecoder, pdci: ?*DecodeCreateInfo) Result, "cuvidCreateDecoder") orelse @panic("cuvid library invalid"); cuvidDestroyDecoder = nvcuvid.lookup(*const fn (hDecoder: VideoDecoder) Result, "cuvidDestroyDecoder") orelse @panic("cuvid library invalid"); cuvidDecodePicture = nvcuvid.lookup(*const fn (hDecoder: VideoDecoder, pPicParams: ?*PicParams) Result, "cuvidDecodePicture") orelse @panic("cuvid library invalid"); cuvidGetDecodeStatus = nvcuvid.lookup(*const fn (hDecoder: VideoDecoder, nPicIdx: c_int, pDecodeStatus: ?*GetDecodeStatus) Result, "cuvidGetDecodeStatus") orelse @panic("cuvid library invalid"); diff --git a/nvenc_bindings.zig b/nvenc_bindings.zig index 4288374..c9d11f6 100644 --- a/nvenc_bindings.zig +++ b/nvenc_bindings.zig @@ -412,7 +412,7 @@ pub const OutputPtr = ?*opaque {}; pub const RegisteredPtr = ?*opaque {}; pub const CustreamPtr = ?*opaque {}; -pub const AV1OBUPayload = SeiPayload; +pub const AV1OBUPayload = SEIPayload; pub const ClockTimestampSet = extern struct { bitfields: packed struct { @@ -703,16 +703,16 @@ pub const FilmGrainParamsAV1 = extern struct { grainScaleShift: u2, _reserved: u8, }, - pointYValue: u8[14], - pointYScaling: u8[14], - pointCbValue: u8[10], - pointCbScaling: u8[10], - pointCrValue: u8[10], - pointCrScaling: u8[10], - arCoeffsYPlus128: u8[24], - arCoeffsCbPlus128: u8[25], - arCoeffsCrPlus128: u8[25], - _reserved: u8[2], + pointYValue: [14]u8, + pointYScaling: [14]u8, + pointCbValue: [10]u8, + pointCbScaling: [10]u8, + pointCrValue: [10]u8, + pointCrScaling: [10]u8, + arCoeffsYPlus128: [24]u8, + arCoeffsCbPlus128: [25]u8, + arCoeffsCrPlus128: [25]u8, + _reserved: [2]u8, cbMult: u8, cbLumaMult: u8, cbOffset: u16, @@ -875,8 +875,8 @@ pub const PicParamsAV1 = extern struct { _reserved: u32, 
obuPayloadArray: *?AV1OBUPayload, filmGrainParams: *?FilmGrainParamsAV1, - _reserved1: u32[247], - _reserved2: ?*anyopaque[61], + _reserved1: [247]u32, + _reserved2: [61]?*anyopaque, }; pub const PicParamsH264 = extern struct { @@ -889,7 +889,7 @@ pub const PicParamsH264 = extern struct { sliceTypeData: [*c]u8, sliceTypeArrayCnt: u32, seiPayloadArrayCnt: u32, - seiPayloadArray: [*c]SeiPayload, + seiPayloadArray: [*c]SEIPayload, sliceMode: u32, sliceModeData: u32, ltrMarkFrameIdx: u32, @@ -931,7 +931,7 @@ pub const PicParamsHEVC = extern struct { ltrUsageMode: u32, seiPayloadArrayCnt: u32, _reserved: u32, - seiPayloadArray: [*c]SeiPayload, + seiPayloadArray: [*c]SEIPayload, timeCode: TimeCode, _reserved2: [237]u32, _reserved3: [61]?*anyopaque, @@ -1000,7 +1000,7 @@ pub const RegisterResource = extern struct { _reserved2: [61]?*anyopaque, }; -pub const SeiPayload = extern struct { +pub const SEIPayload = extern struct { payloadSize: u32, payloadType: u32, payload: [*c]u8, @@ -1066,8 +1066,7 @@ pub const ApiFunctionList = extern struct { nvEncSetIOCudaStreams: ?*const fn (?*anyopaque, CustreamPtr, CustreamPtr) callconv(.C) Status, nvEncGetEncodePresetConfigEx: ?*const fn (?*anyopaque, GUID, GUID, TuningInfo, ?*PresetConfig) callconv(.C) Status, __nvEncGetSequenceParamEx: ?*const fn (?*anyopaque, ?*InitializeParams, ?*SequenceParamPayload) callconv(.C) Status, // not included in bindings - __nvEncSetIOCudaStreams: ?*anyopaque, // not included in bindings - _reserved2: [279]?*anyopaque, + _reserved2: [277]?*anyopaque, }; pub var nvEncGetEncodePresetConfig: ?*const fn (?*anyopaque, GUID, GUID, ?*PresetConfig) callconv(.C) Status = null; diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 0000000..0a19790 --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1,174 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ 
+.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc diff --git a/tools/check_bindings.py b/tools/check_bindings.py new file mode 100644 index 0000000..cfac67b --- /dev/null +++ b/tools/check_bindings.py @@ -0,0 +1,171 @@ +""" +# check_bindings.py + +This script matches every Zig bindings struct to the corresponding C struct in +the original C headers and compiles a little Zig program and C program to print +the respective size they represent (@sizeOf for Zig, sizeof for C), then checks +to make sure they are equal. 
+ +To use it run: + +```sh +python3 -m check_bindings /path/to/Video_Codec_SDK_12.0.16/Interface /path/to/cuda.h +``` + +You must supply the SDK Interface dir and cuda.h yourself. +""" + +import sys +import tempfile +import shutil +import subprocess +from pathlib import Path + +def check_bindings(interface_dir, cuda_h): + c_structs = find_c_structs(interface_dir) + zig_structs = find_zig_structs() + mapping = {} + + for scope in ['nvdec', 'nvenc']: + for c_struct in c_structs[scope]: + z = find_corresponding_zig_struct_and_remove(zig_structs[scope], c_struct) + if z is not None: + mapping[c_struct] = (scope, z) + # manual + mapping['NV_ENC_CONFIG_HEVC_VUI_PARAMETERS'] = ('nvenc', 'ConfigHEVCVuiParameters') # typedef alias + zig_structs['nvenc'].remove('ConfigHEVCVuiParameters') + + mapping = list(sorted(mapping.items(), key=lambda t: t[0])) + + assert(len(zig_structs['nvdec']) == 0) + assert(len(zig_structs['nvenc']) == 0) + + with tempfile.TemporaryDirectory() as tempdir: + tempdir = Path(tempdir) + nvenc_zig = Path('../nvenc_bindings.zig') + nvdec_zig = Path('../nvdec_bindings.zig') + for file in [nvenc_zig, nvdec_zig, Path('../cuda_bindings.zig')]: + shutil.copyfile(file, tempdir / file.name) + zig_gen = '''/// Generated by check_bindings.py +pub const std = @import("std"); +pub const nvdec_bindings = @import("nvdec_bindings.zig"); +pub const nvenc_bindings = @import("nvenc_bindings.zig"); +pub fn main() !void { +''' + for _, (scope, z) in mapping: + zig_gen += f' std.debug.print("{{d}}\\n", .{{ @sizeOf({scope}_bindings.{z}) }});\n' + zig_gen += '}' + (tempdir / 'zsizeof.zig').write_text(zig_gen) + subprocess.run(['zig', 'build-exe', '--dep', 'cuda_bindings', '-Mzsizeof=zsizeof.zig', '-Mcuda_bindings=cuda_bindings.zig'], cwd=tempdir) + zig_out = subprocess.run(['./zsizeof'], cwd=tempdir, capture_output=True) + zig_out = zig_out.stderr.decode('utf-8') + zig_sizes = [int(l.strip()) for l in zig_out.splitlines() if l.strip() != ''] + + with 
tempfile.TemporaryDirectory() as tempdir: + tempdir = Path(tempdir) + nv_encode_api_h = interface_dir / "nvEncodeAPI.h" + nv_cuvid_h = interface_dir / "nvcuvid.h" + cuvid_dec_h = interface_dir / "cuviddec.h" + for file in [nv_encode_api_h, nv_cuvid_h, cuvid_dec_h, cuda_h]: + shutil.copyfile(file, tempdir / file.name) + c_gen = '''/* Generated by check_bindings.py */ +#include <stdio.h> +#include "nvEncodeAPI.h" +#include "nvcuvid.h" +#include "cuviddec.h" +int main() { +''' + for c, _ in mapping: + c_gen += f' printf("%d\\n", sizeof({c}));\n' + c_gen += '}' + (tempdir / 'csizeof.c').write_text(c_gen) + subprocess.run(['cc', 'csizeof.c', '-I.', '-o', 'csizeof'], cwd=tempdir) + c_out = subprocess.run(['./csizeof'], cwd=tempdir, capture_output=True) + c_out = c_out.stdout.decode('utf-8') + c_sizes = [int(l.strip()) for l in c_out.splitlines() if l.strip() != ''] + + mismatch = False + for ((c, (scope, z)), c_size, zig_size) in zip(mapping, c_sizes, zig_sizes): + out = f" - C: sizeof({c}) = {c_size}".ljust(64) + out2 = f"Zig: @sizeOf({z}) = {zig_size}".ljust(64) + if c_size != zig_size: + out3 = "! MISMATCH !" + mismatch = True + else: + out3 = "" + print(out + out2 + out3) + if mismatch: + print() + print('! 
MISMATCH detected !') + exit(1) + else: + print() + print('OK') + +def find_corresponding_zig_struct_and_remove(zig_structs, c_struct): + if c_struct == 'CUSEIMESSAGE': + canon = 'SEIMESSAGE' + elif c_struct == 'CUVIDEOFORMAT': + canon = 'VIDEOFORMAT' + elif c_struct == 'CUVIDEOFORMATEX': + canon = 'VIDEOFORMATEX' + else: + canon = c_struct.removeprefix('NVENC_').removeprefix('NV_ENC_').removeprefix('NV_ENCODE_').removeprefix('CUVID').removeprefix('cuda') + canon = canon.replace('_', '') + for z in zig_structs: + if z.lower() == canon.lower(): + zig_structs.remove(z) + return z + return None + +def find_c_structs(interface_dir): + nv_encode_api_h = interface_dir / "nvEncodeAPI.h" + if not nv_encode_api_h.is_file(): + raise RuntimeError(f"{nv_encode_api_h} does not exist") + nv_cuvid_h = interface_dir / "nvcuvid.h" + if not nv_cuvid_h.is_file(): + raise RuntimeError(f"{nv_cuvid_h} does not exist") + cuvid_dec_h = interface_dir / "cuviddec.h" + if not cuvid_dec_h.is_file(): + raise RuntimeError(f"{cuvid_dec_h} does not exist") + structs = {'nvdec': [], 'nvenc': []} + for scope, header in [('nvenc', nv_encode_api_h), ('nvdec', nv_cuvid_h), ('nvdec', cuvid_dec_h)]: + lines = [l.removesuffix('\n') for l in header.open().readlines()] + structs[scope].extend(find_c_structs_line_by_line(lines)) + return structs + +def find_c_structs_line_by_line(lines): + structs = [] + in_struct = False + for line in lines: + if not in_struct and "typedef struct" in line: + in_struct = True + if in_struct and ((len(line) > 0 and line[0] == '}') or (len(line) > 1 and line[1] == '}')): + struct = line.strip().removeprefix('}').removesuffix(';').strip() + if ',' in struct: + struct = struct.split(',')[0].strip() + structs.append(struct) + in_struct = False + return structs + +def find_zig_structs(): + nvenc_zig = Path('../nvenc_bindings.zig') + nvdec_zig = Path('../nvdec_bindings.zig') + structs = {'nvdec': [], 'nvenc': []} + for scope, file in [('nvenc', nvenc_zig), ('nvdec', nvdec_zig)]: + 
lines = [l.removesuffix('\n') for l in file.open().readlines()] + structs[scope].extend(find_zig_structs_line_by_line(lines)) + return structs + +def find_zig_structs_line_by_line(lines): + structs = [] + for line in lines: + if 'pub const' in line and 'extern struct' in line: + struct = line.strip().removeprefix('pub const').removesuffix('= extern struct {').strip() + structs.append(struct) + return structs + +if __name__ == "__main__": + interface_dir = Path(sys.argv[1]) + cuda_h = Path(sys.argv[2]) + check_bindings(interface_dir, cuda_h) From 09f1b7bd6b7c3c95c2ccca300c52006bb186aa22 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 15:26:55 +0100 Subject: [PATCH 08/14] fixed higher level bindings --- nvdec.zig | 2 +- nvenc.zig | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/nvdec.zig b/nvdec.zig index 30b1029..88582f6 100644 --- a/nvdec.zig +++ b/nvdec.zig @@ -276,7 +276,7 @@ pub const Decoder = struct { return error.ResolutionNotSupportedMbCountTooHigh; } - var decoder_create_info = std.mem.zeroes(nvdec_bindings.CreateInfo); + var decoder_create_info = std.mem.zeroes(nvdec_bindings.DecodeCreateInfo); decoder_create_info.CodecType = format.codec; if (self.output_format) |output_format| { decoder_create_info.OutputFormat = output_format; diff --git a/nvenc.zig b/nvenc.zig index d1c5d3f..116ad50 100644 --- a/nvenc.zig +++ b/nvenc.zig @@ -115,7 +115,6 @@ pub const H264Profile = enum { high, high_444, stereo, - svc_temporal_scalabilty, progressive_high, constrained_high, }; @@ -267,7 +266,6 @@ pub const Encoder = struct { .high => nvenc_bindings.h264_profile_high_guid, .high_444 => nvenc_bindings.h264_profile_high_444_guid, .stereo => nvenc_bindings.h264_profile_stereo_guid, - .svc_temporal_scalabilty => nvenc_bindings.h264_profile_svc_temporal_scalabilty, .progressive_high => nvenc_bindings.h264_profile_progressive_high_guid, .constrained_high => nvenc_bindings.h264_profile_constrained_high_guid, }; From 
55a26eabcb3d16d629eb75f725e30628a1c8e3e2 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 15:40:30 +0100 Subject: [PATCH 09/14] nvenc: add AV1 support --- nvenc.zig | 32 ++++++++++++++++++++++++++++++++ nvenc_bindings.zig | 1 + 2 files changed, 33 insertions(+) diff --git a/nvenc.zig b/nvenc.zig index 116ad50..0778971 100644 --- a/nvenc.zig +++ b/nvenc.zig @@ -132,6 +132,15 @@ pub const HEVCProfile = enum { frext, }; +pub const AV1Format = enum { + yuv420, + yuv420_10bit, +}; + +pub const AV1Profile = enum { + main, +}; + /// Codec to use. Choose from H.264 and HEVC (H.265). /// Note that for each codec you can optionally select a profile and format. /// The profile will be forcefully applied to the encoder config. It is @@ -145,6 +154,10 @@ pub const Codec = union(enum) { profile: ?HEVCProfile = null, format: ?HEVCFormat = null, }, + av1: struct { + profile: ?AV1Profile = null, + format: ?AV1Format = null, + }, }; pub const Preset = enum { @@ -227,6 +240,7 @@ pub const Encoder = struct { const codec_guid = switch (options.codec) { .h264 => nvenc_bindings.codec_h264_guid, .hevc => nvenc_bindings.codec_hevc_guid, + .av1 => nvenc_bindings.codec_av1_guid, }; const preset_guid = switch (options.preset) { @@ -296,6 +310,24 @@ pub const Encoder = struct { }; } }, + .av1 => |av1_options| { + if (av1_options.profile) |profile| { + config.profileGUID = switch (profile) { + .main => nvenc_bindings.av1_profile_main_guid, + }; + } + if (av1_options.format) |format| { + config.encodeCodecConfig.av1Config.chromaFormatIDC = 1; + config.encodeCodecConfig.av1Config.inputPixelBitDepthMinus8 = switch (format) { + .yuv420_10bit => 2, + .yuv420 => 0, + }; + config.encodeCodecConfig.av1Config.pixelBitDepthMinus8 = switch (format) { + .yuv420_10bit => 2, + .yuv420 => 0, + }; + } + }, } if (options.rate_control) |rate_control| { diff --git a/nvenc_bindings.zig b/nvenc_bindings.zig index c9d11f6..86543ce 100644 --- a/nvenc_bindings.zig +++ b/nvenc_bindings.zig @@ 
-35,6 +35,7 @@ pub inline fn guid(part1: u32, part2: u16, part3: u16, part4: [8]u8) GUID { pub const codec_h264_guid = guid(0x6bc82762, 0x4e63, 0x4ca4, .{ 0xaa, 0x85, 0x1e, 0x50, 0xf3, 0x21, 0xf6, 0xbf }); pub const codec_hevc_guid = guid(0x790cdc88, 0x4522, 0x4d7b, .{ 0x94, 0x25, 0xbd, 0xa9, 0x97, 0x5f, 0x76, 0x3 }); +pub const codec_av1_guid = guid(0x0a352289, 0x0aa7, 0x4759, .{ 0x86, 0x2d, 0x5d, 0x15, 0xcd, 0x16, 0xd2, 0x54 }); pub const codec_profile_autoselect_guid = guid(0xbfd6f8e7, 0x233c, 0x4341, .{ 0x8b, 0x3e, 0x48, 0x18, 0x52, 0x38, 0x3, 0xf4 }); pub const h264_profile_baseline_guid = guid(0x727bcaa, 0x78c4, 0x4c83, .{ 0x8c, 0x2f, 0xef, 0x3d, 0xff, 0x26, 0x7c, 0x6a }); pub const h264_profile_main_guid = guid(0x60b5c1d4, 0x67fe, 0x4790, .{ 0x94, 0xd5, 0xc4, 0x72, 0x6d, 0x7b, 0x6e, 0x6d }); From a5a6022da995dacf2a882debd81b377810bb2d04 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 15:43:29 +0100 Subject: [PATCH 10/14] nvdec: remove complicated custom num_decode_surfaces logic --- nvdec.zig | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/nvdec.zig b/nvdec.zig index 88582f6..4fcc68d 100644 --- a/nvdec.zig +++ b/nvdec.zig @@ -243,14 +243,7 @@ pub const Decoder = struct { fn handle_sequence_callback(self: *Decoder, format: *nvdec_bindings.VideoFormat) !c_int { if (self.decoder != null) return error.DecoderReconfigurationNotSupported; - // roughly similar to NvDecoder: - // https://github.com/NVIDIA/video-sdk-samples/blob/aa3544dcea2fe63122e4feb83bf805ea40e58dbe/Samples/NvCodec/NvDecoder/NvDecoder.cpp#L93 - const num_decode_surfaces: c_int = switch (format.codec) { - .vp9 => 12, - .h264, .h264_mvc, .h264_svc => 20, - .hevc => 20, - else => 8, - }; + const num_decode_surfaces = format.min_num_decode_surfaces; var decode_caps = std.mem.zeroes(nvdec_bindings.DecodeCaps); decode_caps.eCodecType = format.codec; From d06c20ffc12978507279e95a7f619477b5e9a70e Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: 
Tue, 4 Mar 2025 15:45:06 +0100 Subject: [PATCH 11/14] adding av1 tests --- test.zig | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test.zig b/test.zig index 0e7eaad..d25fad1 100644 --- a/test.zig +++ b/test.zig @@ -377,6 +377,26 @@ test "hevc full hd rate const qp" { ); } +test "default av1 full hd" { + try test_encoder_decoder( + .{ + .codec = .{ .av1 = .{} }, + .resolution = .{ .width = 1920, .height = 1080 }, + }, + long_duration, + ); +} + +test "default av1 4k" { + try test_encoder_decoder( + .{ + .codec = .{ .av1 = .{} }, + .resolution = .{ .width = 3840, .height = 2160 }, + }, + short_duration, + ); +} + const TestColor = enum { red, green, From 35cdb22e28b00081c4b8c3c55d167646253b5074 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 15:45:22 +0100 Subject: [PATCH 12/14] test: add note more av1 tests --- test.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test.zig b/test.zig index d25fad1..9999c60 100644 --- a/test.zig +++ b/test.zig @@ -397,6 +397,8 @@ test "default av1 4k" { ); } +// TODO: more tests + const TestColor = enum { red, green, From 420940a08cf08404a2e70fe73fb29d57c7d30c00 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 15:51:48 +0100 Subject: [PATCH 13/14] nvenc: use bitfields for AV1Config --- nvenc.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nvenc.zig b/nvenc.zig index 0778971..edc090f 100644 --- a/nvenc.zig +++ b/nvenc.zig @@ -317,12 +317,12 @@ pub const Encoder = struct { }; } if (av1_options.format) |format| { - config.encodeCodecConfig.av1Config.chromaFormatIDC = 1; - config.encodeCodecConfig.av1Config.inputPixelBitDepthMinus8 = switch (format) { + config.encodeCodecConfig.av1Config.bitfields.chromaFormatIDC = 1; + config.encodeCodecConfig.av1Config.bitfields.inputPixelBitDepthMinus8 = switch (format) { .yuv420_10bit => 2, .yuv420 => 0, }; - config.encodeCodecConfig.av1Config.pixelBitDepthMinus8 = switch (format) { + 
config.encodeCodecConfig.av1Config.bitfields.pixelBitDepthMinus8 = switch (format) { .yuv420_10bit => 2, .yuv420 => 0, }; From b0318b7e2cff3e25899b333f227585f4a5b34c94 Mon Sep 17 00:00:00 2001 From: Gerwin van der Lugt Date: Tue, 4 Mar 2025 15:53:22 +0100 Subject: [PATCH 14/14] test: fix av1 codec mapping --- test.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/test.zig b/test.zig index 9999c60..ddd4401 100644 --- a/test.zig +++ b/test.zig @@ -638,6 +638,7 @@ fn test_encoder_decoder(encoder_options: nvenc.EncoderOptions, num_frames: usize const decoder_codec = switch (encoder_options.codec) { .h264 => nvdec.Codec.h264, .hevc => nvdec.Codec.hevc, + .av1 => nvdec.Codec.av1, }; var decoder = try nvdec.Decoder.create(&context, .{ .codec = decoder_codec, .output_format = .nv12 }, allocator);