diff --git a/README.md b/README.md index adc78f1..11d2301 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ ffmpeg. ### Video Codec SDK Compatibility Matrix -The Zig wrapper is based on the headers of SDK version 10.0. Since all headers +The Zig wrapper is based on the headers of SDK version 12.0. Since all headers are compatible, this ensures compatibility with the corresponding SDK version, as well as CUDA and driver versions and above. @@ -87,10 +87,10 @@ For your convenience, find the full compatibility matrix below: | 13.0 | 570.0 / 570.0 | 11.0 | | 12.2 | 551.76 / 550.54.14 | 11.0 | | 12.1 | 531.61 / 530.41.03 | 11.0 | -| 12.0 | 522.25 / 520.56.06 | 11.0 | +| **12.0** | **522.25 / 520.56.06** | **11.0** | | 11.1 | 471.41 / 470.57.02 | 11.0 | | 11.0 | 456.71 / 455.27 | 11.0 | -| **10.0** | **445.87 / 450.51** | **10.1** | +| 10.0 | 445.87 / 450.51 | 10.1 | | 9.1 | 436.15 / 435.21 | 10.0 | | 9.0 | 418.81 / 418.30 | 10.0 | | 8.2 | 397.93 / 396.24 | 8.0 | diff --git a/build.zig.zon b/build.zig.zon index be12b7c..cd0a53c 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,6 +1,6 @@ .{ .name = "nvidia-video-codec-sdk", - .version = "10.0.26", + .version = "12.0.16", .dependencies = .{}, .paths = .{ "LICENSE-APACHE", diff --git a/nvdec.zig b/nvdec.zig index 30b1029..4fcc68d 100644 --- a/nvdec.zig +++ b/nvdec.zig @@ -243,14 +243,7 @@ pub const Decoder = struct { fn handle_sequence_callback(self: *Decoder, format: *nvdec_bindings.VideoFormat) !c_int { if (self.decoder != null) return error.DecoderReconfigurationNotSupported; - // roughly similar to NvDecoder: - // https://github.com/NVIDIA/video-sdk-samples/blob/aa3544dcea2fe63122e4feb83bf805ea40e58dbe/Samples/NvCodec/NvDecoder/NvDecoder.cpp#L93 - const num_decode_surfaces: c_int = switch (format.codec) { - .vp9 => 12, - .h264, .h264_mvc, .h264_svc => 20, - .hevc => 20, - else => 8, - }; + const num_decode_surfaces = format.min_num_decode_surfaces; var decode_caps = std.mem.zeroes(nvdec_bindings.DecodeCaps); 
decode_caps.eCodecType = format.codec; @@ -276,7 +269,7 @@ pub const Decoder = struct { return error.ResolutionNotSupportedMbCountTooHigh; } - var decoder_create_info = std.mem.zeroes(nvdec_bindings.CreateInfo); + var decoder_create_info = std.mem.zeroes(nvdec_bindings.DecodeCreateInfo); decoder_create_info.CodecType = format.codec; if (self.output_format) |output_format| { decoder_create_info.OutputFormat = output_format; diff --git a/nvdec_bindings.zig b/nvdec_bindings.zig index 015cdf0..02f5354 100644 --- a/nvdec_bindings.zig +++ b/nvdec_bindings.zig @@ -127,7 +127,8 @@ pub const VideoCodec = enum(c_uint) { hevc = 8, vp8 = 9, vp9 = 10, - numcodecs = 11, + av1 = 11, + numcodecs = 12, yuv420 = 1230591318, yv12 = 1498820914, nv12 = 1314271538, @@ -169,7 +170,32 @@ pub const create_flags = struct { pub const prefer_CUVID: c_uint = 4; }; -pub const CreateInfo = extern struct { +pub const AV1SeqHdr = extern struct { + max_width: c_uint, + max_height: c_uint, + _reserved: [1016]u8, +}; + +pub const DecodeCaps = extern struct { + eCodecType: VideoCodec, + eChromaFormat: VideoChromaFormat, + nBitDepthMinus8: c_uint, + _reserved1: [3]c_uint, + bIsSupported: u8, + nNumNVDECs: u8, + nOutputFormatMask: c_ushort, + nMaxWidth: c_uint, + nMaxHeight: c_uint, + nMaxMBCount: c_uint, + nMinWidth: c_ushort, + nMinHeight: c_ushort, + bIsHistogramSupported: u8, + nCounterBitDepth: u8, + nMaxHistogramBins: c_ushort, + _reserved3: [10]c_uint, +}; + +pub const DecodeCreateInfo = extern struct { ulWidth: c_ulong, ulHeight: c_ulong, ulNumDecodeSurfaces: c_ulong, @@ -199,23 +225,8 @@ pub const CreateInfo = extern struct { right: c_short, bottom: c_short, }, - _Reserved2: [5]c_ulong, -}; - -pub const DecodeCaps = extern struct { - eCodecType: VideoCodec, - eChromaFormat: VideoChromaFormat, - nBitDepthMinus8: c_uint, - _reserved1: [3]c_uint, - bIsSupported: u8, - _reserved2: u8, - nOutputFormatMask: c_ushort, - nMaxWidth: c_uint, - nMaxHeight: c_uint, - nMaxMBCount: c_uint, - nMinWidth: 
c_ushort, - nMinHeight: c_ushort, - _reserved3: [11]c_uint, + enableHistogram: c_ulong, + _Reserved2: [4]c_ulong, }; pub const GetDecodeStatus = extern struct { @@ -289,12 +300,18 @@ pub const ParserParams = extern struct { ulClockRate: c_uint, ulErrorThreshold: c_uint, ulMaxDisplayDelay: c_uint, - uReserved1: [5]c_uint, + bitfields: packed struct { + bAnnexb: bool, + uReserved: u31, + }, + uReserved1: [4]c_uint, pUserData: ?*anyopaque, pfnSequenceCallback: ?*const fn (?*anyopaque, ?*VideoFormat) callconv(.C) c_int, pfnDecodePicture: ?*const fn (?*anyopaque, ?*PicParams) callconv(.C) c_int, pfnDisplayPicture: ?*const fn (?*anyopaque, ?*ParserDispInfo) callconv(.C) c_int, - pvReserved2: [7]?*anyopaque, + pfnGetOperatingPoint: ?*const fn (?*anyopaque, ?*OperatingPointInfo) callconv(.C) c_int, + pfnGetSEIMsg: ?*const fn (?*anyopaque, ?*SEIMessageInfo) callconv(.C) c_int, + pvReserved2: [5]?*anyopaque, pExtVideoInfo: ?*VideoFormatEx, }; @@ -321,10 +338,167 @@ pub const PicParams = extern struct { hevc: HEVCPicParams, vp8: VP8PicParams, vp9: VP9PicParams, + av1: AV1PicParams, _CodecReserved: [1024]c_uint, }, }; +pub const AV1PicParams = extern struct { + width: c_uint, + height: c_uint, + frame_offset: c_uint, + decodePicIdx: c_int, + sequence_header: packed struct { + profile: u3, + use_128x128_superblock: bool, + subsampling_x: bool, + subsampling_y: bool, + mono_chrome: bool, + bit_depth_minus8: u4, + enable_filter_intra: bool, + enable_intra_edge_filter: bool, + enable_interintra_compound: bool, + enable_masked_compound: bool, + enable_dual_filter: bool, + enable_order_hint: bool, + order_hint_bits_minus1: u3, + enable_jnt_comp: bool, + enable_superres: bool, + enable_cdef: bool, + enable_restoration: bool, + enable_fgs: bool, + _reserved0_7bits: u7, + }, + frame_header: packed struct { + frame_type: u2, + show_frame: bool, + disable_cdf_update: bool, + allow_screen_content_tools: bool, + force_integer_mv: bool, + coded_denom: u3, + allow_intrabc: bool, + 
allow_high_precision_mv: bool, + interp_filter: u3, + switchable_motion_mode: bool, + use_ref_frame_mvs: bool, + disable_frame_end_update_cdf: bool, + delta_q_present: bool, + delta_q_res: u2, + using_qmatrix: bool, + coded_lossless: bool, + use_superres: bool, + tx_mode: u2, + reference_mode: bool, + allow_warped_motion: bool, + reduced_tx_set: bool, + skip_mode: bool, + _reserved1_3bits: u3, + }, + tiling_info: packed struct { + num_tile_cols: u8, + num_tile_rows: u8, + context_update_tile_id: u16, + }, + tile_widths: [64]c_ushort, + tile_heights: [64]c_ushort, + cdef_bitfields: packed struct { + cdef_damping_minus_3: u2, + cdef_bits: u2, + _reserved2_4bits: u4, + }, + cdef_y_strength: [8]u8, + cdef_uv_strength: [8]u8, + SkipModeFrames: packed struct { + SkipModeFrame0: u4, + SkipModeFrame1: u4, + }, + base_qindex: u8, + qp_y_dc_delta_q: i8, + qp_u_dc_delta_q: i8, + qp_v_dc_delta_q: i8, + qp_u_ac_delta_q: i8, + qp_v_ac_delta_q: i8, + qm_y: u8, + qm_u: u8, + qm_v: u8, + segmentation_bitfields: packed struct { + segmentation_enabled: bool, + segmentation_update_map: bool, + segmentation_update_data: bool, + segmentation_temporal_update: bool, + _reserved3_4bits: u4, + }, + segmentation_feature_data: [8][8]c_short, + segmentation_feature_mask: [8]u8, + loop_filter_level: [2]u8, + loop_filter_level_u: u8, + loop_filter_level_v: u8, + loop_filter_sharpness: u8, + loop_filter_ref_deltas: [8]i8, + loop_filter_mode_deltas: [2]i8, + loop_filter_bitfields: packed struct { + loop_filter_delta_enabled: bool, + loop_filter_delta_update: bool, + delta_lf_present: bool, + delta_lf_res: u2, + delta_lf_multi: bool, + _reserved4_2bits: u2, + }, + lr_unit_size: [3]u8, + lr_type: [3]u8, + primary_ref_frame: u8, + ref_frame_map: [8]u8, + reference_frames_bitfields: packed struct { + temporal_layer_id: u4, + spatial_layer_id: u4, + }, + _reserved5_32bits: [4]u8, + ref_frame: [7]extern struct { + width: c_uint, + height: c_uint, + index: u8, + _reserved24Bits: [3]u8, + }, + 
global_motion: [7]extern struct { + bitfields: packed struct { + invalid: bool, + wmtype: u2, + _reserved5Bits: u5, + }, + _reserved24Bits: [3]u8, + wmmat: [6]c_int, + }, + film_grain_params_bitfields: packed struct { + apply_grain: bool, + overlap_flag: bool, + scaling_shift_minus8: u2, + chroma_scaling_from_luma: bool, + ar_coeff_lag: u2, + ar_coeff_shift_minus6: u2, + grain_scale_shift: u2, + clip_to_restricted_range: bool, + _reserved6_4bits: u4, + }, + num_y_points: u8, + scaling_points_y: [14][2]u8, + num_cb_points: u8, + scaling_points_cb: [10][2]u8, + num_cr_points: u8, + scaling_points_cr: [10][2]u8, + _reserved7_8bits: u8, + random_seed: c_ushort, + ar_coeffs_y: [24]c_short, + ar_coeffs_cb: [25]c_short, + ar_coeffs_cr: [25]c_short, + cb_mult: u8, + cb_luma_mult: u8, + cb_offset: c_short, + cr_mult: u8, + cr_luma_mult: u8, + cr_offset: c_short, + _reserved: [7]c_int, +}; + pub const H264PicParams = extern struct { log2_max_frame_num_minus4: c_int, pic_order_cnt_type: c_int, @@ -614,6 +788,18 @@ pub const VP9PicParams = extern struct { reserved128Bits: [4]c_uint, }; +pub const OperatingPointInfo = extern struct { + codec: VideoCodec, + data: extern union { + av1: extern struct { + operating_points_cnt: u8, + reserved24_bits: [3]u8, + operating_points_idc: [32]c_ushort, + }, + _CodecReserved: [1024]u8, + }, +}; + pub const ProcParams = extern struct { progressive_frame: c_int, second_field: c_int, @@ -629,7 +815,21 @@ pub const ProcParams = extern struct { _Reserved1: c_uint, output_stream: cuda_bindings.Stream, _Reserved: [46]c_uint, - _Reserved2: [2]?*anyopaque, + histogram_dptr: ?*c_ulonglong, + _Reserved2: [1]?*anyopaque, +}; + +pub const SEIMessage = extern struct { + sei_message_type: u8, + reserved: [3]u8, + sei_message_size: c_uint, +}; + +pub const SEIMessageInfo = extern struct { + pSEIData: ?*anyopaque, + pSEIMessage: ?*SEIMessage, + sei_message_count: c_uint, + picIdx: c_uint, }; pub const SourceDataPacket = extern struct { @@ -674,11 +874,14 @@ 
pub const VideoFormat = extern struct { pub const VideoFormatEx = extern struct { format: VideoFormat, - raw_seqhdr_data: [1024]u8, + data: extern union { + av1: AV1SeqHdr, + raw_seqhdr_data: [1024]u8, + }, }; pub var cuvidGetDecoderCaps: ?*const fn (pdc: ?*DecodeCaps) Result = null; -pub var cuvidCreateDecoder: ?*const fn (phDecoder: ?*VideoDecoder, pdci: ?*CreateInfo) Result = null; +pub var cuvidCreateDecoder: ?*const fn (phDecoder: ?*VideoDecoder, pdci: ?*DecodeCreateInfo) Result = null; pub var cuvidDestroyDecoder: ?*const fn (hDecoder: VideoDecoder) Result = null; pub var cuvidDecodePicture: ?*const fn (hDecoder: VideoDecoder, pPicParams: ?*PicParams) Result = null; pub var cuvidGetDecodeStatus: ?*const fn (hDecoder: VideoDecoder, nPicIdx: c_int, pDecodeStatus: ?*GetDecodeStatus) Result = null; @@ -703,7 +906,7 @@ pub fn load() !void { else => @panic("unsupported operating system"), }; cuvidGetDecoderCaps = nvcuvid.lookup(*const fn (pdc: ?*DecodeCaps) Result, "cuvidGetDecoderCaps") orelse @panic("cuvid library invalid"); - cuvidCreateDecoder = nvcuvid.lookup(*const fn (phDecoder: ?*VideoDecoder, pdci: ?*CreateInfo) Result, "cuvidCreateDecoder") orelse @panic("cuvid library invalid"); + cuvidCreateDecoder = nvcuvid.lookup(*const fn (phDecoder: ?*VideoDecoder, pdci: ?*DecodeCreateInfo) Result, "cuvidCreateDecoder") orelse @panic("cuvid library invalid"); cuvidDestroyDecoder = nvcuvid.lookup(*const fn (hDecoder: VideoDecoder) Result, "cuvidDestroyDecoder") orelse @panic("cuvid library invalid"); cuvidDecodePicture = nvcuvid.lookup(*const fn (hDecoder: VideoDecoder, pPicParams: ?*PicParams) Result, "cuvidDecodePicture") orelse @panic("cuvid library invalid"); cuvidGetDecodeStatus = nvcuvid.lookup(*const fn (hDecoder: VideoDecoder, nPicIdx: c_int, pDecodeStatus: ?*GetDecodeStatus) Result, "cuvidGetDecodeStatus") orelse @panic("cuvid library invalid"); diff --git a/nvenc.zig b/nvenc.zig index d1c5d3f..edc090f 100644 --- a/nvenc.zig +++ b/nvenc.zig @@ -115,7 +115,6 
@@ pub const H264Profile = enum { high, high_444, stereo, - svc_temporal_scalabilty, progressive_high, constrained_high, }; @@ -133,6 +132,15 @@ pub const HEVCProfile = enum { frext, }; +pub const AV1Format = enum { + yuv420, + yuv420_10bit, +}; + +pub const AV1Profile = enum { + main, +}; + /// Codec to use. Choose from H.264 and HEVC (H.265). /// Note that for each codec you can optionally select a profile and format. /// The profile will be forcefully applied to the encoder config. It is @@ -146,6 +154,10 @@ pub const Codec = union(enum) { profile: ?HEVCProfile = null, format: ?HEVCFormat = null, }, + av1: struct { + profile: ?AV1Profile = null, + format: ?AV1Format = null, + }, }; pub const Preset = enum { @@ -228,6 +240,7 @@ pub const Encoder = struct { const codec_guid = switch (options.codec) { .h264 => nvenc_bindings.codec_h264_guid, .hevc => nvenc_bindings.codec_hevc_guid, + .av1 => nvenc_bindings.codec_av1_guid, }; const preset_guid = switch (options.preset) { @@ -267,7 +280,6 @@ pub const Encoder = struct { .high => nvenc_bindings.h264_profile_high_guid, .high_444 => nvenc_bindings.h264_profile_high_444_guid, .stereo => nvenc_bindings.h264_profile_stereo_guid, - .svc_temporal_scalabilty => nvenc_bindings.h264_profile_svc_temporal_scalabilty, .progressive_high => nvenc_bindings.h264_profile_progressive_high_guid, .constrained_high => nvenc_bindings.h264_profile_constrained_high_guid, }; @@ -298,6 +310,24 @@ pub const Encoder = struct { }; } }, + .av1 => |av1_options| { + if (av1_options.profile) |profile| { + config.profileGUID = switch (profile) { + .main => nvenc_bindings.av1_profile_main_guid, + }; + } + if (av1_options.format) |format| { + config.encodeCodecConfig.av1Config.bitfields.chromaFormatIDC = 1; + config.encodeCodecConfig.av1Config.bitfields.inputPixelBitDepthMinus8 = switch (format) { + .yuv420_10bit => 2, + .yuv420 => 0, + }; + config.encodeCodecConfig.av1Config.bitfields.pixelBitDepthMinus8 = switch (format) { + .yuv420_10bit => 2, + 
.yuv420 => 0, + }; + } + }, } if (options.rate_control) |rate_control| { diff --git a/nvenc_bindings.zig b/nvenc_bindings.zig index c64dbf5..86543ce 100644 --- a/nvenc_bindings.zig +++ b/nvenc_bindings.zig @@ -13,16 +13,16 @@ pub const create_bitstream_buffer_ver = struct_version(1); pub const create_mv_buffer_ver = struct_version(1); pub const rc_params_ver = struct_version(1); pub const pic_params_mvc_ver = struct_version(1); -pub const config_ver = struct_version(7) | (1 << 31); +pub const config_ver = struct_version(8) | (1 << 31); pub const initialize_params_ver = struct_version(5) | (1 << 31); pub const reconfigure_params_ver = struct_version(1) | (1 << 31); pub const preset_config_ver = struct_version(4) | (1 << 31); -pub const pic_params_ver = struct_version(4) | (1 << 31); +pub const pic_params_ver = struct_version(6) | (1 << 31); pub const meonly_params_ver = struct_version(3); -pub const lock_bitstream_ver = struct_version(1); +pub const lock_bitstream_ver = struct_version(2); pub const lock_input_buffer_ver = struct_version(1); pub const map_input_resource_ver = struct_version(4); -pub const register_resource_ver = struct_version(3); +pub const register_resource_ver = struct_version(4); pub const stat_ver = struct_version(1); pub const sequence_param_payload_ver = struct_version(1); pub const event_params_ver = struct_version(1); @@ -35,30 +35,19 @@ pub inline fn guid(part1: u32, part2: u16, part3: u16, part4: [8]u8) GUID { pub const codec_h264_guid = guid(0x6bc82762, 0x4e63, 0x4ca4, .{ 0xaa, 0x85, 0x1e, 0x50, 0xf3, 0x21, 0xf6, 0xbf }); pub const codec_hevc_guid = guid(0x790cdc88, 0x4522, 0x4d7b, .{ 0x94, 0x25, 0xbd, 0xa9, 0x97, 0x5f, 0x76, 0x3 }); +pub const codec_av1_guid = guid(0x0a352289, 0x0aa7, 0x4759, .{ 0x86, 0x2d, 0x5d, 0x15, 0xcd, 0x16, 0xd2, 0x54 }); pub const codec_profile_autoselect_guid = guid(0xbfd6f8e7, 0x233c, 0x4341, .{ 0x8b, 0x3e, 0x48, 0x18, 0x52, 0x38, 0x3, 0xf4 }); pub const h264_profile_baseline_guid = guid(0x727bcaa, 0x78c4, 
0x4c83, .{ 0x8c, 0x2f, 0xef, 0x3d, 0xff, 0x26, 0x7c, 0x6a }); pub const h264_profile_main_guid = guid(0x60b5c1d4, 0x67fe, 0x4790, .{ 0x94, 0xd5, 0xc4, 0x72, 0x6d, 0x7b, 0x6e, 0x6d }); pub const h264_profile_high_guid = guid(0xe7cbc309, 0x4f7a, 0x4b89, .{ 0xaf, 0x2a, 0xd5, 0x37, 0xc9, 0x2b, 0xe3, 0x10 }); pub const h264_profile_high_444_guid = guid(0x7ac663cb, 0xa598, 0x4960, .{ 0xb8, 0x44, 0x33, 0x9b, 0x26, 0x1a, 0x7d, 0x52 }); pub const h264_profile_stereo_guid = guid(0x40847bf5, 0x33f7, 0x4601, .{ 0x90, 0x84, 0xe8, 0xfe, 0x3c, 0x1d, 0xb8, 0xb7 }); -pub const h264_profile_svc_temporal_scalabilty = guid(0xce788d20, 0xaaa9, 0x4318, .{ 0x92, 0xbb, 0xac, 0x7e, 0x85, 0x8c, 0x8d, 0x36 }); pub const h264_profile_progressive_high_guid = guid(0xb405afac, 0xf32b, 0x417b, .{ 0x89, 0xc4, 0x9a, 0xbe, 0xed, 0x3e, 0x59, 0x78 }); pub const h264_profile_constrained_high_guid = guid(0xaec1bd87, 0xe85b, 0x48f2, .{ 0x84, 0xc3, 0x98, 0xbc, 0xa6, 0x28, 0x50, 0x72 }); pub const hevc_profile_main_guid = guid(0xb514c39a, 0xb55b, 0x40fa, .{ 0x87, 0x8f, 0xf1, 0x25, 0x3b, 0x4d, 0xfd, 0xec }); pub const hevc_profile_main10_guid = guid(0xfa4d2b6c, 0x3a5b, 0x411a, .{ 0x80, 0x18, 0x0a, 0x3f, 0x5e, 0x3c, 0x9b, 0xe5 }); pub const hevc_profile_frext_guid = guid(0x51ec32b5, 0x1b4c, 0x453c, .{ 0x9c, 0xbd, 0xb6, 0x16, 0xbd, 0x62, 0x13, 0x41 }); - -// The old presets were deprecated starting SDK version 10.0. We remove them -// entirely since using them is not supported on modern driver versions. 
-// pub const preset_default_guid = guid(0xb2dfb705, 0x4ebd, 0x4c49, .{ 0x9b, 0x5f, 0x24, 0xa7, 0x77, 0xd3, 0xe5, 0x87 }); -// pub const preset_hp_guid = guid(0x60e4c59f, 0xe846, 0x4484, .{ 0xa5, 0x6d, 0xcd, 0x45, 0xbe, 0x9f, 0xdd, 0xf6 }); -// pub const preset_hq_guid = guid(0x34dba71d, 0xa77b, 0x4b8f, .{ 0x9c, 0x3e, 0xb6, 0xd5, 0xda, 0x24, 0xc0, 0x12 }); -// pub const preset_bd_guid = guid(0x82e3e450, 0xbdbb, 0x4e40, .{ 0x98, 0x9c, 0x82, 0xa9, 0xd, 0xf9, 0xef, 0x32 }); -// pub const preset_low_latency_default_guid = guid(0x49df21c5, 0x6dfa, 0x4feb, .{ 0x97, 0x87, 0x6a, 0xcc, 0x9e, 0xff, 0xb7, 0x26 }); -// pub const preset_low_latency_hq_guid = guid(0xc5f733b9, 0xea97, 0x4cf9, .{ 0xbe, 0xc2, 0xbf, 0x78, 0xa7, 0x4f, 0xd1, 0x5 }); -// pub const preset_low_latency_hp_guid = guid(0x67082a44, 0x4bad, 0x48fa, .{ 0x98, 0xea, 0x93, 0x5, 0x6d, 0x15, 0xa, 0x58 }); -// pub const preset_lossless_default_guid = guid(0xd5bfb716, 0xc604, 0x44e7, .{ 0x9b, 0xb8, 0xde, 0xa5, 0x51, 0xf, 0xc3, 0xac }); -// pub const preset_lossless_hp_guid = guid(0x149998e7, 0x2364, 0x411d, .{ 0x82, 0xef, 0x17, 0x98, 0x88, 0x9, 0x34, 0x9 }); +pub const av1_profile_main_guid = guid(0x5f2a39f5, 0xf14e, 0x4f95, .{ 0x9a, 0x9e, 0xb7, 0x6d, 0x56, 0x8f, 0xcf, 0x97 }); pub const preset_p1 = guid(0xfc0a8d3e, 0x45f8, 0x4cf8, .{ 0x80, 0xc7, 0x29, 0x88, 0x71, 0x59, 0x0e, 0xbf }); pub const preset_p2 = guid(0xf581cfb8, 0x88d6, 0x4381, .{ 0x93, 0xf0, 0xdf, 0x13, 0xf9, 0xc2, 0x7d, 0xab }); @@ -70,6 +59,17 @@ pub const preset_p7 = guid(0x84848c12, 0x6f71, 0x4c13, .{ 0x93, 0x1b, 0x53, 0xe2 pub const infinite_goplength: u32 = 0xffffffff; +pub const max_num_clock_ts = 3; + +pub const AV1PartSize = enum(c_uint) { + autoselect = 0, + @"4x4" = 1, + @"8x8" = 2, + @"16x16" = 3, + @"32x32" = 4, + @"64x64" = 5, +}; + pub const BFrameRefMode = enum(c_uint) { disabled = 0, each = 1, @@ -104,6 +104,14 @@ pub const DeviceType = enum(c_uint) { opengl = 2, }; +pub const DisplayPicStruct = enum(c_uint) { + frame = 0, + 
field_top_bottom = 1, + field_bottom_top = 2, + frame_doubling = 3, + frame_tripling = 4, +}; + pub const H264AdaptiveTransformMode = enum(c_uint) { autoselect = 0, disable = 1, @@ -181,8 +189,39 @@ pub const Level = enum(c_uint) { hevc_6 = 180, hevc_61 = 183, hevc_62 = 186, - hevc_main = 0, - hevc_high = 1, + + tier_hevc_main = 0, + tier_hevc_high = 1, + + av1_2 = 0, + av1_21 = 1, + av1_22 = 2, + av1_23 = 3, + av1_3 = 4, + av1_31 = 5, + av1_32 = 6, + av1_33 = 7, + av1_4 = 8, + av1_41 = 9, + av1_42 = 10, + av1_43 = 11, + av1_5 = 12, + av1_51 = 13, + av1_52 = 14, + av1_53 = 15, + av1_6 = 16, + av1_61 = 17, + av1_62 = 18, + av1_63 = 19, + av1_7 = 20, + av1_71 = 21, + av1_72 = 22, + av1_73 = 23, + + av1_autoselect = 0, + + tier_av1_0 = 0, + tier_av1_1 = 1, }; pub const MemoryHeap = enum(c_uint) { @@ -306,14 +345,94 @@ pub const TuningInfo = enum(c_uint) { lossless = 4, }; +pub const VuiVideoFormat = enum(u32) { + component = 0, + pal = 1, + ntsc = 2, + secam = 3, + mac = 4, + unspecified = 5, +}; + +pub const VuiColorPrimaries = enum(u32) { + undefined = 0, + bt709 = 1, + unspecified = 2, + reserved = 3, + bt470m = 4, + bt470bg = 5, + smpte170m = 6, + smpte240m = 7, + film = 8, + bt2020 = 9, + smpte428 = 10, + smpte431 = 11, + smpte432 = 12, + jedec_p22 = 22, +}; + +pub const VuiTransferCharacteristic = enum(u32) { + undefined = 0, + bt709 = 1, + unspecified = 2, + reserved = 3, + bt470m = 4, + bt470bg = 5, + smpte170m = 6, + smpte240m = 7, + linear = 8, + log = 9, + log_sqrt = 10, + iec61966_2_4 = 11, + bt1361_ecg = 12, + srgb = 13, + bt2020_10 = 14, + bt2020_12 = 15, + smpte2084 = 16, + smpte428 = 17, + arib_std_b67 = 18, +}; + +pub const VuiMatrixCoeffs = enum(u32) { + rgb = 0, + bt709 = 1, + unspecified = 2, + reserved = 3, + fcc = 4, + bt470bg = 5, + smpte170m = 6, + smpte240m = 7, + ycgco = 8, + bt2020_ncl = 9, + bt2020_cl = 10, + smpte2085 = 11, +}; + pub const InputPtr = ?*opaque {}; pub const OutputPtr = ?*opaque {}; pub const RegisteredPtr = ?*opaque {}; pub 
const CustreamPtr = ?*opaque {}; +pub const AV1OBUPayload = SEIPayload; + +pub const ClockTimestampSet = extern struct { + bitfields: packed struct { + countingType: bool, + discontinuityFlag: bool, + cntDroppedFrames: bool, + nFrames: u8, + secondsValue: u6, + minutesValue: u6, + hoursValue: u5, + reserved2: u4, + }, + timeOffset: u32, +}; + pub const CodecConfig = extern union { h264Config: ConfigH264, hevcConfig: ConfigHEVC, + av1Config: ConfigAV1, h264MeOnlyConfig: ConfigH264MeOnly, hevcMeOnlyConfig: ConfigHEVCMeOnly, _reserved: [320]u32, @@ -322,6 +441,7 @@ pub const CodecConfig = extern union { pub const CodecPicParams = extern union { h264PicParams: PicParamsH264, hevcPicParams: PicParamsHEVC, + av1PicParams: PicParamsAV1, _reserved: [256]u32, }; @@ -339,9 +459,52 @@ pub const Config = extern struct { _reserved2: [64]?*anyopaque, }; +pub const ConfigAV1 = extern struct { + level: u32, + tier: u32, + minPartSize: AV1PartSize, + maxPartSize: AV1PartSize, + bitfields: packed struct { + outputAnnexBFormat: bool, + enableTimingInfo: bool, + enableDecoderModelInfo: bool, + enableFrameIdNumbers: bool, + disableSeqHdr: bool, + repeatSeqHdr: bool, + enableIntraRefresh: bool, + chromaFormatIDC: u2, + enableBitstreamPadding: bool, + enableCustomTileConfig: bool, + enableFilmGrainParams: bool, + inputPixelBitDepthMinus8: u3, + pixelBitDepthMinus8: u3, + _reserved: u14, + }, + idrPeriod: u32, + intraRefreshPeriod: u32, + intraRefreshCnt: u32, + maxNumRefFramesInDPB: u32, + numTileColumns: u32, + numTileRows: u32, + tileWidths: [*c]u32, + tileHeights: [*c]u32, + maxTemporalLayersMinus1: u32, + colorPrimaries: VuiColorPrimaries, + transferCharacteristics: VuiTransferCharacteristic, + matrixCoefficients: VuiMatrixCoeffs, + colorRange: u32, + chromaSamplePosition: u32, + useBFramesAsRef: BFrameRefMode, + filmGrainParams: ?*FilmGrainParamsAV1, + numFwdRefs: NumRefFrames, + numBwdRefs: NumRefFrames, + _reserved1: [235]u32, + _reserved2: [62]?*anyopaque, +}; + pub const 
ConfigH264 = extern struct { bitfields: packed struct { - _reserved1: bool, + enableTemporalSVC: bool, enableStereoMVC: bool, hierarchicalPFrames: bool, hierarchicalBFrames: bool, @@ -359,7 +522,11 @@ pub const ConfigH264 = extern struct { qpPrimeYZeroTransformBypassFlag: bool, useConstrainedIntraPred: bool, enableFillerDataInsertion: bool, - _reserved2: u14, + disableSVCPrefixNalu: bool, + enableScalabilityInfoSEI: bool, + singleSliceIntraRefresh: bool, + enableTimeCode: bool, + _reserved: u10, }, level: u32, idrPeriod: u32, @@ -400,17 +567,20 @@ pub const ConfigH264VuiParameters = extern struct { overscanInfoPresentFlag: u32, overscanInfo: u32, videoSignalTypePresentFlag: u32, - videoFormat: u32, + videoFormat: VuiVideoFormat, videoFullRangeFlag: u32, colourDescriptionPresentFlag: u32, - colourPrimaries: u32, - transferCharacteristics: u32, - colourMatrix: u32, + colourPrimaries: VuiColorPrimaries, + transferCharacteristics: VuiTransferCharacteristic, + colourMatrix: VuiMatrixCoeffs, chromaSampleLocationFlag: u32, chromaSampleLocationTop: u32, chromaSampleLocationBot: u32, bitstreamRestrictionFlag: u32, - _reserved: [15]u32, + timingInfoPresentFlag: u32, + numUnitInTicks: u32, + timeScale: u32, + _reserved: [12]u32, }; pub const ConfigHEVCVuiParameters = extern struct { @@ -449,7 +619,11 @@ pub const ConfigHEVC = extern struct { pixelBitDepthMinus8: u3, enableFillerDataInsetion: bool, enableConstrainedEncoding: bool, - _reserved: u16, + enableAlphaLayerEncoding: bool, + singleSliceIntraRefresh: bool, + outputRecoveryPointSEI: bool, + outputTimeCodeSEI: bool, + reserved: u12, }, idrPeriod: u32, intraRefreshPeriod: u32, @@ -504,11 +678,50 @@ pub const ExternalMeHint = extern struct { _reserved: i32, }; +pub const ExternalMeSbHint = extern struct { + _reserved: i16, + _reserved1: i16, + _reserved2: i16, +}; + pub const ExternalMeHintCountsPerBlocktype = extern struct { _reserved: u32, _reserved1: [3]u32, }; +pub const FilmGrainParamsAV1 = extern struct { + 
bitfields: packed struct { + applyGrain: bool, + chromaScalingFromLuma: bool, + overlapFlag: bool, + clipToRestrictedRange: bool, + grainScalingMinus8: u2, + arCoeffLag: u2, + numYPoints: u4, + numCbPoints: u4, + numCrPoints: u4, + arCoeffShiftMinus6: u2, + grainScaleShift: u2, + _reserved: u8, + }, + pointYValue: [14]u8, + pointYScaling: [14]u8, + pointCbValue: [10]u8, + pointCbScaling: [10]u8, + pointCrValue: [10]u8, + pointCrScaling: [10]u8, + arCoeffsYPlus128: [24]u8, + arCoeffsCbPlus128: [25]u8, + arCoeffsCrPlus128: [25]u8, + _reserved: [2]u8, + cbMult: u8, + cbLumaMult: u8, + cbOffset: u16, + crMult: u8, + crLumaMult: u8, + crOffset: u16, +}; + pub const GUID = extern struct { Data1: u32, Data2: u16, @@ -536,7 +749,8 @@ pub const InitializeParams = extern struct { maxEncodeHeight: u32, maxMEHintCountsPerBlock: [2]ExternalMeHintCountsPerBlocktype, tuningInfo: TuningInfo, - _reserved: [288]u32, + bufferFormat: BufferFormat, + _reserved: [287]u32, _reserved2: [64]?*anyopaque, }; @@ -563,12 +777,14 @@ pub const LockBitstream = extern struct { frameSatd: u32, ltrFrameIdx: u32, ltrFrameBitmap: u32, - _reserved: [13]u32, + temporalId: u32, + _reserved: [12]u32, intraMBCount: u32, interMBCount: u32, averageMVX: i32, averageMVY: i32, - _reserved1: [219]u32, + alphaLayerSizeInBytes: u32, + _reserved1: [218]u32, _reserved2: [64]?*anyopaque, }; @@ -627,8 +843,41 @@ pub const PicParams = extern struct { qpDeltaMapSize: u32, _reservedBitFields: u32, meHintRefPicDist: [2]u16, - _reserved3: [286]u32, - _reserved4: [60]?*anyopaque, + alphaBuffer: InputPtr, + meExternalSbHints: [*c]ExternalMeSbHint, + meSbHintsCount: u32, + _reserved3: [285]u32, + _reserved4: [58]?*anyopaque, +}; + +pub const PicParamsAV1 = extern struct { + displayPOCSyntax: u32, + refPicFlag: u32, + temporalId: u32, + forceIntraRefreshWithFrameCnt: u32, + bitfields: packed struct { + goldenFrameFlag: bool, + arfFrameFlag: bool, + arf2FrameFlag: bool, + bwdFrameFlag: bool, + overlayFrameFlag: bool, + 
showExistingFrameFlag: bool, + errorResilientModeFlag: bool, + tileConfigUpdate: bool, + enableCustomTileConfig: bool, + filmGrainParamsUpdate: bool, + reservedBitFields: u22, + }, + numTileColumns: u32, + numTileRows: u32, + tileWidths: [*c]u32, + tileHeights: [*c]u32, + obuPayloadArrayCnt: u32, + _reserved: u32, + obuPayloadArray: [*c]AV1OBUPayload, + filmGrainParams: ?*FilmGrainParamsAV1, + _reserved1: [247]u32, + _reserved2: [61]?*anyopaque, }; pub const PicParamsH264 = extern struct { @@ -641,7 +890,7 @@ pub const PicParamsH264 = extern struct { sliceTypeData: [*c]u8, sliceTypeArrayCnt: u32, seiPayloadArrayCnt: u32, - seiPayloadArray: [*c]SeiPayload, + seiPayloadArray: [*c]SEIPayload, sliceMode: u32, sliceModeData: u32, ltrMarkFrameIdx: u32, @@ -683,8 +932,9 @@ pub const PicParamsHEVC = extern struct { ltrUsageMode: u32, seiPayloadArrayCnt: u32, _reserved: u32, - seiPayloadArray: [*c]SeiPayload, - _reserved2: [244]u32, + seiPayloadArray: [*c]SEIPayload, + timeCode: TimeCode, + _reserved2: [237]u32, _reserved3: [61]?*anyopaque, }; @@ -723,10 +973,16 @@ pub const RcParams = extern struct { targetQualityLSB: u8, lookaheadDepth: u16, lowDelayKeyFrameScale: u8, - _reserved1: [3]u8, + yDcQPIndexOffset: i8, + uDcQPIndexOffset: i8, + vDcQPIndexOffset: i8, qpMapMode: QPMapMode, multiPass: MultiPass, - _reserved: [6]u32, + alphaLayerBitrateRatio: u32, + cbQPIndexOffset: i8, + crQPIndexOffset: i8, + _reserved: u16, + _reserved2: [4]u32, }; pub const RegisterResource = extern struct { @@ -740,11 +996,12 @@ pub const RegisterResource = extern struct { registeredResource: RegisteredPtr, bufferFormat: BufferFormat, bufferUsage: BufferUsage, + pInputFencePointer: ?*anyopaque, _reserved1: [247]u32, - _reserved2: [62]?*anyopaque, + _reserved2: [61]?*anyopaque, }; -pub const SeiPayload = extern struct { +pub const SEIPayload = extern struct { payloadSize: u32, payloadType: u32, payload: [*c]u8, @@ -761,6 +1018,11 @@ pub const SequenceParamPayload = extern struct { _reserved2: 
[64]?*anyopaque, }; +pub const TimeCode = extern struct { + displayPicStruct: DisplayPicStruct, + clockTimestamp: [max_num_clock_ts]ClockTimestampSet, +}; + pub const ApiFunctionList = extern struct { version: u32, reserved: u32, @@ -805,8 +1067,7 @@ pub const ApiFunctionList = extern struct { nvEncSetIOCudaStreams: ?*const fn (?*anyopaque, CustreamPtr, CustreamPtr) callconv(.C) Status, nvEncGetEncodePresetConfigEx: ?*const fn (?*anyopaque, GUID, GUID, TuningInfo, ?*PresetConfig) callconv(.C) Status, __nvEncGetSequenceParamEx: ?*const fn (?*anyopaque, ?*InitializeParams, ?*SequenceParamPayload) callconv(.C) Status, // not included in bindings - __nvEncSetIOCudaStreams: ?*anyopaque, // not included in bindings - _reserved2: [279]?*anyopaque, + _reserved2: [277]?*anyopaque, }; pub var nvEncGetEncodePresetConfig: ?*const fn (?*anyopaque, GUID, GUID, ?*PresetConfig) callconv(.C) Status = null; diff --git a/test.zig b/test.zig index 0e7eaad..ddd4401 100644 --- a/test.zig +++ b/test.zig @@ -377,6 +377,28 @@ test "hevc full hd rate const qp" { ); } +test "default av1 full hd" { + try test_encoder_decoder( + .{ + .codec = .{ .av1 = .{} }, + .resolution = .{ .width = 1920, .height = 1080 }, + }, + long_duration, + ); +} + +test "default av1 4k" { + try test_encoder_decoder( + .{ + .codec = .{ .av1 = .{} }, + .resolution = .{ .width = 3840, .height = 2160 }, + }, + short_duration, + ); +} + +// TODO: more tests + const TestColor = enum { red, green, @@ -616,6 +638,7 @@ fn test_encoder_decoder(encoder_options: nvenc.EncoderOptions, num_frames: usize const decoder_codec = switch (encoder_options.codec) { .h264 => nvdec.Codec.h264, .hevc => nvdec.Codec.hevc, + .av1 => nvdec.Codec.av1, }; var decoder = try nvdec.Decoder.create(&context, .{ .codec = decoder_codec, .output_format = .nv12 }, allocator); diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 0000000..0a19790 --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1,174 @@ +# Byte-compiled / optimized / 
DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
# Patch context that shared this chunk's first physical line with the script,
# carried over verbatim (tail of the tools/.gitignore hunk, then the git
# header that introduces tools/check_bindings.py):
#
#   +#.idea/
#   +
#   +# Ruff stuff:
#   +.ruff_cache/
#   +
#   +# PyPI configuration file
#   +.pypirc
#   diff --git a/tools/check_bindings.py b/tools/check_bindings.py
#   new file mode 100644
#   index 0000000..cfac67b
#   --- /dev/null
#   +++ b/tools/check_bindings.py
#   @@ -0,0 +1,171 @@
"""
# check_bindings.py

This script matches every Zig bindings struct to the corresponding C struct in
the original C headers and compiles a little Zig program and C program to print
the respective size they represent (@sizeOf for Zig, sizeof for C), then checks
to make sure they are equal.

To use it run:

```sh
python3 -m check_bindings /path/to/Video_Codec_SDK_12.0.16/Interface /path/to/cuda.h
```

You must supply the SDK Interface dir and cuda.h yourself. The Zig binding
files are looked up in the parent directory of the directory containing this
script. `zig` and `cc` must be on PATH.
"""

import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

# C typedef names that the generic prefix-stripping rule would mangle
# (e.g. 'CUVIDEOFORMAT' minus the 'CUVID' prefix would leave 'EOFORMAT').
SPECIAL_C_NAMES = {
    'CUSEIMESSAGE': 'SEIMESSAGE',
    'CUVIDEOFORMAT': 'VIDEOFORMAT',
    'CUVIDEOFORMATEX': 'VIDEOFORMATEX',
}

# Prefixes stripped from a C typedef name, one after another and in this
# order, before it is compared with a Zig struct name.
C_NAME_PREFIXES = ('NVENC_', 'NV_ENC_', 'NV_ENCODE_', 'CUVID', 'cuda')

# Scope -> header file names (inside the SDK Interface dir) scanned for structs.
C_HEADERS = [('nvenc', 'nvEncodeAPI.h'), ('nvdec', 'nvcuvid.h'), ('nvdec', 'cuviddec.h')]


def bindings_root():
    """Return the directory holding the *_bindings.zig files (parent of this script's dir)."""
    return Path(__file__).resolve().parent.parent


def check_bindings(interface_dir, cuda_h):
    """Compare sizeof() of every C struct with @sizeOf() of its Zig counterpart.

    interface_dir: path of the SDK `Interface` directory (str or Path).
    cuda_h: path of `cuda.h` (str or Path).

    Prints one line per struct pair followed by 'OK', or exits with status 1
    when at least one size differs. Raises RuntimeError when a header is
    missing, a Zig struct has no C counterpart, or a helper program printed an
    unexpected number of sizes; raises subprocess.CalledProcessError when a
    compile or run step fails.
    """
    interface_dir = Path(interface_dir)
    cuda_h = Path(cuda_h)

    mapping = build_mapping(find_c_structs(interface_dir), find_zig_structs())
    zig_sizes = measure_zig_sizes(mapping)
    c_sizes = measure_c_sizes(mapping, interface_dir, cuda_h)

    # zip() would silently drop entries if one program printed too few lines.
    for label, sizes in (('C', c_sizes), ('Zig', zig_sizes)):
        if len(sizes) != len(mapping):
            raise RuntimeError(
                f"{label} program printed {len(sizes)} sizes, expected {len(mapping)}")

    if report_sizes(mapping, c_sizes, zig_sizes):
        print()
        print('! MISMATCH detected !')
        sys.exit(1)
    print()
    print('OK')


def build_mapping(c_structs, zig_structs):
    """Pair C struct names with Zig struct names.

    Both arguments are dicts with 'nvdec' and 'nvenc' lists of names; the Zig
    lists are consumed (matched names are removed). Returns a list of
    (c_name, (scope, zig_name)) tuples sorted by C name. Raises RuntimeError
    if any Zig struct is left without a C counterpart.
    """
    mapping = {}
    for scope in ['nvdec', 'nvenc']:
        for c_struct in c_structs[scope]:
            z = find_corresponding_zig_struct_and_remove(zig_structs[scope], c_struct)
            if z is not None:
                mapping[c_struct] = (scope, z)

    # Manual entry: per the original note this C name is a typedef alias, so
    # it is mapped by hand instead of through the name-matching rule.
    mapping['NV_ENC_CONFIG_HEVC_VUI_PARAMETERS'] = ('nvenc', 'ConfigHEVCVuiParameters')
    zig_structs['nvenc'].remove('ConfigHEVCVuiParameters')

    # Every Zig extern struct must have been matched; name the leftovers so the
    # failure is actionable (and is not stripped by `python -O` like an assert).
    leftovers = [f'{scope}.{z}' for scope in ('nvdec', 'nvenc') for z in zig_structs[scope]]
    if leftovers:
        raise RuntimeError('Zig structs without a C counterpart: ' + ', '.join(leftovers))

    return sorted(mapping.items(), key=lambda item: item[0])


def generate_zig_source(mapping):
    """Return Zig source that prints @sizeOf of every mapped struct, one per line."""
    lines = [
        '/// Generated by check_bindings.py',
        'pub const std = @import("std");',
        'pub const nvdec_bindings = @import("nvdec_bindings.zig");',
        'pub const nvenc_bindings = @import("nvenc_bindings.zig");',
        'pub fn main() !void {',
    ]
    for _, (scope, z) in mapping:
        lines.append(f'    std.debug.print("{{d}}\\n", .{{ @sizeOf({scope}_bindings.{z}) }});')
    return '\n'.join(lines) + '\n}'


def generate_c_source(mapping):
    """Return C source that prints sizeof of every mapped struct, one per line."""
    lines = [
        '/* Generated by check_bindings.py */',
        '#include <stdio.h>',  # declares printf
        '#include "nvEncodeAPI.h"',
        '#include "nvcuvid.h"',
        '#include "cuviddec.h"',
        'int main() {',
    ]
    for c, _ in mapping:
        # sizeof yields size_t, whose printf conversion is %zu (not %d).
        lines.append(f'    printf("%zu\\n", sizeof({c}));')
    return '\n'.join(lines) + '\n}'


def parse_sizes(text):
    """Parse program output with one integer per non-blank line into a list of ints."""
    return [int(row.strip()) for row in text.splitlines() if row.strip() != '']


def measure_zig_sizes(mapping):
    """Build and run the generated Zig program; return the sizes it prints."""
    root = bindings_root()
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        for name in ('nvenc_bindings.zig', 'nvdec_bindings.zig', 'cuda_bindings.zig'):
            shutil.copyfile(root / name, workdir / name)
        (workdir / 'zsizeof.zig').write_text(generate_zig_source(mapping))
        subprocess.run(
            ['zig', 'build-exe', '--dep', 'cuda_bindings',
             '-Mzsizeof=zsizeof.zig', '-Mcuda_bindings=cuda_bindings.zig'],
            cwd=workdir, check=True)
        result = subprocess.run(
            [str(workdir / 'zsizeof')], cwd=workdir, capture_output=True, check=True)
    # std.debug.print writes to stderr, so the sizes are read from there.
    return parse_sizes(result.stderr.decode('utf-8'))


def measure_c_sizes(mapping, interface_dir, cuda_h):
    """Build and run the generated C program; return the sizes it prints."""
    with tempfile.TemporaryDirectory() as tmp:
        workdir = Path(tmp)
        sources = [interface_dir / name for _, name in C_HEADERS] + [cuda_h]
        for src in sources:
            shutil.copyfile(src, workdir / src.name)
        (workdir / 'csizeof.c').write_text(generate_c_source(mapping))
        subprocess.run(['cc', 'csizeof.c', '-I.', '-o', 'csizeof'], cwd=workdir, check=True)
        result = subprocess.run(
            [str(workdir / 'csizeof')], cwd=workdir, capture_output=True, check=True)
    return parse_sizes(result.stdout.decode('utf-8'))


def report_sizes(mapping, c_sizes, zig_sizes):
    """Print one comparison line per struct pair; return True if any size differs."""
    mismatch = False
    for (c, (_scope, z)), c_size, zig_size in zip(mapping, c_sizes, zig_sizes):
        c_col = f" - C: sizeof({c}) = {c_size}".ljust(64)
        zig_col = f"Zig: @sizeOf({z}) = {zig_size}".ljust(64)
        if c_size != zig_size:
            flag = "! MISMATCH !"
            mismatch = True
        else:
            flag = ""
        print(c_col + zig_col + flag)
    return mismatch


def find_corresponding_zig_struct_and_remove(zig_structs, c_struct):
    """Find the Zig struct matching C typedef name `c_struct`.

    The C name is canonicalised (special cases first, otherwise known prefixes
    are stripped in order), underscores are dropped, and the comparison is
    case-insensitive. The first match is removed from `zig_structs` (mutated
    in place) and returned; None is returned when nothing matches.
    """
    canon = SPECIAL_C_NAMES.get(c_struct)
    if canon is None:
        canon = c_struct
        for prefix in C_NAME_PREFIXES:
            canon = canon.removeprefix(prefix)
    wanted = canon.replace('_', '').lower()
    for z in zig_structs:
        if z.lower() == wanted:
            # Safe although we are iterating: we return right after removing.
            zig_structs.remove(z)
            return z
    return None


def find_c_structs(interface_dir):
    """Collect typedef'd struct names from the SDK headers.

    Returns {'nvdec': [...], 'nvenc': [...]}. Raises RuntimeError if one of
    the expected headers is not a file inside `interface_dir`.
    """
    interface_dir = Path(interface_dir)
    headers = [(scope, interface_dir / name) for scope, name in C_HEADERS]
    for _, header in headers:
        if not header.is_file():
            raise RuntimeError(f"{header} does not exist")
    structs = {'nvdec': [], 'nvenc': []}
    for scope, header in headers:
        structs[scope].extend(find_c_structs_line_by_line(header.read_text().splitlines()))
    return structs


def find_c_structs_line_by_line(lines):
    """Return the typedef names of `typedef struct` blocks found in `lines`.

    A block opens on a line containing 'typedef struct' and closes on the next
    line whose first or second character is '}'. The name is the text after
    that brace, up to the first comma (if any), without the trailing ';'.
    """
    structs = []
    in_struct = False
    for line in lines:
        if not in_struct and "typedef struct" in line:
            in_struct = True
        # line[:2] covers both "'}' in column 0" and "'}' in column 1".
        if in_struct and '}' in line[:2]:
            name = line.strip().removeprefix('}').removesuffix(';').strip()
            if ',' in name:
                name = name.split(',')[0].strip()
            structs.append(name)
            in_struct = False
    return structs


def find_zig_structs():
    """Collect `pub const X = extern struct {` names from the Zig binding files.

    Returns {'nvdec': [...], 'nvenc': [...]}.
    """
    root = bindings_root()
    structs = {'nvdec': [], 'nvenc': []}
    for scope in ('nvenc', 'nvdec'):
        lines = (root / f'{scope}_bindings.zig').read_text().splitlines()
        structs[scope].extend(find_zig_structs_line_by_line(lines))
    return structs


def find_zig_structs_line_by_line(lines):
    """Return the names declared by lines containing 'pub const' and 'extern struct'."""
    structs = []
    for line in lines:
        if 'pub const' in line and 'extern struct' in line:
            name = line.strip().removeprefix('pub const').removesuffix('= extern struct {').strip()
            structs.append(name)
    return structs


if __name__ == "__main__":
    if len(sys.argv) != 3:
        sys.exit('usage: python3 -m check_bindings /path/to/Interface /path/to/cuda.h')
    check_bindings(Path(sys.argv[1]), Path(sys.argv[2]))