From dc0145bda22aadceb0c29bd90f34d935ebfc2658 Mon Sep 17 00:00:00 2001 From: Robert Masen Date: Thu, 5 Mar 2026 15:39:26 +0000 Subject: [PATCH] fix: APIs that need to be called twice to first calculate the size of a buffer and then to fill that buffer should now be accounted for --- nvml-wrapper/src/device.rs | 74 +++++++++++++------------------------ nvml-wrapper/src/error.rs | 76 +++++++++++++++++++++++--------------- nvml-wrapper/src/unit.rs | 34 +++++------------ 3 files changed, 83 insertions(+), 101 deletions(-) diff --git a/nvml-wrapper/src/device.rs b/nvml-wrapper/src/device.rs index 31b3333..d7447c1 100644 --- a/nvml-wrapper/src/device.rs +++ b/nvml-wrapper/src/device.rs @@ -16,6 +16,7 @@ use crate::enums::device::{ BusType, DeviceArchitecture, FanControlPolicy, GpuLockedClocksSetting, PcieLinkMaxSpeed, PowerSource, }; +use crate::error::nvml_try_count; #[cfg(target_os = "linux")] use crate::error::NvmlErrorWithSource; use crate::error::{nvml_sym, nvml_try, Bits, NvmlError}; @@ -2217,11 +2218,8 @@ impl<'nvml> Device<'nvml> { let mut count: c_uint = 0; // Passing null doesn't indicate that we want the count. It's just allowed. - match sym(self.device, &mut count, ptr::null_mut()) { - nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count), - // If success, return 0; otherwise, return error - other => nvml_try(other).map(|_| 0), - } + nvml_try_count(sym(self.device, &mut count, ptr::null_mut()))?; + Ok(count) } } @@ -2293,11 +2291,8 @@ impl<'nvml> Device<'nvml> { let mut count: c_uint = 0; // Passing null doesn't indicate that we want the count. It's just allowed. - match sym(self.device, &mut count, ptr::null_mut()) { - nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count), - // If success, return 0; otherwise, return error - other => nvml_try(other).map(|_| 0), - } + nvml_try_count(sym(self.device, &mut count, ptr::null_mut()))?; + Ok(count) } } @@ -2365,16 +2360,13 @@ impl<'nvml> Device<'nvml> { unsafe { let mut count: c_uint = 0; - match sym( + nvml_try_count(sym( self.device, ptr::null_mut(), &mut count, last_seen_timestamp, - ) { - // Despite being undocumented, this appears to be the correct behavior - nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count), - other => nvml_try(other).map(|_| 0), - } + ))?; + Ok(count) } } @@ -3454,7 +3446,7 @@ impl<'nvml> Device<'nvml> { unsafe { let mut count: c_uint = 0; - nvml_try(sym( + nvml_try_count(sym( self.device, cause.as_c(), &mut count, @@ -3606,7 +3598,7 @@ impl<'nvml> Device<'nvml> { let mut val_type: nvmlValueType_t = mem::zeroed(); let mut count: c_uint = mem::zeroed(); - nvml_try(sym( + nvml_try_count(sym( self.device, sample_type.as_c(), timestamp, @@ -3947,13 +3939,12 @@ impl<'nvml> Device<'nvml> { let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetSupportedGraphicsClocks.as_ref())?; unsafe { - match sym(self.device, for_mem_clock, &mut count, items.as_mut_ptr()) { - // `count` is now the size that is required. Return it in the error. - nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => { - return Err(NvmlError::InsufficientSize(Some(count as usize))) - } - value => nvml_try(value)?, - } + nvml_try_count(sym( + self.device, + for_mem_clock, + &mut count, + items.as_mut_ptr(), + ))?; } items.truncate(count as usize); @@ -3995,7 +3986,7 @@ impl<'nvml> Device<'nvml> { let mut count = size as c_uint; let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetSupportedMemoryClocks.as_ref())?; - + // TODO: should this fn call `sym` twice, first to populate `count` and second to fill the vec? unsafe { match sym(self.device, &mut count, items.as_mut_ptr()) { // `count` is now the size that is required. Return it in the error. @@ -4186,7 +4177,7 @@ impl<'nvml> Device<'nvml> { unsafe { let mut count: c_uint = 0; - nvml_try(sym( + nvml_try_count(sym( self.device, level.as_c(), &mut count, @@ -5034,20 +5025,13 @@ impl<'nvml> Device<'nvml> { fn accounting_pids_count(&self) -> Result { let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetAccountingPids.as_ref())?; + // Indicates that we want the count + let mut count: c_uint = 0; unsafe { - // Indicates that we want the count - let mut count: c_uint = 0; - // Null also indicates that we want the count - match sym(self.device, &mut count, ptr::null_mut()) { - // List is empty - nvmlReturn_enum_NVML_SUCCESS => Ok(0), - // Count is set to pids count - nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count), - // We know this is an error - other => nvml_try(other).map(|_| 0), - } + nvml_try_count(sym(self.device, &mut count, ptr::null_mut()))?; } + Ok(count) } /** @@ -6196,7 +6180,7 @@ impl<'nvml> Device<'nvml> { unsafe { let mut count: u32 = 0; - nvml_try(sym(self.device, &mut count, std::ptr::null_mut()))?; + nvml_try_count(sym(self.device, &mut count, std::ptr::null_mut()))?; let mut arr: Vec = vec![0; count as usize]; nvml_try(sym(self.device, &mut count, arr.as_mut_ptr()))?; @@ -6228,7 +6212,7 @@ impl<'nvml> Device<'nvml> { unsafe { let mut count: u32 = 0; - nvml_try(sym(instance, &mut count, std::ptr::null_mut()))?; + nvml_try_count(sym(instance, &mut count, std::ptr::null_mut()))?; let mut pids: Vec = vec![0; count as usize]; nvml_try(sym(instance, &mut count, pids.as_mut_ptr()))?; @@ -6529,10 +6513,7 @@ impl<'nvml> Device<'nvml> { unsafe { let mut count: c_uint = 0; - match nvml_try(sym(self.device, &mut count, ids.as_mut_ptr())) { - Ok(()) | Err(NvmlError::InsufficientSize(_)) => {} - Err(err) => return Err(err), - } + nvml_try_count(sym(self.device, &mut count, ids.as_mut_ptr()))?; ids.resize(count as usize, 0); nvml_try(sym(self.device, &mut count, ids.as_mut_ptr()))?; @@ -6549,10 +6530,7 @@ impl<'nvml> Device<'nvml> { unsafe { let mut count: c_uint = 0; - match nvml_try(sym(self.device, &mut count, ids.as_mut_ptr())) { - Ok(()) | Err(NvmlError::InsufficientSize(_)) => {} - Err(err) => return Err(err), - } + nvml_try_count(sym(self.device, &mut count, ids.as_mut_ptr()))?; ids.resize(count as usize, 0); nvml_try(sym(self.device, &mut count, ids.as_mut_ptr()))?; diff --git a/nvml-wrapper/src/error.rs b/nvml-wrapper/src/error.rs index 2bedb5f..0f95c7b 100644 --- a/nvml-wrapper/src/error.rs +++ b/nvml-wrapper/src/error.rs @@ -166,36 +166,54 @@ pub enum NvmlError { } /// Converts an `nvmlReturn_t` type into a `Result<(), NvmlError>`. -#[allow(deprecated)] pub fn nvml_try(code: nvmlReturn_t) -> Result<(), NvmlError> { - use NvmlError::*; - - match code { - nvmlReturn_enum_NVML_SUCCESS => Ok(()), - nvmlReturn_enum_NVML_ERROR_UNINITIALIZED => Err(Uninitialized), - nvmlReturn_enum_NVML_ERROR_INVALID_ARGUMENT => Err(InvalidArg), - nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED => Err(NotSupported), - nvmlReturn_enum_NVML_ERROR_NO_PERMISSION => Err(NoPermission), - nvmlReturn_enum_NVML_ERROR_ALREADY_INITIALIZED => Err(AlreadyInitialized), - nvmlReturn_enum_NVML_ERROR_NOT_FOUND => Err(NotFound), - nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Err(InsufficientSize(None)), - nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_POWER => Err(InsufficientPower), - nvmlReturn_enum_NVML_ERROR_DRIVER_NOT_LOADED => Err(DriverNotLoaded), - nvmlReturn_enum_NVML_ERROR_TIMEOUT => Err(Timeout), - nvmlReturn_enum_NVML_ERROR_IRQ_ISSUE => Err(IrqIssue), - nvmlReturn_enum_NVML_ERROR_LIBRARY_NOT_FOUND => Err(LibraryNotFound), - nvmlReturn_enum_NVML_ERROR_FUNCTION_NOT_FOUND => Err(FunctionNotFound), - nvmlReturn_enum_NVML_ERROR_CORRUPTED_INFOROM => Err(CorruptedInfoROM), - nvmlReturn_enum_NVML_ERROR_GPU_IS_LOST => Err(GpuLost), - nvmlReturn_enum_NVML_ERROR_RESET_REQUIRED => Err(ResetRequired), - nvmlReturn_enum_NVML_ERROR_OPERATING_SYSTEM => Err(OperatingSystem), - nvmlReturn_enum_NVML_ERROR_LIB_RM_VERSION_MISMATCH => Err(LibRmVersionMismatch), - nvmlReturn_enum_NVML_ERROR_IN_USE => Err(InUse), - nvmlReturn_enum_NVML_ERROR_MEMORY => Err(InsufficientMemory), - nvmlReturn_enum_NVML_ERROR_NO_DATA => Err(NoData), - nvmlReturn_enum_NVML_ERROR_VGPU_ECC_NOT_SUPPORTED => Err(VgpuEccNotSupported), - nvmlReturn_enum_NVML_ERROR_UNKNOWN => Err(Unknown), - _ => Err(UnexpectedVariant(code)), + if code == nvmlReturn_enum_NVML_SUCCESS { + return Ok(()); + } + Err(code.into()) +} + +/// Converts an `nvmlReturn_t` type into a `Result<(), NvmlError>`, allowing for the call to return the +/// value `nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE` which is a common return value when using an +/// in/out parameter that provides the size of a buffer needed to complete that call +pub fn nvml_try_count(code: nvmlReturn_t) -> Result<(), NvmlError> { + if code == nvmlReturn_enum_NVML_SUCCESS || code == nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE + { + return Ok(()); + } + Err(code.into()) +} + +#[allow(deprecated)] +impl From for NvmlError { + fn from(value: nvmlReturn_t) -> Self { + use NvmlError::*; + match value { + nvmlReturn_enum_NVML_ERROR_UNINITIALIZED => Uninitialized, + nvmlReturn_enum_NVML_ERROR_INVALID_ARGUMENT => InvalidArg, + nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED => NotSupported, + nvmlReturn_enum_NVML_ERROR_NO_PERMISSION => NoPermission, + nvmlReturn_enum_NVML_ERROR_ALREADY_INITIALIZED => AlreadyInitialized, + nvmlReturn_enum_NVML_ERROR_NOT_FOUND => NotFound, + nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => InsufficientSize(None), + nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_POWER => InsufficientPower, + nvmlReturn_enum_NVML_ERROR_DRIVER_NOT_LOADED => DriverNotLoaded, + nvmlReturn_enum_NVML_ERROR_TIMEOUT => Timeout, + nvmlReturn_enum_NVML_ERROR_IRQ_ISSUE => IrqIssue, + nvmlReturn_enum_NVML_ERROR_LIBRARY_NOT_FOUND => LibraryNotFound, + nvmlReturn_enum_NVML_ERROR_FUNCTION_NOT_FOUND => FunctionNotFound, + nvmlReturn_enum_NVML_ERROR_CORRUPTED_INFOROM => CorruptedInfoROM, + nvmlReturn_enum_NVML_ERROR_GPU_IS_LOST => GpuLost, + nvmlReturn_enum_NVML_ERROR_RESET_REQUIRED => ResetRequired, + nvmlReturn_enum_NVML_ERROR_OPERATING_SYSTEM => OperatingSystem, + nvmlReturn_enum_NVML_ERROR_LIB_RM_VERSION_MISMATCH => LibRmVersionMismatch, + nvmlReturn_enum_NVML_ERROR_IN_USE => InUse, + nvmlReturn_enum_NVML_ERROR_MEMORY => InsufficientMemory, + nvmlReturn_enum_NVML_ERROR_NO_DATA => NoData, + nvmlReturn_enum_NVML_ERROR_VGPU_ECC_NOT_SUPPORTED => VgpuEccNotSupported, + nvmlReturn_enum_NVML_ERROR_UNKNOWN => Unknown, + _ => UnexpectedVariant(value), + } } } diff --git a/nvml-wrapper/src/unit.rs b/nvml-wrapper/src/unit.rs index 21a7a61..18e32f1 100644 --- a/nvml-wrapper/src/unit.rs +++ b/nvml-wrapper/src/unit.rs @@ -1,7 +1,7 @@ use crate::device::Device; use crate::enum_wrappers::unit::LedColor; use crate::enums::unit::{LedState, TemperatureReading}; -use crate::error::{nvml_sym, nvml_try, NvmlError}; +use crate::error::{nvml_sym, nvml_try, nvml_try_count, NvmlError}; use crate::ffi::bindings::*; use crate::struct_wrappers::unit::{FansInfo, PsuInfo, UnitInfo}; use crate::Nvml; @@ -145,31 +145,17 @@ impl<'nvml> Unit<'nvml> { pub fn device_count(&self) -> Result { let sym = nvml_sym(self.nvml.lib.nvmlUnitGetDevices.as_ref())?; + /* + From the docs: + deviceCount + Reference in which to provide the devices array size, + and to return the number of attached GPU devices + */ + let mut count: c_uint = 0; unsafe { - /* - NVIDIA doesn't even say that `count` will be set to the count if - `InsufficientSize` is returned. But we can assume sanity, right? - - The idea here is: - If there are 0 devices, NVML_SUCCESS is returned, `count` is set - to 0. We return count, all good. - If there is 1 device, NVML_SUCCESS is returned, `count` is set to - 1. We return count, all good. - If there are >= 2 devices, NVML_INSUFFICIENT_SIZE is returned. - `count` is theoretically set to the actual count, and we - return it. - */ - let mut count: c_uint = 1; - let mut devices: [nvmlDevice_t; 1] = [mem::zeroed()]; - - match sym(self.unit, &mut count, devices.as_mut_ptr()) { - nvmlReturn_enum_NVML_SUCCESS | nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => { - Ok(count) - } - // We know that this will be an error - other => nvml_try(other).map(|_| 0), - } + nvml_try_count(sym(self.unit, &mut count, std::ptr::null_mut()))?; } + Ok(count) } /**