From dc0145bda22aadceb0c29bd90f34d935ebfc2658 Mon Sep 17 00:00:00 2001
From: Robert Masen <robert.masen@vultr.com>
Date: Thu, 5 Mar 2026 15:39:26 +0000
Subject: [PATCH] fix: APIs that need to be called twice to first calculate the
 size of a buffer and then to fill that buffer should now be accounted for

---
 nvml-wrapper/src/device.rs | 74 +++++++++++++------------------------
 nvml-wrapper/src/error.rs  | 76 +++++++++++++++++++++++---------------
 nvml-wrapper/src/unit.rs   | 34 +++++------------
 3 files changed, 83 insertions(+), 101 deletions(-)

diff --git a/nvml-wrapper/src/device.rs b/nvml-wrapper/src/device.rs
index 31b3333..d7447c1 100644
--- a/nvml-wrapper/src/device.rs
+++ b/nvml-wrapper/src/device.rs
@@ -16,6 +16,7 @@ use crate::enums::device::{
     BusType, DeviceArchitecture, FanControlPolicy, GpuLockedClocksSetting, PcieLinkMaxSpeed,
     PowerSource,
 };
+use crate::error::nvml_try_count;
 #[cfg(target_os = "linux")]
 use crate::error::NvmlErrorWithSource;
 use crate::error::{nvml_sym, nvml_try, Bits, NvmlError};
@@ -2217,11 +2218,8 @@ impl<'nvml> Device<'nvml> {
             let mut count: c_uint = 0;
 
             // Passing null doesn't indicate that we want the count. It's just allowed.
-            match sym(self.device, &mut count, ptr::null_mut()) {
-                nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count),
-                // If success, return 0; otherwise, return error
-                other => nvml_try(other).map(|_| 0),
-            }
+            nvml_try_count(sym(self.device, &mut count, ptr::null_mut()))?;
+            Ok(count)
         }
     }
 
@@ -2293,11 +2291,8 @@ impl<'nvml> Device<'nvml> {
             let mut count: c_uint = 0;
 
             // Passing null doesn't indicate that we want the count. It's just allowed.
-            match sym(self.device, &mut count, ptr::null_mut()) {
-                nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count),
-                // If success, return 0; otherwise, return error
-                other => nvml_try(other).map(|_| 0),
-            }
+            nvml_try_count(sym(self.device, &mut count, ptr::null_mut()))?;
+            Ok(count)
         }
     }
 
@@ -2365,16 +2360,13 @@ impl<'nvml> Device<'nvml> {
         unsafe {
             let mut count: c_uint = 0;
 
-            match sym(
+            nvml_try_count(sym(
                 self.device,
                 ptr::null_mut(),
                 &mut count,
                 last_seen_timestamp,
-            ) {
-                // Despite being undocumented, this appears to be the correct behavior
-                nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count),
-                other => nvml_try(other).map(|_| 0),
-            }
+            ))?;
+            Ok(count)
         }
     }
 
@@ -3454,7 +3446,7 @@ impl<'nvml> Device<'nvml> {
         unsafe {
             let mut count: c_uint = 0;
 
-            nvml_try(sym(
+            nvml_try_count(sym(
                 self.device,
                 cause.as_c(),
                 &mut count,
@@ -3606,7 +3598,7 @@ impl<'nvml> Device<'nvml> {
             let mut val_type: nvmlValueType_t = mem::zeroed();
             let mut count: c_uint = mem::zeroed();
 
-            nvml_try(sym(
+            nvml_try_count(sym(
                 self.device,
                 sample_type.as_c(),
                 timestamp,
@@ -3947,13 +3939,12 @@ impl<'nvml> Device<'nvml> {
         let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetSupportedGraphicsClocks.as_ref())?;
 
         unsafe {
-            match sym(self.device, for_mem_clock, &mut count, items.as_mut_ptr()) {
-                // `count` is now the size that is required. Return it in the error.
-                nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => {
-                    return Err(NvmlError::InsufficientSize(Some(count as usize)))
-                }
-                value => nvml_try(value)?,
-            }
+            nvml_try_count(sym(
+                self.device,
+                for_mem_clock,
+                &mut count,
+                items.as_mut_ptr(),
+            ))?;
         }
 
         items.truncate(count as usize);
@@ -3995,7 +3986,7 @@ impl<'nvml> Device<'nvml> {
         let mut count = size as c_uint;
 
         let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetSupportedMemoryClocks.as_ref())?;
-
+        // TODO: should this fn call `sym` twice, first to populate `count` and second to fill the vec?
         unsafe {
             match sym(self.device, &mut count, items.as_mut_ptr()) {
                 // `count` is now the size that is required. Return it in the error.
@@ -4186,7 +4177,7 @@ impl<'nvml> Device<'nvml> {
         unsafe {
             let mut count: c_uint = 0;
 
-            nvml_try(sym(
+            nvml_try_count(sym(
                 self.device,
                 level.as_c(),
                 &mut count,
@@ -5034,20 +5025,13 @@ impl<'nvml> Device<'nvml> {
     fn accounting_pids_count(&self) -> Result<c_uint, NvmlError> {
         let sym = nvml_sym(self.nvml.lib.nvmlDeviceGetAccountingPids.as_ref())?;
 
+        // Indicates that we want the count
+        let mut count: c_uint = 0;
         unsafe {
-            // Indicates that we want the count
-            let mut count: c_uint = 0;
-
             // Null also indicates that we want the count
-            match sym(self.device, &mut count, ptr::null_mut()) {
-                // List is empty
-                nvmlReturn_enum_NVML_SUCCESS => Ok(0),
-                // Count is set to pids count
-                nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Ok(count),
-                // We know this is an error
-                other => nvml_try(other).map(|_| 0),
-            }
+            nvml_try_count(sym(self.device, &mut count, ptr::null_mut()))?;
         }
+        Ok(count)
     }
 
     /**
@@ -6196,7 +6180,7 @@ impl<'nvml> Device<'nvml> {
         unsafe {
             let mut count: u32 = 0;
 
-            nvml_try(sym(self.device, &mut count, std::ptr::null_mut()))?;
+            nvml_try_count(sym(self.device, &mut count, std::ptr::null_mut()))?;
             let mut arr: Vec<nvmlVgpuInstance_t> = vec![0; count as usize];
             nvml_try(sym(self.device, &mut count, arr.as_mut_ptr()))?;
 
@@ -6228,7 +6212,7 @@ impl<'nvml> Device<'nvml> {
         unsafe {
             let mut count: u32 = 0;
 
-            nvml_try(sym(instance, &mut count, std::ptr::null_mut()))?;
+            nvml_try_count(sym(instance, &mut count, std::ptr::null_mut()))?;
             let mut pids: Vec<u32> = vec![0; count as usize];
             nvml_try(sym(instance, &mut count, pids.as_mut_ptr()))?;
 
@@ -6529,10 +6513,7 @@ impl<'nvml> Device<'nvml> {
         unsafe {
             let mut count: c_uint = 0;
 
-            match nvml_try(sym(self.device, &mut count, ids.as_mut_ptr())) {
-                Ok(()) | Err(NvmlError::InsufficientSize(_)) => {}
-                Err(err) => return Err(err),
-            }
+            nvml_try_count(sym(self.device, &mut count, ids.as_mut_ptr()))?;
 
             ids.resize(count as usize, 0);
             nvml_try(sym(self.device, &mut count, ids.as_mut_ptr()))?;
@@ -6549,10 +6530,7 @@ impl<'nvml> Device<'nvml> {
         unsafe {
             let mut count: c_uint = 0;
 
-            match nvml_try(sym(self.device, &mut count, ids.as_mut_ptr())) {
-                Ok(()) | Err(NvmlError::InsufficientSize(_)) => {}
-                Err(err) => return Err(err),
-            }
+            nvml_try_count(sym(self.device, &mut count, ids.as_mut_ptr()))?;
 
             ids.resize(count as usize, 0);
             nvml_try(sym(self.device, &mut count, ids.as_mut_ptr()))?;
diff --git a/nvml-wrapper/src/error.rs b/nvml-wrapper/src/error.rs
index 2bedb5f..0f95c7b 100644
--- a/nvml-wrapper/src/error.rs
+++ b/nvml-wrapper/src/error.rs
@@ -166,36 +166,54 @@ pub enum NvmlError {
 }
 
 /// Converts an `nvmlReturn_t` type into a `Result<(), NvmlError>`.
-#[allow(deprecated)]
 pub fn nvml_try(code: nvmlReturn_t) -> Result<(), NvmlError> {
-    use NvmlError::*;
-
-    match code {
-        nvmlReturn_enum_NVML_SUCCESS => Ok(()),
-        nvmlReturn_enum_NVML_ERROR_UNINITIALIZED => Err(Uninitialized),
-        nvmlReturn_enum_NVML_ERROR_INVALID_ARGUMENT => Err(InvalidArg),
-        nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED => Err(NotSupported),
-        nvmlReturn_enum_NVML_ERROR_NO_PERMISSION => Err(NoPermission),
-        nvmlReturn_enum_NVML_ERROR_ALREADY_INITIALIZED => Err(AlreadyInitialized),
-        nvmlReturn_enum_NVML_ERROR_NOT_FOUND => Err(NotFound),
-        nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Err(InsufficientSize(None)),
-        nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_POWER => Err(InsufficientPower),
-        nvmlReturn_enum_NVML_ERROR_DRIVER_NOT_LOADED => Err(DriverNotLoaded),
-        nvmlReturn_enum_NVML_ERROR_TIMEOUT => Err(Timeout),
-        nvmlReturn_enum_NVML_ERROR_IRQ_ISSUE => Err(IrqIssue),
-        nvmlReturn_enum_NVML_ERROR_LIBRARY_NOT_FOUND => Err(LibraryNotFound),
-        nvmlReturn_enum_NVML_ERROR_FUNCTION_NOT_FOUND => Err(FunctionNotFound),
-        nvmlReturn_enum_NVML_ERROR_CORRUPTED_INFOROM => Err(CorruptedInfoROM),
-        nvmlReturn_enum_NVML_ERROR_GPU_IS_LOST => Err(GpuLost),
-        nvmlReturn_enum_NVML_ERROR_RESET_REQUIRED => Err(ResetRequired),
-        nvmlReturn_enum_NVML_ERROR_OPERATING_SYSTEM => Err(OperatingSystem),
-        nvmlReturn_enum_NVML_ERROR_LIB_RM_VERSION_MISMATCH => Err(LibRmVersionMismatch),
-        nvmlReturn_enum_NVML_ERROR_IN_USE => Err(InUse),
-        nvmlReturn_enum_NVML_ERROR_MEMORY => Err(InsufficientMemory),
-        nvmlReturn_enum_NVML_ERROR_NO_DATA => Err(NoData),
-        nvmlReturn_enum_NVML_ERROR_VGPU_ECC_NOT_SUPPORTED => Err(VgpuEccNotSupported),
-        nvmlReturn_enum_NVML_ERROR_UNKNOWN => Err(Unknown),
-        _ => Err(UnexpectedVariant(code)),
+    if code == nvmlReturn_enum_NVML_SUCCESS {
+        return Ok(());
+    }
+    Err(code.into())
+}
+
+/// Converts an `nvmlReturn_t` type into a `Result<(), NvmlError>`, allowing for the call to return the
+/// value `nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE` which is a common return value when using an
+/// in/out parameter that provides the size of a buffer needed to complete that call
+pub fn nvml_try_count(code: nvmlReturn_t) -> Result<(), NvmlError> {
+    if code == nvmlReturn_enum_NVML_SUCCESS || code == nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE
+    {
+        return Ok(());
+    }
+    Err(code.into())
+}
+
+#[allow(deprecated)]
+impl From<nvmlReturn_t> for NvmlError {
+    fn from(value: nvmlReturn_t) -> Self {
+        use NvmlError::*;
+        match value {
+            nvmlReturn_enum_NVML_ERROR_UNINITIALIZED => Uninitialized,
+            nvmlReturn_enum_NVML_ERROR_INVALID_ARGUMENT => InvalidArg,
+            nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED => NotSupported,
+            nvmlReturn_enum_NVML_ERROR_NO_PERMISSION => NoPermission,
+            nvmlReturn_enum_NVML_ERROR_ALREADY_INITIALIZED => AlreadyInitialized,
+            nvmlReturn_enum_NVML_ERROR_NOT_FOUND => NotFound,
+            nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => InsufficientSize(None),
+            nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_POWER => InsufficientPower,
+            nvmlReturn_enum_NVML_ERROR_DRIVER_NOT_LOADED => DriverNotLoaded,
+            nvmlReturn_enum_NVML_ERROR_TIMEOUT => Timeout,
+            nvmlReturn_enum_NVML_ERROR_IRQ_ISSUE => IrqIssue,
+            nvmlReturn_enum_NVML_ERROR_LIBRARY_NOT_FOUND => LibraryNotFound,
+            nvmlReturn_enum_NVML_ERROR_FUNCTION_NOT_FOUND => FunctionNotFound,
+            nvmlReturn_enum_NVML_ERROR_CORRUPTED_INFOROM => CorruptedInfoROM,
+            nvmlReturn_enum_NVML_ERROR_GPU_IS_LOST => GpuLost,
+            nvmlReturn_enum_NVML_ERROR_RESET_REQUIRED => ResetRequired,
+            nvmlReturn_enum_NVML_ERROR_OPERATING_SYSTEM => OperatingSystem,
+            nvmlReturn_enum_NVML_ERROR_LIB_RM_VERSION_MISMATCH => LibRmVersionMismatch,
+            nvmlReturn_enum_NVML_ERROR_IN_USE => InUse,
+            nvmlReturn_enum_NVML_ERROR_MEMORY => InsufficientMemory,
+            nvmlReturn_enum_NVML_ERROR_NO_DATA => NoData,
+            nvmlReturn_enum_NVML_ERROR_VGPU_ECC_NOT_SUPPORTED => VgpuEccNotSupported,
+            nvmlReturn_enum_NVML_ERROR_UNKNOWN => Unknown,
+            _ => UnexpectedVariant(value),
+        }
     }
 }
 
diff --git a/nvml-wrapper/src/unit.rs b/nvml-wrapper/src/unit.rs
index 21a7a61..18e32f1 100644
--- a/nvml-wrapper/src/unit.rs
+++ b/nvml-wrapper/src/unit.rs
@@ -1,7 +1,7 @@
 use crate::device::Device;
 use crate::enum_wrappers::unit::LedColor;
 use crate::enums::unit::{LedState, TemperatureReading};
-use crate::error::{nvml_sym, nvml_try, NvmlError};
+use crate::error::{nvml_sym, nvml_try, nvml_try_count, NvmlError};
 use crate::ffi::bindings::*;
 use crate::struct_wrappers::unit::{FansInfo, PsuInfo, UnitInfo};
 use crate::Nvml;
@@ -145,31 +145,17 @@ impl<'nvml> Unit<'nvml> {
     pub fn device_count(&self) -> Result<u32, NvmlError> {
         let sym = nvml_sym(self.nvml.lib.nvmlUnitGetDevices.as_ref())?;
 
+        /*
+        From the docs:
+        deviceCount
+          Reference in which to provide the devices array size,
+          and to return the number of attached GPU devices
+        */
+        let mut count: c_uint = 0;
         unsafe {
-            /*
-            NVIDIA doesn't even say that `count` will be set to the count if
-            `InsufficientSize` is returned. But we can assume sanity, right?
-
-            The idea here is:
-            If there are 0 devices, NVML_SUCCESS is returned, `count` is set
-              to 0. We return count, all good.
-            If there is 1 device, NVML_SUCCESS is returned, `count` is set to
-              1. We return count, all good.
-            If there are >= 2 devices, NVML_INSUFFICIENT_SIZE is returned.
-             `count` is theoretically set to the actual count, and we
-              return it.
-            */
-            let mut count: c_uint = 1;
-            let mut devices: [nvmlDevice_t; 1] = [mem::zeroed()];
-
-            match sym(self.unit, &mut count, devices.as_mut_ptr()) {
-                nvmlReturn_enum_NVML_SUCCESS | nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => {
-                    Ok(count)
-                }
-                // We know that this will be an error
-                other => nvml_try(other).map(|_| 0),
-            }
+            nvml_try_count(sym(self.unit, &mut count, std::ptr::null_mut()))?;
         }
+        Ok(count)
     }
 
     /**