Skip to content

Commit 211b704

Browse files
authored
Add files via upload
1 parent ab90702 commit 211b704

1 file changed

Lines changed: 352 additions & 0 deletions

File tree

nvidia_stats.c

Lines changed: 352 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,352 @@
1+
/*
2+
* NVIDIA GPU Stats Reader
3+
*
4+
* This simple C program demonstrates how to read NVIDIA GPU:
5+
* - Core Voltage (using undocumented NVAPI call 0x465f9bcf)
6+
* - Hotspot Temperature (using undocumented NVAPI call 0x65fe3aad)
7+
* - Memory Temperature (using undocumented NVAPI call 0x65fe3aad)
8+
*
9+
* Based on LACT (Linux AMDGPU Controller Tool) implementation.
10+
* Reference: https://github.com/weter11/LACT
11+
*
12+
* Compile: gcc -o nvidia_stats nvidia_stats.c -ldl
13+
* Run: ./nvidia_stats
14+
*/
15+
16+
#include <stdio.h>
17+
#include <stdlib.h>
18+
#include <stdint.h>
19+
#include <string.h>
20+
#include <dlfcn.h>
21+
22+
/*
 * Library name and array limits.
 *
 * NVAPI_LIBRARY is the shared object handed to dlopen(). The two limits size
 * the GPU handle array and the text buffer of the error-message entry point.
 */
#define NVAPI_LIBRARY           "libnvidia-api.so.1"
#define NVAPI_MAX_PHYSICAL_GPUS 64
#define NVAPI_SHORT_STRING_MAX  64

/*
 * Interface IDs passed to nvapi_QueryInterface() to look up an entry point.
 * The last two are undocumented calls.
 */
#define QUERY_NVAPI_INITIALIZE         0x0150e828
#define QUERY_NVAPI_UNLOAD             0xd22bdd7e
#define QUERY_NVAPI_ENUM_PHYSICAL_GPUS 0xe5ac921f
#define QUERY_NVAPI_GET_BUS_ID         0x1be0b8e5
#define QUERY_NVAPI_GET_ERROR_MESSAGE  0x6c2d048c
#define QUERY_NVAPI_THERMALS           0x65fe3aad /* hotspot / memory temperature */
#define QUERY_NVAPI_VOLTAGE            0x465f9bcf /* core voltage */
35+
36+
/* Type definitions */
37+
typedef int32_t NvAPI_Status;
38+
typedef void* NvPhysicalGpuHandle;
39+
40+
/* Function pointer type for nvapi_QueryInterface */
41+
typedef void* (*NvAPI_QueryInterface_t)(uint32_t id);
42+
43+
/* Function pointer types for NVAPI functions */
44+
typedef NvAPI_Status (*NvAPI_Initialize_t)(void);
45+
typedef NvAPI_Status (*NvAPI_Unload_t)(void);
46+
typedef NvAPI_Status (*NvAPI_EnumPhysicalGPUs_t)(NvPhysicalGpuHandle handles[], uint32_t *count);
47+
typedef NvAPI_Status (*NvAPI_GetErrorMessage_t)(NvAPI_Status status, char text[NVAPI_SHORT_STRING_MAX]);
48+
49+
/*
50+
* NvApiThermals structure
51+
* Used with undocumented call QUERY_NVAPI_THERMALS (0x65fe3aad)
52+
*
53+
* The version field encodes struct size and version number:
54+
* version = (sizeof(struct) | (version_number << 16))
55+
*
56+
* Temperature values are stored in the values array and need to be divided by 256.
57+
* - Hotspot temperature is at index 9
58+
* - VRAM/Memory temperature is at index 15
59+
*/
60+
typedef struct {
61+
uint32_t version;
62+
int32_t mask;
63+
int32_t values[40];
64+
} NvApiThermals;
65+
66+
typedef NvAPI_Status (*NvAPI_GetThermals_t)(NvPhysicalGpuHandle handle, NvApiThermals *thermals);
67+
68+
/*
69+
* NvApiVoltage structure
70+
* Used with undocumented call QUERY_NVAPI_VOLTAGE (0x465f9bcf)
71+
*
72+
* The version field encodes struct size and version number:
73+
* version = (sizeof(struct) | (version_number << 16))
74+
*
75+
* value_uv contains the voltage in microvolts (µV)
76+
*/
77+
typedef struct {
78+
uint32_t version;
79+
uint32_t flags;
80+
uint32_t padding_1[8];
81+
uint32_t value_uv;
82+
uint32_t padding_2[8];
83+
} NvApiVoltage;
84+
85+
typedef NvAPI_Status (*NvAPI_GetVoltage_t)(NvPhysicalGpuHandle handle, NvApiVoltage *voltage);
86+
87+
/* Global variables */
88+
static void *nvapi_lib = NULL;
89+
static NvAPI_QueryInterface_t nvapi_QueryInterface = NULL;
90+
91+
/*
92+
* Load NVAPI library and get the query interface function
93+
*/
94+
int load_nvapi(void) {
95+
nvapi_lib = dlopen(NVAPI_LIBRARY, RTLD_NOW);
96+
if (!nvapi_lib) {
97+
fprintf(stderr, "Error: Could not load %s: %s\n", NVAPI_LIBRARY, dlerror());
98+
fprintf(stderr, "Make sure NVIDIA drivers are installed.\n");
99+
return -1;
100+
}
101+
102+
nvapi_QueryInterface = (NvAPI_QueryInterface_t)dlsym(nvapi_lib, "nvapi_QueryInterface");
103+
if (!nvapi_QueryInterface) {
104+
fprintf(stderr, "Error: Could not find nvapi_QueryInterface: %s\n", dlerror());
105+
dlclose(nvapi_lib);
106+
return -1;
107+
}
108+
109+
return 0;
110+
}
111+
112+
/*
113+
* Get a function pointer from NVAPI using query interface
114+
*/
115+
void* get_nvapi_function(uint32_t id) {
116+
void *func = nvapi_QueryInterface(id);
117+
if (!func) {
118+
fprintf(stderr, "Error: Could not get function for ID 0x%08x\n", id);
119+
}
120+
return func;
121+
}
122+
123+
/*
124+
* Initialize NVAPI
125+
*/
126+
int init_nvapi(void) {
127+
NvAPI_Initialize_t initialize = (NvAPI_Initialize_t)get_nvapi_function(QUERY_NVAPI_INITIALIZE);
128+
if (!initialize) return -1;
129+
130+
NvAPI_Status status = initialize();
131+
if (status != 0) {
132+
fprintf(stderr, "Error: NvAPI_Initialize failed with status 0x%08x\n", status);
133+
return -1;
134+
}
135+
136+
printf("NVAPI initialized successfully.\n\n");
137+
return 0;
138+
}
139+
140+
/*
141+
* Unload NVAPI
142+
*/
143+
void unload_nvapi(void) {
144+
if (nvapi_QueryInterface) {
145+
NvAPI_Unload_t unload = (NvAPI_Unload_t)get_nvapi_function(QUERY_NVAPI_UNLOAD);
146+
if (unload) {
147+
unload();
148+
}
149+
}
150+
151+
if (nvapi_lib) {
152+
dlclose(nvapi_lib);
153+
}
154+
}
155+
156+
/*
157+
* Enumerate physical GPUs
158+
*/
159+
int enum_physical_gpus(NvPhysicalGpuHandle *handles, uint32_t *count) {
160+
NvAPI_EnumPhysicalGPUs_t enum_gpus = (NvAPI_EnumPhysicalGPUs_t)get_nvapi_function(QUERY_NVAPI_ENUM_PHYSICAL_GPUS);
161+
if (!enum_gpus) return -1;
162+
163+
NvAPI_Status status = enum_gpus(handles, count);
164+
if (status != 0) {
165+
fprintf(stderr, "Error: EnumPhysicalGPUs failed with status 0x%08x\n", status);
166+
return -1;
167+
}
168+
169+
return 0;
170+
}
171+
172+
/*
173+
* Calculate the thermals mask by probing which bits return valid data
174+
* This is necessary because different GPUs support different sensors
175+
*/
176+
int32_t calculate_thermals_mask(NvPhysicalGpuHandle handle) {
177+
NvAPI_GetThermals_t get_thermals = (NvAPI_GetThermals_t)get_nvapi_function(QUERY_NVAPI_THERMALS);
178+
if (!get_thermals) return 1;
179+
180+
NvApiThermals thermals;
181+
memset(&thermals, 0, sizeof(thermals));
182+
183+
/* Version: struct size | (version 2 << 16) */
184+
thermals.version = sizeof(NvApiThermals) | (2 << 16);
185+
thermals.mask = 1;
186+
187+
/* Initial call to verify it works */
188+
NvAPI_Status status = get_thermals(handle, &thermals);
189+
if (status != 0) {
190+
fprintf(stderr, "Warning: Initial thermals query failed\n");
191+
return 1;
192+
}
193+
194+
/* Probe each bit to find the maximum valid mask */
195+
for (int bit = 0; bit < 32; bit++) {
196+
thermals.mask = 1 << bit;
197+
status = get_thermals(handle, &thermals);
198+
if (status != 0) {
199+
return thermals.mask - 1;
200+
}
201+
}
202+
203+
return 0xFFFFFFFF; /* All bits valid */
204+
}
205+
206+
/*
207+
* Get thermals (hotspot and VRAM temperature)
208+
*
209+
* Returns temperatures in degrees Celsius.
210+
* Based on LACT's implementation:
211+
* - Hotspot temperature is at values[9] / 256
212+
* - VRAM temperature is at values[15] / 256
213+
*/
214+
int get_thermals(NvPhysicalGpuHandle handle, int32_t mask, int32_t *hotspot, int32_t *vram) {
215+
NvAPI_GetThermals_t get_thermals = (NvAPI_GetThermals_t)get_nvapi_function(QUERY_NVAPI_THERMALS);
216+
if (!get_thermals) return -1;
217+
218+
NvApiThermals thermals;
219+
memset(&thermals, 0, sizeof(thermals));
220+
221+
/* Version: struct size | (version 2 << 16) */
222+
thermals.version = sizeof(NvApiThermals) | (2 << 16);
223+
thermals.mask = mask;
224+
225+
NvAPI_Status status = get_thermals(handle, &thermals);
226+
if (status != 0) {
227+
fprintf(stderr, "Error: GetThermals failed with status 0x%08x\n", status);
228+
return -1;
229+
}
230+
231+
/* Extract hotspot temperature from index 9 */
232+
int32_t hotspot_raw = thermals.values[9] / 256;
233+
if (hotspot_raw > 0 && hotspot_raw < 255) {
234+
*hotspot = hotspot_raw;
235+
} else {
236+
*hotspot = -1; /* Not available */
237+
}
238+
239+
/* Extract VRAM temperature from index 15 */
240+
int32_t vram_raw = thermals.values[15] / 256;
241+
if (vram_raw > 0 && vram_raw < 255) {
242+
*vram = vram_raw;
243+
} else {
244+
*vram = -1; /* Not available */
245+
}
246+
247+
return 0;
248+
}
249+
250+
/*
251+
* Get voltage in microvolts
252+
*/
253+
int get_voltage(NvPhysicalGpuHandle handle, uint32_t *voltage_uv) {
254+
NvAPI_GetVoltage_t get_voltage = (NvAPI_GetVoltage_t)get_nvapi_function(QUERY_NVAPI_VOLTAGE);
255+
if (!get_voltage) return -1;
256+
257+
NvApiVoltage voltage;
258+
memset(&voltage, 0, sizeof(voltage));
259+
260+
/* Version: struct size | (version 1 << 16) */
261+
voltage.version = sizeof(NvApiVoltage) | (1 << 16);
262+
263+
NvAPI_Status status = get_voltage(handle, &voltage);
264+
if (status != 0) {
265+
fprintf(stderr, "Error: GetVoltage failed with status 0x%08x\n", status);
266+
return -1;
267+
}
268+
269+
*voltage_uv = voltage.value_uv;
270+
return 0;
271+
}
272+
273+
/*
274+
* Main function - demonstrate reading NVIDIA GPU stats
275+
*/
276+
int main(void) {
277+
printf("=================================================\n");
278+
printf("NVIDIA GPU Stats Reader\n");
279+
printf("Using undocumented NVAPI calls from libnvidia-api.so.1\n");
280+
printf("=================================================\n\n");
281+
282+
/* Load NVAPI library */
283+
if (load_nvapi() != 0) {
284+
return 1;
285+
}
286+
287+
/* Initialize NVAPI */
288+
if (init_nvapi() != 0) {
289+
dlclose(nvapi_lib);
290+
return 1;
291+
}
292+
293+
/* Enumerate GPUs */
294+
NvPhysicalGpuHandle handles[NVAPI_MAX_PHYSICAL_GPUS];
295+
uint32_t gpu_count = 0;
296+
297+
if (enum_physical_gpus(handles, &gpu_count) != 0) {
298+
unload_nvapi();
299+
return 1;
300+
}
301+
302+
printf("Found %u NVIDIA GPU(s)\n\n", gpu_count);
303+
304+
/* Get stats for each GPU */
305+
for (uint32_t i = 0; i < gpu_count; i++) {
306+
printf("-------------------------------------------------\n");
307+
printf("GPU %u:\n", i);
308+
printf("-------------------------------------------------\n");
309+
310+
NvPhysicalGpuHandle handle = handles[i];
311+
312+
/* Calculate thermals mask */
313+
int32_t mask = calculate_thermals_mask(handle);
314+
printf("Thermals mask: 0x%08x\n\n", mask);
315+
316+
/* Get voltage */
317+
uint32_t voltage_uv = 0;
318+
if (get_voltage(handle, &voltage_uv) == 0) {
319+
float voltage_v = (float)voltage_uv / 1000000.0f;
320+
printf("Core Voltage: %.3f V (%u µV)\n", voltage_v, voltage_uv);
321+
} else {
322+
printf("Core Voltage: Not available\n");
323+
}
324+
325+
/* Get thermals */
326+
int32_t hotspot = 0, vram = 0;
327+
if (get_thermals(handle, mask, &hotspot, &vram) == 0) {
328+
if (hotspot >= 0) {
329+
printf("Hotspot Temperature: %d °C\n", hotspot);
330+
} else {
331+
printf("Hotspot Temperature: Not available\n");
332+
}
333+
334+
if (vram >= 0) {
335+
printf("Memory Temperature: %d °C\n", vram);
336+
} else {
337+
printf("Memory Temperature: Not available\n");
338+
}
339+
} else {
340+
printf("Hotspot Temperature: Error reading\n");
341+
printf("Memory Temperature: Error reading\n");
342+
}
343+
344+
printf("\n");
345+
}
346+
347+
/* Cleanup */
348+
unload_nvapi();
349+
printf("Done.\n");
350+
351+
return 0;
352+
}

0 commit comments

Comments
 (0)