From 4e2bd5d60a7dd8ffb42487e57b21a49c15cfc62b Mon Sep 17 00:00:00 2001 From: Mingyuan Date: Tue, 30 Dec 2025 02:08:45 +0800 Subject: [PATCH 1/2] tracer_nvbit: fall back to cuKernelGetAttribute for kernel attrs --- util/tracer_nvbit/tracer_tool/tracer_tool.cu | 29 +++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/util/tracer_nvbit/tracer_tool/tracer_tool.cu b/util/tracer_nvbit/tracer_tool/tracer_tool.cu index b36c33b18..4dbef0e62 100644 --- a/util/tracer_nvbit/tracer_tool/tracer_tool.cu +++ b/util/tracer_nvbit/tracer_tool/tracer_tool.cu @@ -34,6 +34,19 @@ #define TRACER_VERSION "5" +static int get_attr_with_kernel_fallback(CUfunction func, + CUfunction_attribute attr) { + int value = 0; + CUresult res = cuFuncGetAttribute(&value, attr, func); + if (res == CUDA_ERROR_INVALID_HANDLE) { + CUdevice dev = 0; + if (cuCtxGetDevice(&dev) == CUDA_SUCCESS) { + cuKernelGetAttribute(&value, attr, (CUkernel)func, dev); + } + } + return value; +} + /* Channel used to communicate from GPU to CPU receiving thread */ #define CHANNEL_SIZE (1l << 20) static __managed__ ChannelDev channel_dev; @@ -502,16 +515,12 @@ static void enter_kernel_launch(CUcontext ctx, CUfunction func, } // Get the number of registers and shared memory size for the kernel - int nregs; - CUDA_SAFECALL(cuFuncGetAttribute(&nregs, CU_FUNC_ATTRIBUTE_NUM_REGS, func)); - - int shmem_static_nbytes; - CUDA_SAFECALL(cuFuncGetAttribute(&shmem_static_nbytes, - CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func)); - - int binary_version; - CUDA_SAFECALL(cuFuncGetAttribute(&binary_version, - CU_FUNC_ATTRIBUTE_BINARY_VERSION, func)); + int nregs = + get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_NUM_REGS); + int shmem_static_nbytes = get_attr_with_kernel_fallback( + func, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES); + int binary_version = + get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_BINARY_VERSION); // Instrument the kernel if needed instrument_function_if_needed(ctx, func); From cfe1a924ebb84e248bf32159bdf3a9e10b336e2a Mon Sep 17 00:00:00 2001 From: William-An Date: Tue, 20 Jan 2026 21:01:33 -0500 Subject: [PATCH 2/2] format tracer tool code --- util/tracer_nvbit/tracer_tool/tracer_tool.cu | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/util/tracer_nvbit/tracer_tool/tracer_tool.cu b/util/tracer_nvbit/tracer_tool/tracer_tool.cu index 4dbef0e62..e0bef89bf 100644 --- a/util/tracer_nvbit/tracer_tool/tracer_tool.cu +++ b/util/tracer_nvbit/tracer_tool/tracer_tool.cu @@ -515,10 +515,9 @@ static void enter_kernel_launch(CUcontext ctx, CUfunction func, } // Get the number of registers and shared memory size for the kernel - int nregs = - get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_NUM_REGS); - int shmem_static_nbytes = get_attr_with_kernel_fallback( - func, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES); + int nregs = get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_NUM_REGS); + int shmem_static_nbytes = + get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES); int binary_version = get_attr_with_kernel_fallback(func, CU_FUNC_ATTRIBUTE_BINARY_VERSION);