From 24f2401d7792cf846db34ff97562dad9b2ac4e3b Mon Sep 17 00:00:00 2001 From: preflight Date: Tue, 26 May 2026 22:49:01 -0400 Subject: [PATCH] fix https://github.com/NVIDIA/TensorRT-Edge-LLM/issues/87: hard-error instead of silent return when CuTe DSL GEMM is not compiled --- cpp/kernels/talkerMLPKernels/talkerMLPKernels.cu | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/kernels/talkerMLPKernels/talkerMLPKernels.cu b/cpp/kernels/talkerMLPKernels/talkerMLPKernels.cu index 60056ae..a1b27fa 100644 --- a/cpp/kernels/talkerMLPKernels/talkerMLPKernels.cu +++ b/cpp/kernels/talkerMLPKernels/talkerMLPKernels.cu @@ -338,8 +338,10 @@ void invokeTalkerMLP(rt::Tensor const& input, rt::Tensor const& fc1Weight, rt::T return; } #else - LOG_ERROR("CuTe DSL GEMM not compiled. Rebuild with -DENABLE_CUTE_DSL=gemm (or ALL)."); - return; + ELLM_CHECK(false, + "invokeTalkerMLP requires CuTe DSL GEMM. Rebuild with -DENABLE_CUTE_DSL=gemm (or ALL); " + "without it the launcher returns without writing the output tensor and downstream " + "sampling consumes uninitialised GPU memory, producing wrong-but-not-crashing audio."); #endif } @@ -382,8 +384,10 @@ void invokeLinearLayer( return; } #else - LOG_ERROR("CuTe DSL GEMM not compiled. Rebuild with -DENABLE_CUTE_DSL=gemm (or ALL)."); - return; + ELLM_CHECK(false, + "invokeLinearLayer requires CuTe DSL GEMM. Rebuild with -DENABLE_CUTE_DSL=gemm (or ALL); " + "without it the launcher returns without writing the output tensor and downstream " + "consumers see uninitialised GPU memory."); #endif }