From c7ce1a126bfba3814f97c05d1f04c8882962973a Mon Sep 17 00:00:00 2001
From: Siddharth Tyagi
Date: Tue, 15 Jul 2025 14:59:22 +0530
Subject: [PATCH] Increased searchLength and updated APPLY_PRIOR_TO_LAYERS

Signed-off-by: Siddharth Tyagi
---
 cpp/tensorrt_llm/batch_manager/runtimeBuffers.cpp | 2 +-
 tensorrt_llm/models/t5tts/model.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/tensorrt_llm/batch_manager/runtimeBuffers.cpp b/cpp/tensorrt_llm/batch_manager/runtimeBuffers.cpp
index 05a30f4f3c9..2b1dc4f5b78 100644
--- a/cpp/tensorrt_llm/batch_manager/runtimeBuffers.cpp
+++ b/cpp/tensorrt_llm/batch_manager/runtimeBuffers.cpp
@@ -1029,7 +1029,7 @@ void RuntimeBuffers::setAttentionPriorIdx(
     }
 
     // create a cpu buffer for scores to find max score in
-    SizeType32 searchLength = 5;
+    SizeType32 searchLength = 15;
     auto const& manager = runtime.getBufferManager();
     auto const& stream = runtime.getStream();
     auto scoresHost = manager.cpu(ITensor::makeShape({searchLength}), scores->getDataType());
diff --git a/tensorrt_llm/models/t5tts/model.py b/tensorrt_llm/models/t5tts/model.py
index 5a193c8e1e5..1f815b0c930 100644
--- a/tensorrt_llm/models/t5tts/model.py
+++ b/tensorrt_llm/models/t5tts/model.py
@@ -52,7 +52,7 @@
 }
 
 COMPUTE_SCORES_FROM_LAYERS = [4,6,10]
-APPLY_PRIOR_TO_LAYERS = [4,5,6,7,8,9,10,11]
+APPLY_PRIOR_TO_LAYERS = [4,6,10]
 
 
 class PositionwiseConvFF(Module):