Skip to content

Commit 2c2e7ab

Browse files
authored
bugfix: correct total_num_rows calculation in batch_prefill for accurate tensor indexing. (#504)
1 parent e4916ec commit 2c2e7ab

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

xllm/core/kernels/cuda/batch_prefill.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ void batch_prefill(torch::Tensor float_workspace_buffer,
47 47
torch::Tensor kv_cu_seq_lens_host = kv_cu_seq_lens.to(torch::kCPU);
48 48
torch::Tensor kv_len_arr_host =
49 49
kv_cu_seq_lens_host.slice(0, 1) - kv_cu_seq_lens_host.slice(0, 0, -1);
50 -
const int64_t total_num_rows = qo_indptr_host.size(0);
50 +
const int64_t total_num_rows = qo_indptr_host[-1].item<int64_t>();
51 51
const int64_t batch_size = qo_indptr_host.size(0) - 1;
52 52

53 53
auto plan_info = FunctionFactory::get_instance().prefill_plan_func(uri).call(

0 commit comments

Comments
 (0)