Skip to content

Commit 007912d

Browse files
authored
bugfix: fix core dump of large beam width. (#488)
1 parent a8669af commit 007912d

File tree

4 files changed

+15
-13
lines changed

4 files changed

+15
-13
lines changed

xllm/core/framework/block/block_manager_pool.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -216,8 +216,7 @@ bool BlockManagerPool::allocate(Sequence* sequence, size_t num_tokens) {
216216
const size_t block_size = options_.block_size();
217217
const size_t num_blocks_needed = (num_tokens + block_size - 1) / block_size;
218218
if (num_blocks_needed <= num_blocks) {
219-
process_beam_search(sequence, /*need_swap*/ true);
220-
return true;
219+
return process_beam_search(sequence, /*need_swap*/ true);
221220
}
222221
process_beam_search(sequence);
223222

@@ -263,27 +262,31 @@ std::vector<Block> BlockManagerPool::allocate(size_t num_tokens,
263262
return block_managers_[dp_rank]->allocate(num_blocks_needed);
264263
}
265264

266-
void BlockManagerPool::process_beam_search(Sequence* sequence, bool need_swap) {
265+
bool BlockManagerPool::process_beam_search(Sequence* sequence, bool need_swap) {
267266
if (!sequence->check_beam_search()) {
268-
return;
267+
return true;
269268
}
270269

271270
auto src_blocks = sequence->kv_state().src_blocks();
272271
if (src_blocks.size() == 0) {
273-
return;
272+
return true;
274273
}
275274

276275
// when sequence need to swap the last block and no new block appended,
277276
// allocate a new block for this sequence
278277
if (need_swap && sequence->kv_state().need_swap()) {
279278
int32_t dp_rank = get_dp_rank(sequence);
280279
auto new_blocks = block_managers_[dp_rank]->allocate(1);
280+
if (new_blocks.size() == 0) {
281+
return false;
282+
}
281283
swap_block_transfer_infos_[dp_rank].emplace_back(src_blocks.back().id(),
282284
new_blocks[0].id());
283-
sequence->kv_state().process_beam_search(new_blocks);
285+
sequence->kv_state().process_beam_search(new_blocks[0]);
284286
} else {
285-
sequence->kv_state().process_beam_search({});
287+
sequence->kv_state().process_beam_search(std::nullopt);
286288
}
289+
return true;
287290
}
288291

289292
uint32_t BlockManagerPool::pre_allocate(Sequence* sequence) {

xllm/core/framework/block/block_manager_pool.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ class BlockManagerPool final : public KVCacheManager {
9191
void allocate_host_shared(Sequence* sequence);
9292
void save_offload_blocks(Sequence* sequence);
9393

94-
void process_beam_search(Sequence* sequence, bool need_swap = false);
94+
bool process_beam_search(Sequence* sequence, bool need_swap = false);
9595

9696
private:
9797
std::vector<std::unique_ptr<BlockManager>> block_managers_;

xllm/core/framework/request/sequence_kv_state.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -143,14 +143,13 @@ void KVCacheState::reset() {
143143
transfer_kv_info_.reset();
144144
}
145145

146-
void KVCacheState::process_beam_search(const std::vector<Block>& new_blocks) {
146+
void KVCacheState::process_beam_search(std::optional<Block> new_block) {
147147
blocks_.clear();
148148
blocks_ = std::move(src_blocks_);
149149

150-
if (!new_blocks.empty()) {
151-
CHECK_EQ(new_blocks.size(), 1);
150+
if (new_block.has_value()) {
152151
blocks_.pop_back();
153-
blocks_.insert(blocks_.end(), new_blocks.begin(), new_blocks.end());
152+
blocks_.emplace_back(new_block.value());
154153
}
155154
}
156155

xllm/core/framework/request/sequence_kv_state.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ class KVCacheState {
6262

6363
void reset();
6464

65-
void process_beam_search(const std::vector<Block>& new_blocks);
65+
void process_beam_search(std::optional<Block> new_block = std::nullopt);
6666

6767
private:
6868
// number of tokens in kv cache

0 commit comments

Comments
 (0)