diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000000..d2508be30ba --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-05-04 - Inefficient list.pop(0) operations +**Learning:** Found multiple usages of `list.pop(0)` in the codebase, particularly in `fastdeploy/model_executor/entropy_utils.py`. `pop(0)` on a list is an O(N) operation because it requires shifting all subsequent elements. For queues or iterating over elements sequentially, using an index or `collections.deque` is much faster. +**Action:** Replace O(N) `pop(0)` operations with O(1) index tracking or `collections.deque` where appropriate to optimize execution performance. diff --git a/fastdeploy/model_executor/entropy_utils.py b/fastdeploy/model_executor/entropy_utils.py index 21d1b3421e9..e97a38d39a9 100644 --- a/fastdeploy/model_executor/entropy_utils.py +++ b/fastdeploy/model_executor/entropy_utils.py @@ -49,9 +49,11 @@ def calculate_logits_entropy(logits, share_inputs, temperature): entropy_tensor = get_entropy(logits) entropy = entropy_tensor.tolist() + entropy_idx = 0 for i in range(real_bsz): for _ in range(real_seq_lens[i]): - share_inputs["entropy_list"][i].append(entropy.pop(0)) + share_inputs["entropy_list"][i].append(entropy[entropy_idx]) + entropy_idx += 1 if ( share_inputs["stop_flags"][i] and share_inputs["seq_lens_decoder"][i] != 0 @@ -92,9 +94,11 @@ def speculate_calculate_logits_entropy(logits, share_inputs, temperature): entropy_tensor = get_entropy(accepted_logits) entropy = entropy_tensor.tolist() + entropy_idx = 0 for i in range(real_bsz): for _ in range(share_inputs["accept_num"][i]): - share_inputs["entropy_list"][i].append(entropy.pop(0)) + share_inputs["entropy_list"][i].append(entropy[entropy_idx]) + entropy_idx += 1 if ( share_inputs["stop_flags"][i] and share_inputs["seq_lens_decoder"][i] != 0