Skip to content

Commit bf356b0

Browse files
committed
feat: standardize some C++ implementations.
1 parent 5a322eb commit bf356b0

File tree

3 files changed

+10
-10
lines changed

3 files changed

+10
-10
lines changed

xllm/core/framework/sampling/constrained_decoding.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ namespace xllm {
2424
// conforms to specific formats or rules.
2525
class ConstrainedDecoding {
2626
public:
27-
virtual ~ConstrainedDecoding();
27+
virtual ~ConstrainedDecoding() = default;
2828

2929
// Precompute and cache fixed constraint masks (e.g., static vocabulary
3030
// whitelists) to avoid redundant calculations during token generation.
3131
// Returns: true if the cache was built successfully, false otherwise.
32-
virtual bool build_mask_cache();
32+
virtual bool build_mask_cache() = 0;
3333

3434
// Generate dynamic constraint mask based on already generated token
3535
// sequences. This mask will be applied to filter invalid tokens.
@@ -42,6 +42,6 @@ class ConstrainedDecoding {
4242
// tokens for each sequence; invalid tokens are filtered out by adding
4343
// the mask to the model logits.
4444
virtual torch::Tensor generate_mask(
45-
const std::vector<std::vector<int32_t>>& generated_token_list);
45+
const std::vector<std::vector<int32_t>>& generated_token_list) = 0;
4646
};
4747
} // namespace xllm

xllm/core/framework/sampling/rec_constrained_decoding.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,16 @@ limitations under the License.
3333
#include "util/tensor_helper.h"
3434

3535
namespace xllm {
36-
37-
constexpr float PRE_MASK_FACTOR = -10000.0f;
38-
constexpr int GEN_MASK_THREAD_NUM = 16;
39-
4036
RecConstrainedDecoding::RecConstrainedDecoding(uint64_t model_version,
4137
const int32_t vocab_size,
4238
torch::ScalarType dtype,
4339
torch::Device device,
4440
bool use_gen_threadpool)
45-
: model_version_(model_version),
41+
: use_gen_threadpool_(use_gen_threadpool),
4642
vocab_size_(vocab_size),
47-
dtype_(dtype),
43+
model_version_(model_version),
4844
device_(device),
49-
use_gen_threadpool_(use_gen_threadpool) {
45+
dtype_(dtype) {
5046
if (use_gen_threadpool_) {
5147
gen_threadpool_ = std::make_unique<ThreadPool>(GEN_MASK_THREAD_NUM);
5248
}

xllm/core/framework/sampling/rec_constrained_decoding.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class RecConstrainedDecoding : public ConstrainedDecoding {
4040
torch::Tensor generate_decode_mask(
4141
const std::vector<std::vector<int32_t>>& generated_token_list);
4242

43+
private:
44+
constexpr static float PRE_MASK_FACTOR = -10000.0f;
45+
constexpr static int GEN_MASK_THREAD_NUM = 16;
46+
4347
private:
4448
bool build_mask_cache_;
4549
bool use_gen_threadpool_;

0 commit comments

Comments
 (0)