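feat: standardize C++ implementations (pure-virtual ConstrainedDecoding interface, declaration-order member initialization, class-scoped constants).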

magicheng0816 · magicheng0816 · commit bf356b0c173b · 2025-12-09T10:47:24.000+08:00
diff --git a/xllm/core/framework/sampling/constrained_decoding.h b/xllm/core/framework/sampling/constrained_decoding.h
@@ -24,12 +24,12 @@ namespace xllm {
 // conforms to specific formats or rules.
 class ConstrainedDecoding {
  public:
-  virtual ~ConstrainedDecoding();
+  virtual ~ConstrainedDecoding() = default;
 
   // Precompute and cache fixed constraint masks (e.g., static vocabulary
   // whitelists) to avoid redundant calculations during token generation.
   // Returns: true if cache built successfully, false otherwise
-  virtual bool build_mask_cache();
+  virtual bool build_mask_cache() = 0;
 
   // Generate dynamic constraint mask based on already generated token
   // sequences. This mask will be applied to filter invalid tokens.
@@ -42,6 +42,6 @@ class ConstrainedDecoding {
   // tokens for each sequence, the usage is to filter invalid tokens by adding
   // the mask to the model logits.
   virtual torch::Tensor generate_mask(
-      const std::vector<std::vector<int32_t>>& generated_token_list);
+      const std::vector<std::vector<int32_t>>& generated_token_list) = 0;
 };
 }  // namespace xllm
diff --git a/xllm/core/framework/sampling/rec_constrained_decoding.cpp b/xllm/core/framework/sampling/rec_constrained_decoding.cpp
@@ -33,20 +33,16 @@ limitations under the License.
 #include "util/tensor_helper.h"
 
 namespace xllm {
-
-constexpr float PRE_MASK_FACTOR = -10000.0f;
-constexpr int GEN_MASK_THREAD_NUM = 16;
-
 RecConstrainedDecoding::RecConstrainedDecoding(uint64_t model_version,
                                                const int32_t vocab_size,
                                                torch::ScalarType dtype,
                                                torch::Device device,
                                                bool use_gen_threadpool)
-    : model_version_(model_version),
+    : use_gen_threadpool_(use_gen_threadpool),
       vocab_size_(vocab_size),
-      dtype_(dtype),
+      model_version_(model_version),
       device_(device),
-      use_gen_threadpool_(use_gen_threadpool) {
+      dtype_(dtype) {
   if (use_gen_threadpool_) {
     gen_threadpool_ = std::make_unique<ThreadPool>(GEN_MASK_THREAD_NUM);
   }
diff --git a/xllm/core/framework/sampling/rec_constrained_decoding.h b/xllm/core/framework/sampling/rec_constrained_decoding.h
@@ -40,6 +40,10 @@ class RecConstrainedDecoding : public ConstrainedDecoding {
   torch::Tensor generate_decode_mask(
       const std::vector<std::vector<int32_t>>& generated_token_list);
 
+ private:
+  constexpr static float PRE_MASK_FACTOR = -10000.0f;
+  constexpr static int GEN_MASK_THREAD_NUM = 16;
+
  private:
   bool build_mask_cache_;
   bool use_gen_threadpool_;