File tree Expand file tree Collapse file tree 3 files changed +10
-10
lines changed
xllm/core/framework/sampling Expand file tree Collapse file tree 3 files changed +10
-10
lines changed Original file line number Diff line number Diff line change @@ -24,12 +24,12 @@ namespace xllm {
2424// conforms to specific formats or rules.
2525class ConstrainedDecoding {
2626 public:
27- virtual ~ConstrainedDecoding ();
27+ virtual ~ConstrainedDecoding () = default ;
2828
2929 // Precompute and cache fixed constraint masks (e.g., static vocabulary
3030 // whitelists) to avoid redundant calculations during token generation.
3131 // Returns: true if the cache was built successfully, false otherwise.
32- virtual bool build_mask_cache ();
32+ virtual bool build_mask_cache () = 0 ;
3333
3434 // Generate dynamic constraint mask based on already generated token
3535 // sequences. This mask will be applied to filter invalid tokens.
@@ -42,6 +42,6 @@ class ConstrainedDecoding {
4242 // tokens for each sequence; it is used to filter invalid tokens by adding
4343 // the mask to the model logits.
4444 virtual torch::Tensor generate_mask (
45- const std::vector<std::vector<int32_t >>& generated_token_list);
45+ const std::vector<std::vector<int32_t >>& generated_token_list) = 0 ;
4646};
4747} // namespace xllm
Original file line number Diff line number Diff line change @@ -33,20 +33,16 @@ limitations under the License.
3333#include " util/tensor_helper.h"
3434
3535namespace xllm {
36-
37- constexpr float PRE_MASK_FACTOR = -10000 .0f ;
38- constexpr int GEN_MASK_THREAD_NUM = 16 ;
39-
4036RecConstrainedDecoding::RecConstrainedDecoding (uint64_t model_version,
4137 const int32_t vocab_size,
4238 torch::ScalarType dtype,
4339 torch::Device device,
4440 bool use_gen_threadpool)
45- : model_version_(model_version ),
41+ : use_gen_threadpool_(use_gen_threadpool ),
4642 vocab_size_ (vocab_size),
47- dtype_(dtype ),
43+ model_version_(model_version ),
4844 device_(device),
49- use_gen_threadpool_(use_gen_threadpool ) {
45+ dtype_(dtype ) {
5046 if (use_gen_threadpool_) {
5147 gen_threadpool_ = std::make_unique<ThreadPool>(GEN_MASK_THREAD_NUM);
5248 }
Original file line number Diff line number Diff line change @@ -40,6 +40,10 @@ class RecConstrainedDecoding : public ConstrainedDecoding {
4040 torch::Tensor generate_decode_mask (
4141 const std::vector<std::vector<int32_t >>& generated_token_list);
4242
43+ private:
44+ constexpr static float PRE_MASK_FACTOR = -10000 .0f ;
45+ constexpr static int GEN_MASK_THREAD_NUM = 16 ;
46+
4347 private:
4448 bool build_mask_cache_;
4549 bool use_gen_threadpool_;
You can’t perform that action at this time.
0 commit comments