@@ -34,6 +34,8 @@ limitations under the License.
3434#include " llm_engine.h"
3535#include " llm_master.h"
3636#include " models/model_registry.h"
37+ #include " rec_engine.h"
38+ #include " rec_master.h"
3739#include " speculative_engine.h"
3840#include " util/device_name_utils.h"
3941#include " util/scope_guard.h"
@@ -231,6 +233,35 @@ Master::Master(const Options& options, EngineType type) : options_(options) {
231233 eng_options.device_ip (options_.device_ip ().value ());
232234 }
233235 engine_ = std::make_unique<LLMEngine>(eng_options);
236+ } else if (type == EngineType::REC) {
237+ options_.enable_schedule_overlap (false );
238+ LOG (WARNING) << " Force to disable schedule overlap for REC model, not "
239+ " supported yet." ;
240+ runtime::Options eng_options;
241+ eng_options.model_path (options_.model_path ())
242+ .devices (devices)
243+ .backend (options_.backend ())
244+ .block_size (options_.block_size ())
245+ .max_cache_size (options_.max_cache_size ())
246+ .max_memory_utilization (options_.max_memory_utilization ())
247+ .enable_prefix_cache (options_.enable_prefix_cache ())
248+ .task_type (options_.task_type ())
249+ .enable_chunked_prefill (options_.enable_chunked_prefill ())
250+ .enable_offline_inference (options_.enable_offline_inference ())
251+ .spawn_worker_path (options_.spawn_worker_path ())
252+ .enable_shm (options_.enable_shm ())
253+ .is_local (options_.is_local ())
254+ .enable_schedule_overlap (options_.enable_schedule_overlap ())
255+ .master_node_addr (options_.master_node_addr ())
256+ .nnodes (options_.nnodes ())
257+ .node_rank (options_.node_rank ())
258+ .dp_size (options_.dp_size ())
259+ .ep_size (options_.ep_size ())
260+ .max_seqs_per_batch (options_.max_seqs_per_batch ())
261+ .max_tokens_per_chunk_for_prefill (
262+ options_.max_tokens_per_chunk_for_prefill ());
263+
264+ engine_ = std::make_unique<RecEngine>(eng_options);
234265 } else {
235266 LOG (WARNING) << " Not supported llm engine type: "
236267 << static_cast <size_t >(type);
@@ -246,6 +277,9 @@ std::unique_ptr<Master> create_master(const std::string& backend,
246277 } else if (backend == " dit" ) {
247278 LOG (INFO) << " creating dit master" ;
248279 return std::make_unique<DiTMaster>(options);
280+ } else if (backend == " rec" ) {
281+ LOG (INFO) << " creating rec master" ;
282+ return std::make_unique<RecMaster>(options);
249283 } else {
250284 LOG (FATAL) << " Failed to create master, backend is" << backend;
251285 return nullptr ;
0 commit comments