jd-opensource · DragonFive · Dec 16, 2025 · Dec 19, 2025
diff --git a/xllm/api_service/rec_completion_service_impl.cpp b/xllm/api_service/rec_completion_service_impl.cpp
@@ -28,9 +28,7 @@ limitations under the License.
 #include "completion.pb.h"
 #include "core/distributed_runtime/llm_master.h"
 #include "core/distributed_runtime/rec_master.h"
-#include "core/framework/request/mm_data.h"
 #include "core/framework/request/request_output.h"
-#include "core/util/utils.h"
 
 #define likely(x) __builtin_expect(!!(x), 1)
 #define unlikely(x) __builtin_expect(!!(x), 0)
@@ -167,18 +165,15 @@ void RecCompletionServiceImpl::process_async_impl(
   }
 
   const auto& rpc_request_ref = call->request();
-  std::optional<MMData> mm_data = std::nullopt;
+  std::optional<std::vector<proto::InferInputTensor>> input_tensors =
+      std::nullopt;
   if (rpc_request_ref.input_tensors_size()) {
-    // HISTOGRAM_OBSERVE(rec_input_first_dim,
-    //                  rpc_request_ref.input_tensors(0).shape(0));
-
-    MMDict mm_dict;
+    std::vector<proto::InferInputTensor> tensors;
+    tensors.reserve(rpc_request_ref.input_tensors_size());
     for (int i = 0; i < rpc_request_ref.input_tensors_size(); ++i) {
-      const auto& tensor = rpc_request_ref.input_tensors(i);
-      mm_dict[tensor.name()] =
-          xllm::util::convert_rec_tensor_to_torch(tensor).to(torch::kBFloat16);
+      tensors.push_back(rpc_request_ref.input_tensors(i));
     }
-    mm_data = std::move(MMData(MMType::EMBEDDING, mm_dict));
+    input_tensors = std::move(tensors);
   }
 
   // schedule the request
@@ -187,7 +182,7 @@ void RecCompletionServiceImpl::process_async_impl(
   master_->handle_request(
       std::move(rpc_request_ref.prompt()),
       std::move(prompt_tokens),
-      std::move(mm_data),
+      std::move(input_tensors),
       std::move(request_params),
       [call,
        model,
@@ -219,4 +214,4 @@ void RecCompletionServiceImpl::process_async_impl(
       });
 }
 
-}  // namespace xllm
+}  // namespace xllm
diff --git a/xllm/core/common/CMakeLists.txt b/xllm/core/common/CMakeLists.txt
@@ -14,6 +14,7 @@ cc_library(
     $<$<BOOL:${USE_NPU}>:mspti_helper.h>
     options.h
     rate_limiter.h
+    rec_model_utils.h
     types.h
     device_monitor.h
     version_singleton.h
@@ -66,4 +67,3 @@ cc_test(
 target_link_libraries(common PRIVATE OpenSSL::SSL OpenSSL::Crypto protobuf::libprotobuf)
 add_dependencies(common brpc-static)
 
-
diff --git a/xllm/core/common/rec_model_utils.h b/xllm/core/common/rec_model_utils.h
@@ -0,0 +1,48 @@
+/* Copyright 2025 The xLLM Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://github.com/jd-opensource/xllm/blob/main/LICENSE
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#pragma once
+
+#include <cstdint>
+#include <string_view>
+
+namespace xllm {
+
+enum class RecModelKind : int8_t {
+  kNone = 0,
+  kOneRec = 1,
+  kLlmRec = 2,
+};
+
+inline constexpr bool is_onerec_model_type(std::string_view model_type) {
+  return model_type == "onerec";
+}
+
+inline constexpr bool is_llmrec_model_type(std::string_view model_type) {
+  return model_type == "qwen2" || model_type == "qwen3";
+}
+
+inline constexpr RecModelKind get_rec_model_kind(std::string_view model_type) {
+  if (is_onerec_model_type(model_type)) {
+    return RecModelKind::kOneRec;
+  }
+  if (is_llmrec_model_type(model_type)) {
+    return RecModelKind::kLlmRec;
+  }
+  return RecModelKind::kNone;
+}
+
+}  // namespace xllm
+
diff --git a/xllm/core/common/types.h b/xllm/core/common/types.h
@@ -292,4 +292,9 @@ struct EplbInfo {
 inline constexpr int REC_TOKEN_SIZE = 3;
 
 using RecTokenTriple = std::array<int32_t, REC_TOKEN_SIZE>;
+
+inline constexpr const char* LLM_REC_INPUT_TOKENS = "llm_rec_input_tokens";
+inline constexpr const char* LLM_REC_INPUT_INDICES = "llm_rec_input_indices";
+inline constexpr const char* LLM_REC_INPUT_EMBEDDING =
+    "llm_rec_input_embedding";
 }  // namespace xllm
diff --git a/xllm/core/distributed_runtime/master.cpp b/xllm/core/distributed_runtime/master.cpp
@@ -22,6 +22,7 @@ limitations under the License.
 #include <boost/algorithm/string.hpp>
 #include <csignal>
 #include <memory>
+#include <optional>
 #include <thread>
 #include <utility>
 
@@ -30,6 +31,7 @@ limitations under the License.
 #include "common/types.h"
 #include "dit_master.h"
 #include "framework/model/model_args.h"
+#include "framework/model_loader.h"
 #include "framework/request/request.h"
 #include "llm_engine.h"
 #include "llm_master.h"
@@ -237,8 +239,13 @@ Master::Master(const Options& options, EngineType type) : options_(options) {
     options_.enable_schedule_overlap(false);
     LOG(WARNING) << "Force to disable schedule overlap for REC model, not "
                     "supported yet.";
+
+    auto model_loader = ModelLoader::create(options_.model_path());
+    const std::string rec_model_type = model_loader->model_args().model_type();
+
     runtime::Options eng_options;
     eng_options.model_path(options_.model_path())
+        .rec_model_type(std::make_optional(rec_model_type))
         .devices(devices)
         .backend(options_.backend())
         .block_size(options_.block_size())