Skip to content

Commit ea71989

Browse files
committed
feat: add rec_type and onerec batch input builder.
1 parent 31a9e30 commit ea71989

File tree

12 files changed

+1816
-86
lines changed

12 files changed

+1816
-86
lines changed

xllm/api_service/rec_completion_service_impl.cpp

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,7 @@ limitations under the License.
2828
#include "completion.pb.h"
2929
#include "core/distributed_runtime/llm_master.h"
3030
#include "core/distributed_runtime/rec_master.h"
31-
#include "core/framework/request/mm_data.h"
3231
#include "core/framework/request/request_output.h"
33-
#include "core/util/utils.h"
3432

3533
#define likely(x) __builtin_expect(!!(x), 1)
3634
#define unlikely(x) __builtin_expect(!!(x), 0)
@@ -167,18 +165,15 @@ void RecCompletionServiceImpl::process_async_impl(
167165
}
168166

169167
const auto& rpc_request_ref = call->request();
170-
std::optional<MMData> mm_data = std::nullopt;
168+
std::optional<std::vector<proto::InferInputTensor>> input_tensors =
169+
std::nullopt;
171170
if (rpc_request_ref.input_tensors_size()) {
172-
// HISTOGRAM_OBSERVE(rec_input_first_dim,
173-
// rpc_request_ref.input_tensors(0).shape(0));
174-
175-
MMDict mm_dict;
171+
std::vector<proto::InferInputTensor> tensors;
172+
tensors.reserve(rpc_request_ref.input_tensors_size());
176173
for (int i = 0; i < rpc_request_ref.input_tensors_size(); ++i) {
177-
const auto& tensor = rpc_request_ref.input_tensors(i);
178-
mm_dict[tensor.name()] =
179-
xllm::util::convert_rec_tensor_to_torch(tensor).to(torch::kBFloat16);
174+
tensors.push_back(rpc_request_ref.input_tensors(i));
180175
}
181-
mm_data = std::move(MMData(MMType::EMBEDDING, mm_dict));
176+
input_tensors = std::move(tensors);
182177
}
183178

184179
// schedule the request
@@ -187,7 +182,7 @@ void RecCompletionServiceImpl::process_async_impl(
187182
master_->handle_request(
188183
std::move(rpc_request_ref.prompt()),
189184
std::move(prompt_tokens),
190-
std::move(mm_data),
185+
std::move(input_tensors),
191186
std::move(request_params),
192187
[call,
193188
model,
@@ -219,4 +214,4 @@ void RecCompletionServiceImpl::process_async_impl(
219214
});
220215
}
221216

222-
} // namespace xllm
217+
} // namespace xllm

xllm/core/common/types.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -292,4 +292,9 @@ struct EplbInfo {
292292
inline constexpr int REC_TOKEN_SIZE = 3;
293293

294294
using RecTokenTriple = std::array<int32_t, REC_TOKEN_SIZE>;
295+
296+
inline constexpr const char* LLM_REC_INPUT_TOKENS = "llm_rec_input_tokens";
297+
inline constexpr const char* LLM_REC_INPUT_INDICES = "llm_rec_input_indices";
298+
inline constexpr const char* LLM_REC_INPUT_EMBEDDING =
299+
"llm_rec_input_embedding";
295300
} // namespace xllm

0 commit comments

Comments
 (0)