Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions xllm/api_service/rec_completion_service_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@ limitations under the License.
#include "completion.pb.h"
#include "core/distributed_runtime/llm_master.h"
#include "core/distributed_runtime/rec_master.h"
#include "core/framework/request/mm_data.h"
#include "core/framework/request/request_output.h"
#include "core/util/utils.h"

#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
Expand Down Expand Up @@ -167,18 +165,15 @@ void RecCompletionServiceImpl::process_async_impl(
}

const auto& rpc_request_ref = call->request();
std::optional<MMData> mm_data = std::nullopt;
std::optional<std::vector<proto::InferInputTensor>> input_tensors =
std::nullopt;
if (rpc_request_ref.input_tensors_size()) {
// HISTOGRAM_OBSERVE(rec_input_first_dim,
// rpc_request_ref.input_tensors(0).shape(0));

MMDict mm_dict;
std::vector<proto::InferInputTensor> tensors;
tensors.reserve(rpc_request_ref.input_tensors_size());
for (int i = 0; i < rpc_request_ref.input_tensors_size(); ++i) {
const auto& tensor = rpc_request_ref.input_tensors(i);
mm_dict[tensor.name()] =
xllm::util::convert_rec_tensor_to_torch(tensor).to(torch::kBFloat16);
tensors.push_back(rpc_request_ref.input_tensors(i));
}
mm_data = std::move(MMData(MMType::EMBEDDING, mm_dict));
input_tensors = std::move(tensors);
}

// schedule the request
Expand All @@ -187,7 +182,7 @@ void RecCompletionServiceImpl::process_async_impl(
master_->handle_request(
std::move(rpc_request_ref.prompt()),
std::move(prompt_tokens),
std::move(mm_data),
std::move(input_tensors),
std::move(request_params),
[call,
model,
Expand Down Expand Up @@ -219,4 +214,4 @@ void RecCompletionServiceImpl::process_async_impl(
});
}

} // namespace xllm
} // namespace xllm
5 changes: 5 additions & 0 deletions xllm/core/common/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,4 +292,9 @@ struct EplbInfo {
// Number of int32_t elements held by one RecTokenTriple.
inline constexpr int REC_TOKEN_SIZE = 3;

// Fixed-size array of REC_TOKEN_SIZE int32_t values.
using RecTokenTriple = std::array<int32_t, REC_TOKEN_SIZE>;

// String identifiers for the recommendation ("rec") inputs: tokens, indices
// and embedding.
// NOTE(review): presumably these are matched against the `name` of the input
// tensors carried by a rec completion request -- confirm against the consumer.
inline constexpr const char* LLM_REC_INPUT_TOKENS = "llm_rec_input_tokens";
inline constexpr const char* LLM_REC_INPUT_INDICES = "llm_rec_input_indices";
inline constexpr const char* LLM_REC_INPUT_EMBEDDING =
"llm_rec_input_embedding";
} // namespace xllm
Loading
Loading