@@ -28,9 +28,7 @@ limitations under the License.
2828#include " completion.pb.h"
2929#include " core/distributed_runtime/llm_master.h"
3030#include " core/distributed_runtime/rec_master.h"
31- #include " core/framework/request/mm_data.h"
3231#include " core/framework/request/request_output.h"
33- #include " core/util/utils.h"
3432
3533#define likely (x ) __builtin_expect(!!(x), 1 )
3634#define unlikely (x ) __builtin_expect(!!(x), 0 )
@@ -167,18 +165,15 @@ void RecCompletionServiceImpl::process_async_impl(
167165 }
168166
169167 const auto & rpc_request_ref = call->request ();
170- std::optional<MMData> mm_data = std::nullopt ;
168+ std::optional<std::vector<proto::InferInputTensor>> input_tensors =
169+ std::nullopt ;
171170 if (rpc_request_ref.input_tensors_size ()) {
172- // HISTOGRAM_OBSERVE(rec_input_first_dim,
173- // rpc_request_ref.input_tensors(0).shape(0));
174-
175- MMDict mm_dict;
171+ std::vector<proto::InferInputTensor> tensors;
172+ tensors.reserve (rpc_request_ref.input_tensors_size ());
176173 for (int i = 0 ; i < rpc_request_ref.input_tensors_size (); ++i) {
177- const auto & tensor = rpc_request_ref.input_tensors (i);
178- mm_dict[tensor.name ()] =
179- xllm::util::convert_rec_tensor_to_torch (tensor).to (torch::kBFloat16 );
174+ tensors.push_back (rpc_request_ref.input_tensors (i));
180175 }
181- mm_data = std::move (MMData (MMType::EMBEDDING, mm_dict) );
176+ input_tensors = std::move (tensors );
182177 }
183178
184179 // schedule the request
@@ -187,7 +182,7 @@ void RecCompletionServiceImpl::process_async_impl(
187182 master_->handle_request (
188183 std::move (rpc_request_ref.prompt ()),
189184 std::move (prompt_tokens),
190- std::move (mm_data ),
185+ std::move (input_tensors ),
191186 std::move (request_params),
192187 [call,
193188 model,
@@ -219,4 +214,4 @@ void RecCompletionServiceImpl::process_async_impl(
219214 });
220215}
221216
222- } // namespace xllm
217+ } // namespace xllm
0 commit comments