PaddlePaddle · co63oc · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/backends/mlu/kernels/funcs/elementwise_utils.h b/backends/mlu/kernels/funcs/elementwise_utils.h
@@ -120,11 +120,11 @@ inline void GetReduceAxesAndDstDims(const int axis,
 
 template <typename T>
 void MLUOpTensorKernel(const Context& dev_ctx,
-                       const phi::DenseTensor& x,
-                       const phi::DenseTensor& y,
+                       const DenseTensor& x,
+                       const DenseTensor& y,
                        int axis,
                        const cnnlOpTensorDesc_t op_tensor_type,
-                       phi::DenseTensor* out) {
+                       DenseTensor* out) {
   PADDLE_ENFORCE_EQ((op_tensor_type == CNNL_OP_TENSOR_ADD) ||
                         (op_tensor_type == CNNL_OP_TENSOR_SUB) ||
                         (op_tensor_type == CNNL_OP_TENSOR_MUL),
@@ -241,10 +241,10 @@ inline void MLUBinary<POW>(const Context& dev_ctx,
 
 template <BINARY_FUNCTOR Functor, typename T>
 void MLUBinaryOp(const Context& dev_ctx,
-                 const phi::DenseTensor& x,
-                 const phi::DenseTensor& y,
+                 const DenseTensor& x,
+                 const DenseTensor& y,
                  int axis,
-                 phi::DenseTensor* out) {
+                 DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);
   Tensor x_t, y_t;
   x_t = x;
@@ -319,8 +319,8 @@ inline void MLUUnary<RECIPROCAL>(const Context& dev_ctx,
 
 template <UNARY_FUNCTOR Functor, typename Tin, typename Tout = Tin>
 void MLUUnaryOp(const Context& dev_ctx,
-                const phi::DenseTensor& x,
-                phi::DenseTensor* out) {
+                const DenseTensor& x,
+                DenseTensor* out) {
   dev_ctx.template Alloc<Tout>(out);
 
   MLUCnnlTensorDesc x_desc(x, CNNL_LAYOUT_ARRAY, ToCnnlDataType<Tin>());
@@ -342,12 +342,12 @@ enum MINMAX_GRAD_FUNCTOR {
 };
 template <MINMAX_GRAD_FUNCTOR Functor, typename Tin, typename Tout = Tin>
 void MLUMinMaxGradHelper(const Context& dev_ctx,
-                         const phi::DenseTensor& x,
-                         const phi::DenseTensor& y,
-                         const phi::DenseTensor& dout,
+                         const DenseTensor& x,
+                         const DenseTensor& y,
+                         const DenseTensor& dout,
                          int axis,
-                         phi::DenseTensor* dx,
-                         phi::DenseTensor* dy) {
+                         DenseTensor* dx,
+                         DenseTensor* dy) {
   const auto& x_dims = x.dims();
   const auto& y_dims = y.dims();
   axis =

diff --git a/backends/mlu/kernels/funcs/logic_op.h b/backends/mlu/kernels/funcs/logic_op.h
@@ -21,10 +21,10 @@ namespace custom_kernel {
 
 template <typename Context>
 void MLULogicOp(const Context& dev_ctx,
-                const phi::DenseTensor& x,
-                const phi::DenseTensor& y,
+                const DenseTensor& x,
+                const DenseTensor& y,
                 const std::string& logic_name,
-                phi::DenseTensor* out) {
+                DenseTensor* out) {
   dev_ctx.template Alloc<bool>(out);
 
   MLUCnnlTensorDesc input_x(x, CNNL_LAYOUT_ARRAY, ToCnnlDataType(x.dtype()));

diff --git a/backends/mlu/kernels/funcs/mlu_baseop.h b/backends/mlu/kernels/funcs/mlu_baseop.h
@@ -25,6 +25,10 @@
 namespace custom_kernel {
 
 using Tensor = phi::DenseTensor;
+using DenseTensor = phi::DenseTensor;
+using DenseTensorMeta = phi::DenseTensorMeta;
+using Scalar = phi::Scalar;
+using DDim = phi::DDim;
 using Context = phi::CustomContext;
 using DataType = phi::DataType;
 using DataLayout = phi::DataLayout;

diff --git a/backends/mlu/kernels/funcs/mlu_funcs.h b/backends/mlu/kernels/funcs/mlu_funcs.h
@@ -25,9 +25,9 @@ namespace custom_kernel {
  */
 template <typename Context>
 inline void TensorCopy(const Context& dev_ctx,
-                       const phi::DenseTensor& src,
+                       const DenseTensor& src,
                        bool blocking,
-                       phi::DenseTensor* dst,
+                       DenseTensor* dst,
                        const phi::Place& dst_place = phi::CustomPlace()) {
   dev_ctx.Wait();
   auto* src_ptr = src.data();
@@ -103,7 +103,7 @@ template <typename T>
 inline void TensorFromVector(const phi::CustomContext& ctx,
                              const std::vector<T>& src,
                              const phi::CustomContext& dev_ctx,
-                             phi::DenseTensor* dst) {
+                             DenseTensor* dst) {
   auto dst_place = dev_ctx.GetPlace();
   C_Device_st device{dst_place.GetDeviceId()};
   auto src_ptr = static_cast<const void*>(src.data());
@@ -128,7 +128,7 @@ template <>
 inline void TensorFromVector<bool>(const phi::CustomContext& ctx,
                                    const std::vector<bool>& src,
                                    const phi::CustomContext& dev_ctx,
-                                   phi::DenseTensor* dst) {
+                                   DenseTensor* dst) {
   // vector<bool> has no data() member, use array instead.
   // See details:
   // https://stackoverflow.com/questions/46115669/why-does-stdvectorbool-have-no-data/46115714
@@ -166,7 +166,7 @@ template <typename T>
 inline void TensorFromVector(const phi::CustomContext& ctx,
                              const std::vector<T>& src,
                              const phi::CPUContext& dev_ctx,
-                             phi::DenseTensor* dst) {
+                             DenseTensor* dst) {
   auto dst_place = dev_ctx.GetPlace();
   C_Device_st device{dst_place.GetDeviceId()};
   auto src_ptr = static_cast<const void*>(src.data());
@@ -191,7 +191,7 @@ template <>
 inline void TensorFromVector<bool>(const phi::CustomContext& ctx,
                                    const std::vector<bool>& src,
                                    const phi::CPUContext& dev_ctx,
-                                   phi::DenseTensor* dst) {
+                                   DenseTensor* dst) {
   auto dst_place = dev_ctx.GetPlace();
   PADDLE_THROW(phi::errors::Unimplemented(
       "TensorFromVector on %s is not supported.", dst_place));
@@ -202,7 +202,7 @@ void TensorFromArray(const phi::CustomContext& ctx,
                      const T* src,
                      const size_t& array_size,
                      const phi::CustomContext& dev_ctx,
-                     phi::DenseTensor* dst) {
+                     DenseTensor* dst) {
   auto dst_place = dev_ctx.GetPlace();
   C_Device_st device{dst_place.GetDeviceId()};
   auto src_ptr = static_cast<const void*>(src);
@@ -227,7 +227,7 @@ void TensorFromArray(const phi::CustomContext& ctx,
  */
 template <typename T>
 inline void TensorToVector(const phi::CustomContext& ctx,
-                           const phi::DenseTensor& src,
+                           const DenseTensor& src,
                            const phi::CustomContext& dev_ctx,
                            std::vector<T>* dst) {
   auto src_ptr = static_cast<const void*>(src.data<T>());
@@ -251,7 +251,7 @@ inline void TensorToVector(const phi::CustomContext& ctx,
 
 template <>
 inline void TensorToVector<bool>(const phi::CustomContext& ctx,
-                                 const phi::DenseTensor& src,
+                                 const DenseTensor& src,
                                  const phi::CustomContext& dev_ctx,
                                  std::vector<bool>* dst) {
   auto src_ptr = static_cast<const void*>(src.data<bool>());
@@ -359,11 +359,10 @@ inline void ExtractNCDWH(const phi::DDim& dims,
 
 template <typename T>
 inline std::vector<T> get_new_data_from_tensor(
-    const phi::CustomContext& dev_ctx,
-    const phi::DenseTensor* new_data_tensor) {
+    const phi::CustomContext& dev_ctx, const DenseTensor* new_data_tensor) {
   std::vector<T> vec_new_data;
   auto place = new_data_tensor->place();
-  phi::DenseTensor cpu_starts_tensor;
+  DenseTensor cpu_starts_tensor;
   if (place.GetType() == phi::AllocationType::CUSTOM) {
     // if tensor on CUSTOM place, do memcpy to host
     cpu_starts_tensor.Resize(new_data_tensor->dims());
@@ -381,22 +380,21 @@ inline std::vector<T> get_new_data_from_tensor(
 }
 
 template <typename T>
-inline phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
-                                        T num_col_dims) {
+inline DenseTensor ReshapeToMatrix(const DenseTensor& src, T num_col_dims) {
   int rank = src.dims().size();
   PADDLE_ENFORCE_GE(
       rank,
       2,
       phi::errors::InvalidArgument(
           "'ReshapeToMatrix()' is only used for flatten high rank "
-          "tensors to matrixs. The dimensions of phi::DenseTensor must be "
+          "tensors to matrixs. The dimensions of DenseTensor must be "
           "greater or equal than 2. "
-          "But received dimensions of phi::DenseTensor is %d",
+          "But received dimensions of DenseTensor is %d",
           rank));
   if (rank == 2) {
     return src;
   }
-  phi::DenseTensor res;
+  DenseTensor res;
   res = src;
   res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims));
   return res;

diff --git a/backends/mlu/kernels/funcs/range_op.h b/backends/mlu/kernels/funcs/range_op.h
@@ -24,7 +24,7 @@ void ArangeRawKernel(const Context& dev_ctx,
                      const T start_value,
                      const T end_value,
                      const T step_value,
-                     phi::DenseTensor* out) {
+                     DenseTensor* out) {
   int64_t size = 0;
   GetSize(start_value, end_value, step_value, &size);
 

diff --git a/backends/mlu/kernels/funcs/reduce_op.h b/backends/mlu/kernels/funcs/reduce_op.h
@@ -21,12 +21,12 @@ namespace custom_kernel {
 
 template <typename T, typename Context>
 void MLUReduceOp(const Context& dev_ctx,
-                 const phi::DenseTensor& x,
+                 const DenseTensor& x,
                  const std::vector<int64_t>& axes,
                  bool keep_dim,
                  bool reduce_all,
                  const std::string& reduce_name,
-                 phi::DenseTensor* out) {
+                 DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);
   if (x.dims().size() == 0) {
     TensorCopy(dev_ctx, x, true, out);