From 5532eaef551452c99dcb5bb50d393f99b1d2e66e Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Tue, 11 Apr 2023 18:30:11 +0800
Subject: [PATCH 01/15] fix the bug where all ops share a single context

---
 python/conformance/conformance_test.py | 14 ++++++++------
 python/conformance/diopi_functions.py  | 12 ++++++------
 python/conformance/diopi_runtime.py    |  7 +++----
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/python/conformance/conformance_test.py b/python/conformance/conformance_test.py
index 0082b8b..c303c5e 100644
--- a/python/conformance/conformance_test.py
+++ b/python/conformance/conformance_test.py
@@ -5,13 +5,13 @@
 from . import diopi_functions as F
 from .utils import logger, FunctionNotImplementedError, DiopiException
 from .utils import need_process_func, glob_vars, nhwc_op, dtype_op
-from .diopi_runtime import Tensor, compute_nhwc_stride
+from .diopi_runtime import Tensor, compute_nhwc_stride, Context, default_context
 from .utils import save_precision, record, write_precision
 from .utils import get_saved_pth_list, get_data_from_file
 from .utils import cfg_file_name
 
 
-def convert_input_tensors(function_paras: dict, test_tag: list, nhwc_list=[], dtype_list=[], filter_dtype_str_list=[]):
+def convert_input_tensors(ctx, function_paras: dict, test_tag: list, nhwc_list=[], dtype_list=[], filter_dtype_str_list=[]):
     tensor_info = []
     for para in function_paras["kwargs"].keys():
         tensor = function_paras['kwargs'][para]
@@ -44,13 +44,13 @@ def convert_input_tensors(function_paras: dict, test_tag: list, nhwc_list=[], dt
                 raise DiopiException(f"Skipped: {tensor.dtype} Tensor skipped for test")
             if tensor is not None and str(tensor.dtype) not in test_tag:
                 test_tag.append(str(tensor.dtype))
-            function_paras['kwargs'][para] = Tensor.from_numpy(tensor)
+            function_paras['kwargs'][para] = Tensor.from_numpy(ctx, tensor)
             tensor_info.append((para, str(tensor.dtype), str(tensor.shape)))
 
         if para == "tensors":
             tensors = function_paras['kwargs'][para]
             for idx, ele in enumerate(tensors):
-                tensors[idx] = Tensor.from_numpy(ele)
+                tensors[idx] = Tensor.from_numpy(ctx, ele)
                 if ele is not None and str(ele.dtype) not in test_tag:
                     test_tag.append(str(ele.dtype))
             function_paras['kwargs'][para] = tensors
@@ -252,12 +252,14 @@ def run(func_name, model_name, filter_dtype_str_list):
                 func_call_list.append(f"{module}.{test_func_name}(**kwargs, inplace=True)")
 
             for func_call in func_call_list:
+                ctx = Context()
+                ctx = default_context
                 if "inplace=True" in func_call:
                     if test_tag and test_tag[-1] == 'backward':
                         test_tag.pop()
                     test_tag.append("inplace")
                 try:
-                    info = convert_input_tensors(function_paras, test_tag, nhwc_list, dtype_list, filter_dtype_str_list)
+                    info = convert_input_tensors(ctx, function_paras, test_tag, nhwc_list, dtype_list, filter_dtype_str_list)
                     tensor_info = info if info else tensor_info
                     output = eval(func_call)
                     sum_to_compare = True if 'sorted' in kwargs and ~kwargs['sorted'] else False
@@ -299,7 +301,6 @@ def run(func_name, model_name, filter_dtype_str_list):
 
                     try:
                         grad_input = eval(f"F.{cfg_func_name}_backward(**kwargs, **backward_para)")
-                        # import pdb;pdb.set_trace()
                         passed = compare_with_gen_output(grad_input, data['cfg'], backward_out_reference)
                         logger.info(f"Run diopi_functions.{cfg_func_name}_backward succeed") \
                             if passed else logger.error(f"Run diopi_functions.{cfg_func_name}_backward failed", tag=test_tag, info=tensor_info)
@@ -310,3 +311,4 @@ def run(func_name, model_name, filter_dtype_str_list):
                         logger.error(f"AttributeError: {e}")
                     except Exception as e:
                         logger.error(f"Failed: {e}")
+                    del ctx
diff --git a/python/conformance/diopi_functions.py b/python/conformance/diopi_functions.py
index e631269..e54caac 100644
--- a/python/conformance/diopi_functions.py
+++ b/python/conformance/diopi_functions.py
@@ -969,7 +969,7 @@ def nonzero(input):
     ret = func(input.context_handle, pointer(out_tensor_handle),
                input.tensor_handle)
     check_returncode(ret)
-    out = Tensor.from_handle(out_tensor_handle)
+    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
     return out
 
 
@@ -1509,7 +1509,7 @@ def nms(boxes, scores, iou_threshold) -> Tensor:
     func = check_function("diopiNms")
     ret = func(boxes.context_handle, pointer(out_tensor_handle), boxes.tensor_handle,
                scores.tensor_handle, c_double(iou_threshold))
-    out = Tensor.from_handle(out_tensor_handle)
+    out = Tensor.from_handle(boxes.context_handle, out_tensor_handle)
     check_returncode(ret)
     return out
 
@@ -1580,7 +1580,7 @@ def index(input, **kwargs) -> Tensor:
     func = check_function("diopiIndex")
     ret = func(input.context_handle, pointer(out_tensor_handle), input.tensor_handle,
                pointer(c_indices), c_int64(nums))
-    out = Tensor.from_handle(out_tensor_handle)
+    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
     check_returncode(ret)
     return out
 
@@ -2558,7 +2558,7 @@ def masked_select(input, mask) -> Tensor:
     ret = func(input.context_handle, pointer(out_tensor_handle), input.tensor_handle,
                mask.tensor_handle)
     check_returncode(ret)
-    out = Tensor.from_handle(out_tensor_handle)
+    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
     return out
 
 
@@ -3190,9 +3190,9 @@ def unique(input, sorted=True, return_inverse=False, return_counts=False, dim=No
     ret = func(input.context_handle, pointer(out_tensor_handle), input.tensor_handle, dim, c_bool(sorted),
                c_bool(return_counts), indices_handle, pointer(counts))
     check_returncode(ret)
-    out = Tensor.from_handle(out_tensor_handle)
+    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
     if return_counts:
-        counts = Tensor.from_handle(counts)
+        counts = Tensor.from_handle(input.context_handle, counts)
     if return_inverse and not return_counts:
         return out, indices
     elif not return_inverse and return_counts:
diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index 8643840..c04283c 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -226,8 +226,7 @@ def __init__(
             )
 
     @classmethod
-    def from_handle(cls, tensor_handle):
-        ctx_handle = ContextHandle()
+    def from_handle(cls, ctx_handle, tensor_handle):
         diopirt_lib._diopiTensorGetCtxHandle(tensor_handle, byref(ctx_handle))
         return cls(size=None, dtype=None, context_handle=ctx_handle, tensor_handle=tensor_handle)
 
@@ -297,14 +296,14 @@ def reset_shape(self, shape):
         diopirt_lib._diopiTensorResetShape(self.tensor_handle, byref(Sizes(tuple(shape))))
 
     @classmethod
-    def from_numpy(cls, darray):
+    def from_numpy(cls, ctx, darray):
         if not isinstance(darray, (np.generic, np.ndarray)):
             raise TypeError(f"expected np.ndarray (got {type(darray)})")
 
         dtype = from_numpy_dtype(darray.dtype)
         stride = [int(darray.strides[i] / darray.itemsize)
                   for i in range(len(darray.strides))]
-        tr = cls(size=darray.shape, dtype=dtype, stride=stride)
+        tr = cls(size=darray.shape, dtype=dtype, stride=stride, context_handle=ctx.context_handle)
         diopirt_lib._diopiTensorCopyFromBuffer(tr.context_handle,
                                                c_void_p(darray.ctypes.data),
                                                tr.tensor_handle)

From 6cb812d5918f4fb598e49b3995fa4b3730195300 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Thu, 20 Apr 2023 00:34:25 +0800
Subject: [PATCH 02/15] fix context problem about mlu memory leak

---
 csrc/litert.cpp                        | 11 +++++++--
 python/conformance/conformance_test.py | 25 +++++++++++++-------
 python/conformance/diopi_runtime.py    | 32 ++++++++++++++++++--------
 3 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/csrc/litert.cpp b/csrc/litert.cpp
index 439b22e..8fe7f6d 100644
--- a/csrc/litert.cpp
+++ b/csrc/litert.cpp
@@ -472,9 +472,10 @@ DIOPI_RT_API diopiError_t _diopiCreateContext(diopiContextHandle_t* ctx) {
     return diopiSuccess;
 }
 
-DIOPI_RT_API diopiError_t _diopiDestroyContext(diopiContextHandle_t ctx) {
+DIOPI_RT_API diopiError_t _diopiDestroyContext(diopiContextHandle_t* ctx) {
     diopi_log("destroy a Context instance: %16p", ctx);
-    delete ctx;
+    delete *ctx;
+    *ctx = nullptr;
     return diopiSuccess;
 }
 
@@ -536,6 +537,12 @@ DIOPI_RT_API diopiError_t diopiFinalize() {
     return diopiSuccess;
 }
 
+DIOPI_RT_API diopiError_t _diopiDeviceStreamSync(diopiContextHandle_t ctx) {
+    diopiStreamHandle_t stream;
+    diopiGetStream(ctx, &stream);
+    synchronize_stream_func(stream);
+}
+
 DIOPI_RT_API diopiError_t _diopiTensorCopyFromBuffer(diopiContextHandle_t ctx,
                                                   const void*          src,
                                                   diopiTensorHandle_t  tensor) {
diff --git a/python/conformance/conformance_test.py b/python/conformance/conformance_test.py
index c303c5e..7e609fd 100644
--- a/python/conformance/conformance_test.py
+++ b/python/conformance/conformance_test.py
@@ -5,7 +5,8 @@
 from . import diopi_functions as F
 from .utils import logger, FunctionNotImplementedError, DiopiException
 from .utils import need_process_func, glob_vars, nhwc_op, dtype_op
-from .diopi_runtime import Tensor, compute_nhwc_stride, Context, default_context
+from . import diopi_runtime
+from .diopi_runtime import Tensor, compute_nhwc_stride, Context
 from .utils import save_precision, record, write_precision
 from .utils import get_saved_pth_list, get_data_from_file
 from .utils import cfg_file_name
@@ -251,9 +252,8 @@ def run(func_name, model_name, filter_dtype_str_list):
             if data["cfg"].get("is_inplace", False):
                 func_call_list.append(f"{module}.{test_func_name}(**kwargs, inplace=True)")
 
+            ctx = Context()
             for func_call in func_call_list:
-                ctx = Context()
-                ctx = default_context
                 if "inplace=True" in func_call:
                     if test_tag and test_tag[-1] == 'backward':
                         test_tag.pop()
@@ -261,20 +261,23 @@ def run(func_name, model_name, filter_dtype_str_list):
                 try:
                     info = convert_input_tensors(ctx, function_paras, test_tag, nhwc_list, dtype_list, filter_dtype_str_list)
                     tensor_info = info if info else tensor_info
+                    # import pdb;pdb.set_trace()
                     output = eval(func_call)
+                    ctx.streamSync()
                     sum_to_compare = True if 'sorted' in kwargs and ~kwargs['sorted'] else False
                     passed = compare_with_gen_output(output, data['cfg'], output_reference, sum_to_compare) \
                         if need_output else True
                     logger.info(f"Run diopi_functions.{cfg_func_name} succeed") \
                         if passed else logger.error(f"Run diopi_functions.{cfg_func_name} failed", tag=test_tag, info=tensor_info)
                 except FunctionNotImplementedError as e:
-                    logger.error(f"NotImplemented: {e}")
+                    ctx.streamSync()
+                    logger.error(f"NotImplemented: {e} in {func_call}")
                     continue
                 except AttributeError as e:
-                    logger.error(f"AttributeError: {e}")
-                    continue
+                    ctx.streamSync()
                 except Exception as e:
-                    logger.error(f"{e}")
+                    ctx.streamSync()
+                    logger.error(f"{e} in {func_call}")
                     continue
 
                 write_precision(data["cfg"], cfg_func_name, passed)
@@ -285,6 +288,7 @@ def run(func_name, model_name, filter_dtype_str_list):
                     saved_backward_pth = os.path.join(outputs_dir_path, saved_backward_pth)
                     backward_out_reference = get_data_from_file(saved_backward_pth, saved_pth, "backward output")
                     if backward_out_reference is None:
+                        ctx.streamSync()
                         continue
                     if not isinstance(output, (list, tuple)):
                         output = [output]
@@ -306,9 +310,14 @@ def run(func_name, model_name, filter_dtype_str_list):
                             if passed else logger.error(f"Run diopi_functions.{cfg_func_name}_backward failed", tag=test_tag, info=tensor_info)
                         write_precision(data["cfg"], cfg_func_name + '_bp', passed)
                     except FunctionNotImplementedError as e:
+                        ctx.streamSync()
                         logger.error(f"NotImplemented: {e}")
                     except AttributeError as e:
+                        ctx.streamSync()
                         logger.error(f"AttributeError: {e}")
                     except Exception as e:
+                        ctx.streamSync()
                         logger.error(f"Failed: {e}")
-                    del ctx
+                    else:
+                        ctx.streamSync()
+            ctx.clear()
diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index c04283c..70854d6 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -153,20 +153,31 @@ def get_last_error():
 ContextHandle = c_void_p
 TensorHandle = c_void_p
 
-
+cnt = 0
+cnt_del = 0
 class Context:
     _c_lib = diopirt_lib
 
     def __init__(self):
         self.context_handle = ContextHandle()
-        self.__class__._c_lib._diopiCreateContext(byref(self.context_handle))
-
-    def __del__(self):
-        self.__class__._c_lib._diopiDestroyContext(self.context_handle)
-
+        self._c_lib._diopiCreateContext(byref(self.context_handle))
+
+    # def __del__(self):
+    #     if self.context_handle.value is not None and self._c_lib is not None:
+    #         try:
+    #             self._c_lib._diopiDestroyContext(byref(self.context_handle))
+    #         except :
+    #             import pdb;pdb.set_trace()
+    #             print("=========")
+    def clear(self):
+        if self.context_handle.value is not None and self._c_lib is not None:
+            self._c_lib._diopiDestroyContext(byref(self.context_handle))
     def get_handle(self):
         return self.context_handle
 
+    def streamSync(self):
+        self._c_lib._diopiDeviceStreamSync(self.context_handle)
+
 
 default_context = Context()
 
@@ -226,13 +237,15 @@ def __init__(
             )
 
     @classmethod
-    def from_handle(cls, ctx_handle, tensor_handle):
+    def from_handle(cls, tensor_handle):
+        ctx_handle = ContextHandle()
         diopirt_lib._diopiTensorGetCtxHandle(tensor_handle, byref(ctx_handle))
         return cls(size=None, dtype=None, context_handle=ctx_handle, tensor_handle=tensor_handle)
 
     def __del__(self):
-        diopirt_lib._diopiDestoryTensor(self.context_handle,
-                                        self.tensor_handle)
+        if self.context_handle.value is not None:
+            diopirt_lib._diopiDestoryTensor(self.context_handle,
+                                            self.tensor_handle)
 
     def __str__(self):
         array = self.numpy()
@@ -299,7 +312,6 @@ def reset_shape(self, shape):
     def from_numpy(cls, ctx, darray):
         if not isinstance(darray, (np.generic, np.ndarray)):
             raise TypeError(f"expected np.ndarray (got {type(darray)})")
-
         dtype = from_numpy_dtype(darray.dtype)
         stride = [int(darray.strides[i] / darray.itemsize)
                   for i in range(len(darray.strides))]

From d8853968c1a96ea244e6e740f62a6a6f73345411 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Thu, 20 Apr 2023 00:45:48 +0800
Subject: [PATCH 03/15] format

---
 python/conformance/conformance_test.py | 2 +-
 python/conformance/diopi_runtime.py    | 8 +-------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/python/conformance/conformance_test.py b/python/conformance/conformance_test.py
index 7e609fd..8ea526c 100644
--- a/python/conformance/conformance_test.py
+++ b/python/conformance/conformance_test.py
@@ -5,7 +5,6 @@
 from . import diopi_functions as F
 from .utils import logger, FunctionNotImplementedError, DiopiException
 from .utils import need_process_func, glob_vars, nhwc_op, dtype_op
-from . import diopi_runtime
 from .diopi_runtime import Tensor, compute_nhwc_stride, Context
 from .utils import save_precision, record, write_precision
 from .utils import get_saved_pth_list, get_data_from_file
@@ -320,4 +319,5 @@ def run(func_name, model_name, filter_dtype_str_list):
                         logger.error(f"Failed: {e}")
                     else:
                         ctx.streamSync()
+            # do not forget to clear the ctx.
             ctx.clear()
diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index 70854d6..864a0b2 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -162,16 +162,10 @@ def __init__(self):
         self.context_handle = ContextHandle()
         self._c_lib._diopiCreateContext(byref(self.context_handle))
 
-    # def __del__(self):
-    #     if self.context_handle.value is not None and self._c_lib is not None:
-    #         try:
-    #             self._c_lib._diopiDestroyContext(byref(self.context_handle))
-    #         except :
-    #             import pdb;pdb.set_trace()
-    #             print("=========")
     def clear(self):
         if self.context_handle.value is not None and self._c_lib is not None:
             self._c_lib._diopiDestroyContext(byref(self.context_handle))
+
     def get_handle(self):
         return self.context_handle
 

From e0c4abe9ded183683c2255abc60c46c351359c9b Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Thu, 20 Apr 2023 00:58:48 +0800
Subject: [PATCH 04/15] delete streamSync

---
 python/conformance/conformance_test.py | 13 ++-----------
 python/conformance/diopi_runtime.py    |  3 +--
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/python/conformance/conformance_test.py b/python/conformance/conformance_test.py
index 8ea526c..efa42a9 100644
--- a/python/conformance/conformance_test.py
+++ b/python/conformance/conformance_test.py
@@ -260,22 +260,19 @@ def run(func_name, model_name, filter_dtype_str_list):
                 try:
                     info = convert_input_tensors(ctx, function_paras, test_tag, nhwc_list, dtype_list, filter_dtype_str_list)
                     tensor_info = info if info else tensor_info
-                    # import pdb;pdb.set_trace()
                     output = eval(func_call)
-                    ctx.streamSync()
                     sum_to_compare = True if 'sorted' in kwargs and ~kwargs['sorted'] else False
                     passed = compare_with_gen_output(output, data['cfg'], output_reference, sum_to_compare) \
                         if need_output else True
                     logger.info(f"Run diopi_functions.{cfg_func_name} succeed") \
                         if passed else logger.error(f"Run diopi_functions.{cfg_func_name} failed", tag=test_tag, info=tensor_info)
                 except FunctionNotImplementedError as e:
-                    ctx.streamSync()
                     logger.error(f"NotImplemented: {e} in {func_call}")
                     continue
                 except AttributeError as e:
-                    ctx.streamSync()
+                    logger.error(f"{e} in {func_call}")
+                    continue
                 except Exception as e:
-                    ctx.streamSync()
                     logger.error(f"{e} in {func_call}")
                     continue
 
@@ -287,7 +284,6 @@ def run(func_name, model_name, filter_dtype_str_list):
                     saved_backward_pth = os.path.join(outputs_dir_path, saved_backward_pth)
                     backward_out_reference = get_data_from_file(saved_backward_pth, saved_pth, "backward output")
                     if backward_out_reference is None:
-                        ctx.streamSync()
                         continue
                     if not isinstance(output, (list, tuple)):
                         output = [output]
@@ -309,15 +305,10 @@ def run(func_name, model_name, filter_dtype_str_list):
                             if passed else logger.error(f"Run diopi_functions.{cfg_func_name}_backward failed", tag=test_tag, info=tensor_info)
                         write_precision(data["cfg"], cfg_func_name + '_bp', passed)
                     except FunctionNotImplementedError as e:
-                        ctx.streamSync()
                         logger.error(f"NotImplemented: {e}")
                     except AttributeError as e:
-                        ctx.streamSync()
                         logger.error(f"AttributeError: {e}")
                     except Exception as e:
-                        ctx.streamSync()
                         logger.error(f"Failed: {e}")
-                    else:
-                        ctx.streamSync()
             # do not forget to clear the ctx.
             ctx.clear()
diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index 864a0b2..d37614b 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -153,8 +153,7 @@ def get_last_error():
 ContextHandle = c_void_p
 TensorHandle = c_void_p
 
-cnt = 0
-cnt_del = 0
+
 class Context:
     _c_lib = diopirt_lib
 

From 467dd99d0a3bb3af0664c8e4c6e518a871c20c8e Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Thu, 20 Apr 2023 01:01:58 +0800
Subject: [PATCH 05/15] add return

---
 csrc/litert.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/csrc/litert.cpp b/csrc/litert.cpp
index 8fe7f6d..5c9d0be 100644
--- a/csrc/litert.cpp
+++ b/csrc/litert.cpp
@@ -541,6 +541,7 @@ DIOPI_RT_API diopiError_t _diopiDeviceStreamSync(diopiContextHandle_t ctx) {
     diopiStreamHandle_t stream;
     diopiGetStream(ctx, &stream);
     synchronize_stream_func(stream);
+    return diopiSuccess;
 }
 
 DIOPI_RT_API diopiError_t _diopiTensorCopyFromBuffer(diopiContextHandle_t ctx,

From d6b25c5674b7a4fc6fa18fc8bbc5ece8baff8057 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Thu, 20 Apr 2023 01:07:26 +0800
Subject: [PATCH 06/15] delete redundancy

---
 python/conformance/diopi_runtime.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index d37614b..6a6c590 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -162,7 +162,7 @@ def __init__(self):
         self._c_lib._diopiCreateContext(byref(self.context_handle))
 
     def clear(self):
-        if self.context_handle.value is not None and self._c_lib is not None:
+        if self.context_handle.value is not None:
             self._c_lib._diopiDestroyContext(byref(self.context_handle))
 
     def get_handle(self):

From a5dfc8d35ba4db4fa1df1bbc50a734f01095bee7 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Thu, 20 Apr 2023 01:12:43 +0800
Subject: [PATCH 07/15] add __class__

---
 python/conformance/diopi_runtime.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index 6a6c590..6457248 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -159,17 +159,17 @@ class Context:
 
     def __init__(self):
         self.context_handle = ContextHandle()
-        self._c_lib._diopiCreateContext(byref(self.context_handle))
+        self.__class__._c_lib._diopiCreateContext(byref(self.context_handle))
 
     def clear(self):
         if self.context_handle.value is not None:
-            self._c_lib._diopiDestroyContext(byref(self.context_handle))
+            self.__class__._c_lib._diopiDestroyContext(byref(self.context_handle))
 
     def get_handle(self):
         return self.context_handle
 
     def streamSync(self):
-        self._c_lib._diopiDeviceStreamSync(self.context_handle)
+        self.__class__._c_lib._diopiDeviceStreamSync(self.context_handle)
 
 
 default_context = Context()

From bec2e55e8ef5d0acc9fdd2ae6aeb66898e9b5a44 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Thu, 20 Apr 2023 15:57:58 +0800
Subject: [PATCH 08/15] add AttributeError prefix to the error log message

---
 python/conformance/conformance_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/conformance/conformance_test.py b/python/conformance/conformance_test.py
index efa42a9..7ad2164 100644
--- a/python/conformance/conformance_test.py
+++ b/python/conformance/conformance_test.py
@@ -270,7 +270,7 @@ def run(func_name, model_name, filter_dtype_str_list):
                     logger.error(f"NotImplemented: {e} in {func_call}")
                     continue
                 except AttributeError as e:
-                    logger.error(f"{e} in {func_call}")
+                    logger.error(f"AttributeError: {e} in {func_call}")
                     continue
                 except Exception as e:
                     logger.error(f"{e} in {func_call}")

From 7456dd1758638c64d6bfb57efca8237a69f0cee6 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Fri, 21 Apr 2023 10:09:23 +0800
Subject: [PATCH 09/15] fix from_handle

---
 python/conformance/diopi_functions.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/conformance/diopi_functions.py b/python/conformance/diopi_functions.py
index e54caac..e631269 100644
--- a/python/conformance/diopi_functions.py
+++ b/python/conformance/diopi_functions.py
@@ -969,7 +969,7 @@ def nonzero(input):
     ret = func(input.context_handle, pointer(out_tensor_handle),
                input.tensor_handle)
     check_returncode(ret)
-    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
+    out = Tensor.from_handle(out_tensor_handle)
     return out
 
 
@@ -1509,7 +1509,7 @@ def nms(boxes, scores, iou_threshold) -> Tensor:
     func = check_function("diopiNms")
     ret = func(boxes.context_handle, pointer(out_tensor_handle), boxes.tensor_handle,
                scores.tensor_handle, c_double(iou_threshold))
-    out = Tensor.from_handle(boxes.context_handle, out_tensor_handle)
+    out = Tensor.from_handle(out_tensor_handle)
     check_returncode(ret)
     return out
 
@@ -1580,7 +1580,7 @@ def index(input, **kwargs) -> Tensor:
     func = check_function("diopiIndex")
     ret = func(input.context_handle, pointer(out_tensor_handle), input.tensor_handle,
                pointer(c_indices), c_int64(nums))
-    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
+    out = Tensor.from_handle(out_tensor_handle)
     check_returncode(ret)
     return out
 
@@ -2558,7 +2558,7 @@ def masked_select(input, mask) -> Tensor:
     ret = func(input.context_handle, pointer(out_tensor_handle), input.tensor_handle,
                mask.tensor_handle)
     check_returncode(ret)
-    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
+    out = Tensor.from_handle(out_tensor_handle)
     return out
 
 
@@ -3190,9 +3190,9 @@ def unique(input, sorted=True, return_inverse=False, return_counts=False, dim=No
     ret = func(input.context_handle, pointer(out_tensor_handle), input.tensor_handle, dim, c_bool(sorted),
                c_bool(return_counts), indices_handle, pointer(counts))
     check_returncode(ret)
-    out = Tensor.from_handle(input.context_handle, out_tensor_handle)
+    out = Tensor.from_handle(out_tensor_handle)
     if return_counts:
-        counts = Tensor.from_handle(input.context_handle, counts)
+        counts = Tensor.from_handle(counts)
     if return_inverse and not return_counts:
         return out, indices
     elif not return_inverse and return_counts:

From 6efa31d51f79230093df3a584dd255a17dfd9ae0 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Fri, 21 Apr 2023 10:10:52 +0800
Subject: [PATCH 10/15] add sync

---
 python/conformance/conformance_test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/conformance/conformance_test.py b/python/conformance/conformance_test.py
index 7ad2164..af8c2b2 100644
--- a/python/conformance/conformance_test.py
+++ b/python/conformance/conformance_test.py
@@ -261,6 +261,7 @@ def run(func_name, model_name, filter_dtype_str_list):
                     info = convert_input_tensors(ctx, function_paras, test_tag, nhwc_list, dtype_list, filter_dtype_str_list)
                     tensor_info = info if info else tensor_info
                     output = eval(func_call)
+                    ctx.streamSync()
                     sum_to_compare = True if 'sorted' in kwargs and ~kwargs['sorted'] else False
                     passed = compare_with_gen_output(output, data['cfg'], output_reference, sum_to_compare) \
                         if need_output else True
@@ -300,6 +301,7 @@ def run(func_name, model_name, filter_dtype_str_list):
 
                     try:
                         grad_input = eval(f"F.{cfg_func_name}_backward(**kwargs, **backward_para)")
+                        ctx.streamSync()
                         passed = compare_with_gen_output(grad_input, data['cfg'], backward_out_reference)
                         logger.info(f"Run diopi_functions.{cfg_func_name}_backward succeed") \
                             if passed else logger.error(f"Run diopi_functions.{cfg_func_name}_backward failed", tag=test_tag, info=tensor_info)

From dc535cbac45d54594061c4c3b8c4fbfc1e164e6c Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Sun, 23 Apr 2023 10:43:58 +0800
Subject: [PATCH 11/15] skip the second destruction of a tensor

---
 python/conformance/diopi_runtime.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index 6457248..6e0d78b 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -174,6 +174,9 @@ def streamSync(self):
 
 default_context = Context()
 
+# store the context_hanlde.value where tensor maybe deconstruct twice
+skip_tensors_for_del_in_ctx = set()
+
 
 class Sizes(Structure):
     _fields_ = [("data", POINTER(c_int64)), ("len", c_int64)]
@@ -236,9 +239,13 @@ def from_handle(cls, tensor_handle):
         return cls(size=None, dtype=None, context_handle=ctx_handle, tensor_handle=tensor_handle)
 
     def __del__(self):
-        if self.context_handle.value is not None:
-            diopirt_lib._diopiDestoryTensor(self.context_handle,
-                                            self.tensor_handle)
+        # skip the deletion because of maybe causing double destructions
+        if skip_tensors_for_del_in_ctx is not None and self.context_handle.value in skip_tensors_for_del_in_ctx:
+            pass
+        else:
+            if self.context_handle.value is not None and self.tensor_handle.value is not None and diopirt_lib is not None:
+                diopirt_lib._diopiDestoryTensor(self.context_handle,
+                                                self.tensor_handle)
 
     def __str__(self):
         array = self.numpy()

From a7256d163bd8b063d690b99d1cab11d463cb5174 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Sun, 23 Apr 2023 11:07:32 +0800
Subject: [PATCH 12/15] fix skip del tensor bug

---
 python/conformance/diopi_runtime.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index 6e0d78b..5131cdc 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -236,6 +236,7 @@ def __init__(
     def from_handle(cls, tensor_handle):
         ctx_handle = ContextHandle()
         diopirt_lib._diopiTensorGetCtxHandle(tensor_handle, byref(ctx_handle))
+        skip_tensors_for_del_in_ctx.add(ctx_handle.value)
         return cls(size=None, dtype=None, context_handle=ctx_handle, tensor_handle=tensor_handle)
 
     def __del__(self):

From 4dca2f51746d2e489d23ebac578ff6080e02011d Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Sun, 23 Apr 2023 13:11:51 +0800
Subject: [PATCH 13/15] modify Tensor's __init__

---
 python/conformance/conformance_test.py |   6 +-
 python/conformance/diopi_functions.py  | 204 ++++++++++++-------------
 python/conformance/diopi_runtime.py    |   6 +-
 3 files changed, 108 insertions(+), 108 deletions(-)

diff --git a/python/conformance/conformance_test.py b/python/conformance/conformance_test.py
index af8c2b2..8fb5b82 100644
--- a/python/conformance/conformance_test.py
+++ b/python/conformance/conformance_test.py
@@ -307,10 +307,10 @@ def run(func_name, model_name, filter_dtype_str_list):
                             if passed else logger.error(f"Run diopi_functions.{cfg_func_name}_backward failed", tag=test_tag, info=tensor_info)
                         write_precision(data["cfg"], cfg_func_name + '_bp', passed)
                     except FunctionNotImplementedError as e:
-                        logger.error(f"NotImplemented: {e}")
+                        logger.error(f"NotImplemented: {e} in {func_call}")
                     except AttributeError as e:
-                        logger.error(f"AttributeError: {e}")
+                        logger.error(f"AttributeError: {e} in {func_call}")
                     except Exception as e:
-                        logger.error(f"Failed: {e}")
+                        logger.error(f"Failed: {e} in {func_call}")
             # do not forget to clear the ctx.
             ctx.clear()
diff --git a/python/conformance/diopi_functions.py b/python/conformance/diopi_functions.py
index e631269..174dc9c 100644
--- a/python/conformance/diopi_functions.py
+++ b/python/conformance/diopi_functions.py
@@ -3,7 +3,7 @@
 import math
 
 from ctypes import c_float, c_double, c_int64, c_bool, c_void_p, byref, pointer
-from .diopi_runtime import Sizes, Scalar, Tensor, TensorHandle, compute_nhwc_stride, compute_nhwc_stride_2d, compute_nhwc_stride_3d
+from .diopi_runtime import Sizes, Scalar, Tensor, TensorHandle, compute_nhwc_stride, compute_nhwc_stride_2d, compute_nhwc_stride_3d, default_context
 from .utils import check_returncode, check_function, glob_vars
 from . import Dtype, raw_like
 from collections import namedtuple
@@ -49,7 +49,7 @@ def reduce_op_process(input, dim=None, keepdim=False, dtype=None):
     if dtype is None:
         dtype = input.get_dtype()
 
-    out = Tensor(sizeO, dtype)
+    out = Tensor(sizeO, dtype, None, input.context_handle)
     return dim_list, out
 
 
@@ -121,7 +121,7 @@ def unary_op(input, inplace, call, dtype=None) -> Tensor:
         ret = func(input.context_handle, input.tensor_handle)
     else:
         if dtype is not None:
-            out = Tensor(input.size(), dtype)
+            out = Tensor(input.size(), dtype, None, input.context_handle)
         else:
             out = raw_like(input)
         func = check_function(call)
@@ -160,11 +160,11 @@ def binary_op_scalar(input, other, inplace, call, alpha=None, dtype=None) -> Ten
     else:
         sizeI = input.size()
         if not isinstance(other, Tensor):
-            out = Tensor(sizeI, dtype)
+            out = Tensor(sizeI, dtype, None, input.context_handle)
         else:
             sizeO = other.size()
             outsize = broadcast_out_size(list(sizeI), list(sizeO))
-            out = Tensor(outsize, dtype)
+            out = Tensor(outsize, dtype, None, input.context_handle)
         args = args + "out.tensor_handle, "
 
     if not isinstance(other, Tensor):
@@ -190,7 +190,7 @@ def softmax(input, dim, dtype=None):
         dim = 0
     if input.numel() == 0:
         return input
-    out = raw_like(input) if dtype is None else Tensor(input.size(), dtype)
+    out = raw_like(input) if dtype is None else Tensor(input.size(), dtype, None, input.context_handle)
 
     func = check_function('diopiSoftmax')
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, c_int64(dim))
@@ -305,11 +305,11 @@ def div(input, other, inplace=False, rounding_mode=None) -> Tensor:
     else:
         out_type = promote_type(input, Dtype.float32)
         if not isinstance(other, Tensor):
-            out = Tensor(sizeI, out_type)
+            out = Tensor(sizeI, out_type, None, input.context_handle)
         else:
             sizeO = other.size()
             outsize = broadcast_out_size(list(sizeI), list(sizeO))
-            out = Tensor(outsize, out_type)
+            out = Tensor(outsize, out_type, None, input.context_handle)
         args = args + "out.tensor_handle, "
 
     if not isinstance(other, Tensor):
@@ -365,7 +365,7 @@ def bmm(input, mat2) -> Tensor:
 
     size_out = size1
     size_out[2] = size2[2]
-    out = Tensor(size_out, input.get_dtype())
+    out = Tensor(size_out, input.get_dtype(), None, input.context_handle)
 
     func = check_function("diopiBmm")
     ret = func(input.context_handle, out.tensor_handle,
@@ -389,7 +389,7 @@ def addcmul(input, tensor1, tensor2, value=1, inplace=False) -> Tensor:
         ret = func(input.context_handle, input.tensor_handle,
                    tensor1.tensor_handle, tensor2.tensor_handle, value)
     else:
-        out = Tensor(sizeO, input.get_dtype())
+        out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
         func = check_function("diopiAddcmul")
         ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
                    tensor1.tensor_handle, tensor2.tensor_handle, value)
@@ -404,15 +404,15 @@ def matmul(input, other) -> Tensor:
 
     # vector x vector
     if len(sizeI) == 1 and len(sizeO) == 1:
-        out = Tensor((), input.get_dtype())
+        out = Tensor((), input.get_dtype(), None, input.context_handle)
     # (batched) matrix x vector
     elif len(sizeO) == 1:
         sizeI[-1] = 1
-        out = Tensor(sizeI, input.get_dtype())
+        out = Tensor(sizeI, input.get_dtype(), None, input.context_handle)
     # pretended matrix x (batched) matrix
     elif len(sizeI) == 1:
         sizeO[-2] = 1
-        out = Tensor(sizeO, input.get_dtype())
+        out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     # (batched) matrix x (batched) matrix
     else:
         sizeI[-1] = sizeO[-1]
@@ -420,7 +420,7 @@ def matmul(input, other) -> Tensor:
             assert sizeI[-3] == sizeO[-3] or sizeI[-3] == 1 or sizeO[-3] == 1,\
                 'input and other should be broadcastable'
             sizeI[-3] = sizeI[-3] if sizeI[-3] == 1 else sizeO[-3]
-        out = Tensor(sizeI, input.get_dtype())
+        out = Tensor(sizeI, input.get_dtype(), None, input.context_handle)
 
     func = check_function("diopiMatmul")
     ret = func(input.context_handle, out.tensor_handle,
@@ -535,7 +535,7 @@ def std(input, unbiased=True, dim=None, keepdim=False) -> Tensor:
 
 def min(input, dim=None, keepdim=False) -> Tensor:
     if dim is None:
-        out = Tensor([], input.get_dtype())
+        out = Tensor([], input.get_dtype(), None, input.context_handle)
         func = check_function("diopiMinAll")
         ret = func(input.context_handle, out.tensor_handle, input.tensor_handle)
         check_returncode(ret)
@@ -548,8 +548,8 @@ def min(input, dim=None, keepdim=False) -> Tensor:
         sizeI[dim] = 1
     else:
         del sizeI[dim]
-    out = Tensor(sizeI, input.get_dtype())
-    indices = Tensor(out.size(), glob_vars.int_type)
+    out = Tensor(sizeI, input.get_dtype(), None, input.context_handle)
+    indices = Tensor(out.size(), glob_vars.int_type, None, input.context_handle)
     func = check_function("diopiMin")
 
     ret = func(input.context_handle, out.tensor_handle, indices.tensor_handle,
@@ -595,7 +595,7 @@ def binary_cross_entropy(input, target, weight=None, reduction='mean'):
     if reduction == 'none':
         out = raw_like(input)
     else:
-        out = Tensor((), input.get_dtype())
+        out = Tensor((), input.get_dtype(), None, input.context_handle)
 
     reduction_mode = convert_reduction(reduction)
     func = check_function("diopiBCELoss")
@@ -629,7 +629,7 @@ def binary_cross_entropy_with_logits(input, target, weight=None,
     if reduction == 'none':
         out = raw_like(input)
     else:
-        out = Tensor((), input.get_dtype())
+        out = Tensor((), input.get_dtype(), None, input.context_handle)
 
     reduction_mode = convert_reduction(reduction)
     func = check_function("diopiBCEWithLogits")
@@ -654,9 +654,9 @@ def cross_entropy(input, target, weight=None, ignore_index=- 100,
     sizeI = list(input.size())
     sizeO = [sizeI[0]] + sizeI[2:]
     if reduction == 'none':
-        out = Tensor(sizeO, input.get_dtype())
+        out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     else:
-        out = Tensor((), input.get_dtype())
+        out = Tensor((), input.get_dtype(), None, input.context_handle)
 
     reduction_mode = convert_reduction(reduction)
     func = check_function("diopiCrossEntropyLoss")
@@ -676,7 +676,7 @@ def mse_loss(input, target, reduction='mean'):
     if reduction == 'none':
         out = raw_like(input)
     else:
-        out = Tensor((), input.get_dtype())
+        out = Tensor((), input.get_dtype(), None, input.context_handle)
 
     reduction_mode = convert_reduction(reduction)
     func = check_function("diopiMSELoss")
@@ -720,7 +720,7 @@ def conv2d(input, weight, bias=None, stride=1,
     dilation = Sizes(tuple(dilation))
 
     nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     func = check_function("diopiConvolution2d")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
                weight.tensor_handle, bias, stride, padding, dilation, groups)
@@ -756,7 +756,7 @@ def avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False,
     padding = Sizes(tuple(padding))
     kernel_size = Sizes(tuple(kernel_size))
     nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
 
     if divisor_override is None:
         divisor_override = c_void_p()
@@ -806,7 +806,7 @@ def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
     kernel_size = Sizes(tuple(kernel_size))
     dilation = Sizes(tuple(dilation))
     nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
 
     if not return_indices:
         func = check_function("diopiMaxPool2d")
@@ -818,7 +818,7 @@ def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
     else:
         func = check_function("diopiMaxPool2dWithIndices")
         nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None
-        indices = Tensor(sizeO, glob_vars.int_type, stride=nhwc_stride)
+        indices = Tensor(sizeO, glob_vars.int_type, stride=nhwc_stride, context_handle=input.context_handle)
         ret = func(input.context_handle, out.tensor_handle,
                    indices.tensor_handle, input.tensor_handle,
                    kernel_size, stride, padding, dilation, c_bool(ceil_mode))
@@ -846,7 +846,7 @@ def adaptive_avg_pool2d(input, output_size):
             sizeO.append(output_size[i])
 
     nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     output_size = Sizes((sizeO[-2], sizeO[-1]))
 
     func = check_function("diopiAdaptiveAvgPool2d")
@@ -876,13 +876,13 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
             sizeO.append(output_size[i])
 
     nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     output_size = Sizes(tuple(output_size))
 
     if return_indices:
         func = check_function("diopiAdaptiveMaxPool2dWithIndices")
         nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None
-        indices = Tensor(sizeO, glob_vars.int_type, stride=nhwc_stride)
+        indices = Tensor(sizeO, glob_vars.int_type, stride=nhwc_stride, context_handle=input.context_handle)
         ret = func(input.context_handle, out.tensor_handle, indices.tensor_handle,
                    input.tensor_handle, output_size)
         check_returncode(ret)
@@ -906,7 +906,7 @@ def dropout_impl(input, size_mask, p=0.5, training=True, inplace=False):
         out = raw_like(input)
         args = args + 'input.tensor_handle, '
 
-    mask = Tensor(size_mask, Dtype.uint8)
+    mask = Tensor(size_mask, Dtype.uint8, None, context_handle=input.context_handle)
     args = args + "c_double(p), c_bool(training)"
 
     func = check_function(call)
@@ -929,7 +929,7 @@ def dropout2d(input, p=0.5, training=True, inplace=False):
 def index_select(input, dim, index) -> Tensor:
     sizeI = list(input.size())
     sizeI[dim] = index.numel()
-    out = Tensor(sizeI, input.get_dtype())
+    out = Tensor(sizeI, input.get_dtype(), None, context_handle=input.context_handle)
 
     func = check_function("diopiIndexSelect")
     ret = func(input.context_handle, out.tensor_handle,
@@ -941,7 +941,7 @@ def index_select(input, dim, index) -> Tensor:
 def select(input, dim, index) -> Tensor:
     sizeI = list(input.size())
     del sizeI[dim]
-    out = Tensor(sizeI, input.get_dtype())
+    out = Tensor(sizeI, input.get_dtype(), None, context_handle=input.context_handle)
 
     func = check_function("diopiSelect")
     ret = func(input.context_handle, out.tensor_handle,
@@ -984,7 +984,7 @@ def linear(input, weight, bias=None) -> Tensor:
     sizeI = list(input.size())
     sizeW = list(weight.size())
     sizeI[-1] = sizeW[-2] if len(sizeW) == 2 else 1
-    out = Tensor(sizeI, input.get_dtype())
+    out = Tensor(sizeI, input.get_dtype(), None, context_handle=input.context_handle)
     func = check_function("diopiLinear")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
                weight.tensor_handle, bias)
@@ -997,7 +997,7 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0,
     sizeI = list(input.size())
     sizeW = weight.size()
     sizeI.append(sizeW[-1])
-    out = Tensor(sizeI, weight.get_dtype())
+    out = Tensor(sizeI, weight.get_dtype(), None, context_handle=input.context_handle)
     padding_idx = -100 if padding_idx is None else padding_idx
 
     if max_norm is not None:
@@ -1036,7 +1036,7 @@ def cat(tensors, dim=0) -> Tensor:
     c_tensors = (c_void_p * insNum)(*c_tensors)
 
     sizeI[dim] = sum
-    out = Tensor(sizeI, tensors[0].get_dtype())
+    out = Tensor(sizeI, tensors[0].get_dtype(), None, tensors[0].context_handle)
     func = check_function("diopiCat")
     ret = func(tensors[0].context_handle, out.tensor_handle,
                pointer(c_tensors), c_int64(insNum), c_int64(dim))
@@ -1058,7 +1058,7 @@ def stack(tensors, dim=0) -> Tensor:
     c_tensors = [t.tensor_handle for t in tensors]
     c_tensors = (c_void_p * insNum)(*c_tensors)
 
-    out = Tensor(sizeI, tensors[0].get_dtype())
+    out = Tensor(sizeI, tensors[0].get_dtype(), None, tensors[0].context_handle)
     func = check_function("diopiStack")
     ret = func(tensors[0].context_handle, out.tensor_handle,
                pointer(c_tensors), c_int64(insNum), c_int64(dim))
@@ -1069,7 +1069,7 @@ def stack(tensors, dim=0) -> Tensor:
 def sort(input, dim=- 1, descending=False, stable=False):
     vals = raw_like(input)
     sizeI = input.size()
-    indices = Tensor(sizeI, glob_vars.int_type)
+    indices = Tensor(sizeI, glob_vars.int_type, None, input.context_handle)
 
     stable = c_void_p() if stable is None else pointer(c_bool(stable))
 
@@ -1083,8 +1083,8 @@ def sort(input, dim=- 1, descending=False, stable=False):
 def topk(input, k, dim=-1, largest=True, sorted=True):
     sizeI = list(input.size())
     sizeI[dim] = k
-    values = Tensor(sizeI, input.get_dtype())
-    indices = Tensor(sizeI, glob_vars.int_type)
+    values = Tensor(sizeI, input.get_dtype(), None, input.context_handle)
+    indices = Tensor(sizeI, glob_vars.int_type, None, input.context_handle)
 
     func = check_function("diopiTopk")
     ret = func(input.context_handle, values.tensor_handle,
@@ -1097,7 +1097,7 @@ def topk(input, k, dim=-1, largest=True, sorted=True):
 def transpose(input, dim0, dim1) -> Tensor:
     sizeI = list(input.size())
     sizeI[dim0], sizeI[dim1] = sizeI[dim1], sizeI[dim0]
-    out = Tensor(sizeI, input.get_dtype())
+    out = Tensor(sizeI, input.get_dtype(), None, input.context_handle)
 
     func = check_function("diopiTranspose")
     ret = func(input.context_handle, out.tensor_handle,
@@ -1113,10 +1113,10 @@ def one_hot(input, num_classes=- 1):
     sizeI = input.size()
     if num_classes == -1:
         sizeI += (np.max(input.numpy()) + 1, )
-        out = Tensor(sizeI, glob_vars.int_type)
+        out = Tensor(sizeI, glob_vars.int_type, None, input.context_handle)
     else:
         sizeI += (num_classes, )
-        out = Tensor(sizeI, glob_vars.int_type)
+        out = Tensor(sizeI, glob_vars.int_type, None, input.context_handle)
 
     func = check_function("diopiOneHot")
     ret = func(input.context_handle, out.tensor_handle,
@@ -1141,7 +1141,7 @@ def split(tensor, split_size_or_sections, dim=0):
         idx += 1
         sum -= sizeI[dim]
         splitSizes += (sizeI[dim], )
-        out = Tensor(sizeI, tensor.get_dtype())
+        out = Tensor(sizeI, tensor.get_dtype(), None, tensor.context_handle)
         outs.append(out)
 
     c_outs = []
@@ -1172,7 +1172,7 @@ def pow(input=None, self=None, exponent=None, inplace=False) -> Tensor:
             out_dtype = exponent_dtype if exponent_dtype in float_types else Dtype.float32
         else:
             out_dtype = exponent_dtype
-        out = Tensor(exponent.size(), out_dtype)
+        out = Tensor(exponent.size(), out_dtype, None, default_context.context_handle)
         self = byref(Scalar(self))
         ret = func(exponent.context_handle, out.tensor_handle, self, exponent.tensor_handle)
     elif not isinstance(exponent, Tensor):
@@ -1186,7 +1186,7 @@ def pow(input=None, self=None, exponent=None, inplace=False) -> Tensor:
             func = check_function("diopiPow")
             input_dtype = input.get_dtype()
             out_dtype = Dtype.float32 if input_dtype not in float_types else input_dtype
-            out = Tensor(input.size(), out_dtype)
+            out = Tensor(input.size(), out_dtype, None, input.context_handle)
             ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, exponent)
     elif inplace:
         func = check_function("diopiPowInpTensor")
@@ -1196,7 +1196,7 @@ def pow(input=None, self=None, exponent=None, inplace=False) -> Tensor:
         sizeE = list(exponent.size())
         sizeO = broadcast_out_size(sizeI, sizeE)
         out_dtype = common_dtype(input, exponent)
-        out = Tensor(sizeO, out_dtype)
+        out = Tensor(sizeO, out_dtype, None, default_context.context_handle)
         func = check_function("diopiPowTensor")
         ret = func(input.context_handle, out.tensor_handle,
                    input.tensor_handle, exponent.tensor_handle)
@@ -1218,7 +1218,7 @@ def where(condition, input, other) -> Tensor:
     sizeO = broadcast_out_size(sizeC, sizeO)
     assert (input.get_dtype() == other.get_dtype()),\
         " input and other shoule be the same type "
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
 
     func = check_function("diopiWhere")
     ret = func(input.context_handle, out.tensor_handle, condition.tensor_handle,
@@ -1283,7 +1283,7 @@ def log_softmax(input, dim=None, dtype=None):
         dim = 0
     if input.numel() == 0:
         return input
-    out = raw_like(input) if dtype is None else Tensor(input.size(), dtype)
+    out = raw_like(input) if dtype is None else Tensor(input.size(), dtype, None, input.context_handle)
 
     func = check_function('diopiLogSoftmax')
     ret = func(input.context_handle, out.tensor_handle,
@@ -1362,7 +1362,7 @@ def addcdiv(input, tensor1, tensor2, value=1, inplace=False) -> Tensor:
         ret = func(input.context_handle, input.tensor_handle,
                    tensor1.tensor_handle, tensor2.tensor_handle, value)
     else:
-        out = Tensor(sizeO, input.get_dtype())
+        out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
         func = check_function("diopiAddcdiv")
         ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
                    tensor1.tensor_handle, tensor2.tensor_handle, value)
@@ -1376,7 +1376,7 @@ def addmm(input, mat1, mat2, beta=1, alpha=1) -> Tensor:
     size1[-1] = size2[-1]
     sizeI = list(input.size())
     sizeO = broadcast_out_size(sizeI, size1)
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     alpha = byref(Scalar(alpha))
     beta = byref(Scalar(beta))
 
@@ -1401,7 +1401,7 @@ def sum(input, dim=None, keepdim=False, dtype=None) -> Tensor:
 
 def max(input, dim=None, keepdim=False):
     if dim is None:
-        out = Tensor([], input.get_dtype())
+        out = Tensor([], input.get_dtype(), None, input.context_handle)
         func = check_function("diopiMaxAll")
         ret = func(input.context_handle, out.tensor_handle, input.tensor_handle)
         check_returncode(ret)
@@ -1413,8 +1413,8 @@ def max(input, dim=None, keepdim=False):
         sizeI[dim] = 1
     else:
         del sizeI[dim]
-    out = Tensor(sizeI, input.get_dtype())
-    indices = Tensor(out.size(), glob_vars.int_type)
+    out = Tensor(sizeI, input.get_dtype(), None, input.context_handle)
+    indices = Tensor(out.size(), glob_vars.int_type, None, input.context_handle)
 
     func = check_function("diopiMax")
     ret = func(input.context_handle, out.tensor_handle, indices.tensor_handle,
@@ -1427,7 +1427,7 @@ def max(input, dim=None, keepdim=False):
 
 def any(input, dim=None, keepdim=False) -> Tensor:
     if dim is None:
-        out = Tensor([], Dtype.bool)
+        out = Tensor([], Dtype.bool, None, input.context_handle)
         dim = c_void_p()
     else:
         assert isinstance(dim, int), "dim should be int"
@@ -1441,7 +1441,7 @@ def any(input, dim=None, keepdim=False) -> Tensor:
 
 def all(input, dim=None, keepdim=False) -> Tensor:
     if dim is None:
-        out = Tensor([], Dtype.bool)
+        out = Tensor([], Dtype.bool, None, input.context_handle)
         dim = c_void_p()
     else:
         assert isinstance(dim, int), "dim should be int"
@@ -1465,9 +1465,9 @@ def nll_loss(input, target, weight=None, ignore_index=-100, reduction='mean'):
         weight = c_void_p()
 
     if reduction == 'none':
-        out = Tensor(target.size(), input.get_dtype())
+        out = Tensor(target.size(), input.get_dtype(), None, input.context_handle)
     else:
-        out = Tensor((), input.get_dtype())
+        out = Tensor((), input.get_dtype(), None, input.context_handle)
 
     reduction_mode = convert_reduction(reduction)
     func = check_function("diopiNLLLoss")
@@ -1486,7 +1486,7 @@ def sigmoid_focal_loss(inputs, targets, alpha=0.25, gamma=2, reduction='none') -
     if reduction == 'none':
         out = raw_like(inputs)
     else:
-        out = Tensor((), inputs.get_dtype())
+        out = Tensor((), inputs.get_dtype(), None, inputs.context_handle)
 
     reduction_mode = convert_reduction(reduction)
     func = check_function("diopiSigmoidFocalLoss")
@@ -1531,7 +1531,7 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1, a
     sizeI[-2] = output_size[-2]
 
     nhwc_stride = compute_nhwc_stride_2d(sizeI) if glob_vars.nhwc else None
-    out = Tensor(sizeI, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeI, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     func = check_function("diopiRoiAlign")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
                boxes.tensor_handle, c_double(spatial_scale), c_int64(output_size[-2]),
@@ -1544,7 +1544,7 @@ def slice_op(input, dim, index) -> Tensor:
     sizeI = list(input.size())
     num = int((index.stop - index.start + index.step - 1) / index.step)
     sizeI[dim] = num
-    out = Tensor(sizeI, input.get_dtype())
+    out = Tensor(sizeI, input.get_dtype(), None, input.context_handle)
 
     func = check_function("diopiSlice")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
@@ -2054,7 +2054,7 @@ def arange(end, start=0, step=1, dtype=None) -> Tensor:
             dtype = glob_vars.int_type
 
     numel = int((end - start) / step)
-    out = Tensor((numel,), dtype)
+    out = Tensor((numel,), dtype, None, default_context.context_handle)
 
     func = check_function("diopiArange")
     ret = func(out.context_handle, out.tensor_handle, byref(Scalar(start)), byref(Scalar(end)), byref(Scalar(step)))
@@ -2065,7 +2065,7 @@ def arange(end, start=0, step=1, dtype=None) -> Tensor:
 def randperm(n: int, dtype=None) -> Tensor:
     dtype = glob_vars.int_type if dtype is None else dtype
     numel = n
-    out = Tensor((numel,), dtype)
+    out = Tensor((numel,), dtype, None, default_context.context_handle)
 
     func = check_function("diopiRandperm")
     ret = func(out.context_handle, out.tensor_handle, c_int64(n), c_int64(0))
@@ -2213,7 +2213,7 @@ def conv_transpose2d(input, weight, bias=None, stride=1,
     output_padding = Sizes(tuple(output_padding))
     dilation = Sizes(tuple(dilation))
 
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     func = check_function("diopiConvTranspose2d")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
                weight.tensor_handle, bias, stride, padding, output_padding, c_int64(groups), dilation)
@@ -2227,7 +2227,7 @@ def cumsum(input, dim, dtype=None):
     sizeI = list(input.size())
     assert dim < len(sizeI), "dim out of index"
 
-    out = Tensor(input.size(), promote_type(input, Dtype.int64)) if dtype is None else Tensor(input.size(), dtype)
+    out = Tensor(input.size(), promote_type(input, Dtype.int64), None, input.context_handle) if dtype is None else Tensor(input.size(), dtype, None, input.context_handle)
     func = check_function("diopiCumsum")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, c_int64(dim))
     check_returncode(ret)
@@ -2256,7 +2256,7 @@ def cdist(x1, x2, p, compute_mode=None):
             "size1 and size2 must be broadcastable"
         sizeO[i] = sizeX1[i] if sizeX2[i] == 1 else sizeX2[i]
     sizeO[-1] = sizeX2[-2]
-    out = Tensor(sizeO, x1.get_dtype())
+    out = Tensor(sizeO, x1.get_dtype(), None, x1.context_handle)
     func = check_function("diopiCdist")
     ret = func(x1.context_handle, out.tensor_handle, x1.tensor_handle, x2.tensor_handle, c_double(p), compute_mode)
     check_returncode(ret)
@@ -2288,7 +2288,7 @@ def reciprocal(input, inplace=False) -> Tensor:
         func = check_function(call)
         ret = func(input.context_handle, input.tensor_handle)
     else:
-        out = Tensor(input.size(), promote_type(input, Dtype.float32))
+        out = Tensor(input.size(), promote_type(input, Dtype.float32), None, input.context_handle)
         func = check_function(call)
         ret = func(input.context_handle, out.tensor_handle, input.tensor_handle)
 
@@ -2339,7 +2339,7 @@ def argmax(input, dim=None, keepdim=False):
         sizeO = [1]
         dim = c_void_p()
 
-    out = Tensor(sizeO, glob_vars.int_type)
+    out = Tensor(sizeO, glob_vars.int_type, None, input.context_handle)
     func = check_function("diopiArgmax")
     # todo: check the reason of using keepdim
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, dim, c_bool(keepdim))
@@ -2357,7 +2357,7 @@ def smooth_l1_loss(input, target, reduction='mean', beta=1.0):
     if reduction == 'none':
         out = raw_like(input)
     else:
-        out = Tensor((), input.get_dtype())
+        out = Tensor((), input.get_dtype(), None, input.context_handle)
 
     reduction_mode = convert_reduction(reduction)
     func = check_function("diopiSmoothL1Loss")
@@ -2381,7 +2381,7 @@ def smooth_l1_loss_backward(input, grad_outputs, target, reduction='mean', beta=
 
 def maximum(input, other) -> Tensor:
     size = broadcast_out_size(list(input.size()), list(other.size()))
-    out = Tensor(size, common_dtype(input, other))
+    out = Tensor(size, common_dtype(input, other), None, input.context_handle)
 
     func = check_function("diopiMaximum")
     ret = func(input.context_handle, out.tensor_handle,
@@ -2392,7 +2392,7 @@ def maximum(input, other) -> Tensor:
 
 def minimum(input, other) -> Tensor:
     size = broadcast_out_size(list(input.size()), list(other.size()))
-    out = Tensor(size, common_dtype(input, other))
+    out = Tensor(size, common_dtype(input, other), None, input.context_handle)
 
     func = check_function("diopiMinimum")
     ret = func(input.context_handle, out.tensor_handle,
@@ -2410,7 +2410,7 @@ def mm(input, mat2) -> Tensor:
 
     size_out = size1
     size_out[1] = size2[1]
-    out = Tensor(size_out, input.get_dtype())
+    out = Tensor(size_out, input.get_dtype(), None, input.context_handle)
 
     func = check_function("diopiMm")
     ret = func(input.context_handle, out.tensor_handle,
@@ -2453,7 +2453,7 @@ def conv3d(input, weight, bias=None, stride=1,
     dilation = Sizes(tuple(dilation))
 
     nhwc_stride = compute_nhwc_stride_3d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     func = check_function("diopiConvolution3d")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle,
                weight.tensor_handle, bias, stride, padding, dilation, c_int64(groups))
@@ -2518,7 +2518,7 @@ def expand(input, size) -> Tensor:
     if len(size) > len(SizeI):
         assert size[0] >= 0, "the size of new dimension can't be negative"
 
-    out = Tensor(size, input.get_dtype())
+    out = Tensor(size, input.get_dtype(), None, input.context_handle)
 
     func = check_function("diopiExpand")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle)
@@ -2531,7 +2531,7 @@ def unfold(input, dimension, size, step):
     sizeO[dimension] = int((sizeO[dimension] - size) / step + 1)
     sizeO.append(size)
 
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     func = check_function("diopiUnfold")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, c_int64(dimension), c_int64(size), c_int64(step))
     check_returncode(ret)
@@ -2606,7 +2606,7 @@ def index_fill(input, dim, index, value, inplace=False) -> Tensor:
 def linspace(start, end, steps, dtype=None):
     dtype = Dtype.float32 if dtype is None else dtype
 
-    out = Tensor((steps, ), dtype)
+    out = Tensor((steps, ), dtype, None, default_context.context_handle)
 
     start = byref(Scalar(start))
     end = byref(Scalar(end))
@@ -2647,7 +2647,7 @@ def norm(input, p, dim=None, keepdim=False, dtype=None):
 
 def group_norm(input, num_groups, weight=None, bias=None, eps=1e-05, backward=False):
     dim = list(input.size())
-    save_mean = Tensor((dim[0], num_groups), input.get_dtype())
+    save_mean = Tensor((dim[0], num_groups), input.get_dtype(), None, input.context_handle)
     save_invstd = raw_like(save_mean)
 
     weight = c_void_p() if weight is None else weight.tensor_handle
@@ -2685,7 +2685,7 @@ def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05, backw
     sizeI = input.size()
     dims = len(sizeI) - len(normalized_shape)
     size = [i for i in sizeI[0:dims]]
-    save_mean = Tensor(size, input.get_dtype())
+    save_mean = Tensor(size, input.get_dtype(), None, input.context_handle)
     save_invstd = raw_like(save_mean)
 
     weight = c_void_p() if weight is None else weight.tensor_handle
@@ -2752,7 +2752,7 @@ def adaptive_avg_pool3d(input, output_size):
             sizeO.append(output_size[i])
 
     nhwc_stride = compute_nhwc_stride_3d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     output_size = Sizes((sizeO[-3], sizeO[-2], sizeO[-1]))
 
     func = check_function("diopiAdaptiveAvgPool3d")
@@ -2793,13 +2793,13 @@ def adaptive_max_pool3d(input, output_size, return_indices=False):
             sizeO.append(output_size[i])
 
     nhwc_stride = compute_nhwc_stride_3d(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     output_size = Sizes(tuple(output_size))
 
     if return_indices:
         func = check_function("diopiAdaptiveMaxPool3dWithIndices")
         nhwc_stride = compute_nhwc_stride_3d(sizeO) if glob_vars.nhwc else None
-        indices = Tensor(sizeO, glob_vars.int_type, stride=nhwc_stride)
+        indices = Tensor(sizeO, glob_vars.int_type, stride=nhwc_stride, context_handle=input.context_handle)
         ret = func(input.context_handle, out.tensor_handle, indices.tensor_handle,
                    input.tensor_handle, output_size)
         check_returncode(ret)
@@ -2857,7 +2857,7 @@ def max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1,
     padding = Sizes(tuple(padding))
     kernel_size = Sizes(tuple(kernel_size))
     dilation = Sizes(tuple(dilation))
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
 
     if not return_indices:
         func = check_function("diopiMaxPool3d")
@@ -2913,7 +2913,7 @@ def permute(input, dims=None) -> Tensor:
     sizeO = list(input.size())
     for i in range(len(dims)):
         sizeO[i] = sizeI[dims[i]]
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     dims = Sizes(tuple(dims))
     func = check_function("diopiPermute")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, dims)
@@ -2931,7 +2931,7 @@ def copy_(input, other) -> Tensor:
 def gather(input, dim, index):
     assert isinstance(dim, int), "dim must be int"
     assert len(input.size()) == len(index.size()), "input and index must have the same number of dimensions"
-    out = Tensor(index.size(), input.get_dtype())
+    out = Tensor(index.size(), input.get_dtype(), None, input.context_handle)
     func = check_function("diopiGather")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, c_int64(dim), index.tensor_handle)
     check_returncode(ret)
@@ -2966,20 +2966,20 @@ def remainder(other, input=None, self=None):
                     if sizeO[i] == 1:
                         sizeO[i] = sizeOther[i]
             out_dtype = common_dtype(input, other)
-            out = Tensor(sizeO, out_dtype)
+            out = Tensor(sizeO, out_dtype, None, input.context_handle)
             input = input.tensor_handle
             other = other.tensor_handle
         else:
             call += "Scalar"
             out_dtype = common_dtype(input, other)
-            out = Tensor(input.size(), out_dtype)
+            out = Tensor(input.size(), out_dtype, None, default_context.context_handle)
             other = byref(Scalar(other))
             input = input.tensor_handle
     else:
         assert isinstance(other, Tensor), "input or other must be tensor"
         context = other.context_handle
         out_dtype = common_dtype(input, other)
-        out = Tensor(other.size(), out_dtype)
+        out = Tensor(other.size(), out_dtype, None, other.context_handle)
         input = byref(Scalar(input))
         other = other.tensor_handle
     func = check_function(call)
@@ -2996,8 +2996,8 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
     max_target_length = 2 * max_target_length + 1
     if reduction == 'none':
         sizeO = (sizeI[1], )
-    neg_log_likelihood = Tensor((sizeI[1], ), log_probs.get_dtype())
-    log_alpha = Tensor((sizeI[1], sizeI[0], max_target_length), log_probs.get_dtype())
+    neg_log_likelihood = Tensor((sizeI[1], ), log_probs.get_dtype(), None, default_context.context_handle)
+    log_alpha = Tensor((sizeI[1], sizeI[0], max_target_length, None, default_context.context_handle), log_probs.get_dtype())
     out = Tensor(sizeO, log_probs.get_dtype())
 
     func = check_function("diopiCTCLoss")
@@ -3108,7 +3108,7 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
             sizeI[i] = int(scale_factor[i - 2] * sizeI[i])
 
     nhwc_stride = compute_nhwc_stride(sizeI) if glob_vars.nhwc else None
-    out = Tensor(sizeI, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeI, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
 
     c_size = Sizes(tuple(sizeI[2:]))
     if mode == "nearest":
@@ -3161,7 +3161,7 @@ def pad(input, pad, mode='constant', value=None):
         value = byref(c_double(value))
 
     nhwc_stride = compute_nhwc_stride(sizeO) if glob_vars.nhwc else None
-    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride)
+    out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride, context_handle=input.context_handle)
     func = check_function("diopiPad")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, pad,
                mode.encode('UTF-8'), value)
@@ -3175,7 +3175,7 @@ def unique(input, sorted=True, return_inverse=False, return_counts=False, dim=No
         sizeI = list(input.size())
         if dim is not None:
             sizeI = (sizeI[dim], )
-        indices = Tensor(sizeI, glob_vars.int_type)
+        indices = Tensor(sizeI, glob_vars.int_type, None, input.context_handle)
         indices_handle = indices.tensor_handle
     else:
         indices_handle = c_void_p()
@@ -3293,7 +3293,7 @@ def im2col(input, kernel_size, dilation=1, padding=0, stride=1) -> Tensor:
     kernel_size = Sizes(tuple(kernel_size))
     dilation = Sizes(tuple(dilation))
 
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     func = check_function("diopiIm2Col")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, kernel_size,
                dilation, padding, stride)
@@ -3326,7 +3326,7 @@ def col2im(input, output_size, kernel_size, dilation=1, padding=0, stride=1) ->
     kernel_size = Sizes(tuple(kernel_size))
     dilation = Sizes(tuple(dilation))
 
-    out = Tensor(sizeO, input.get_dtype())
+    out = Tensor(sizeO, input.get_dtype(), None, input.context_handle)
     func = check_function("diopiCol2Im")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, output_size, kernel_size,
                dilation, padding, stride)
@@ -3347,7 +3347,7 @@ def cholesky_ex(input, upper=False, check_errors=False):
     out = raw_like(input)
     sizeI = input.size()
     nums = sizeI[0:-2] if len(sizeI) > 2 else ()
-    info = Tensor(nums, Dtype.int32)
+    info = Tensor(nums, Dtype.int32, None, input.context_handle)
     func = check_function("diopiCholesky")
     ret = func(input.context_handle, out.tensor_handle, info.tensor_handle, input.tensor_handle, c_bool(upper), c_bool(check_errors))
     check_returncode(ret)
@@ -3368,9 +3368,9 @@ def triangular_solve(input, A, upper=True, transpose=False, unitriangular=False)
     sizeI = list(input.size())
     sizeO = sizeA if len(sizeA) > len(sizeI) else sizeI
     sizeO[-1] = sizeI[-1]
-    out = Tensor(sizeO, A.get_dtype())
+    out = Tensor(sizeO, A.get_dtype(), None, A.context_handle)
     sizeO[-1] = sizeA[-1]
-    cloned_mat = Tensor(sizeO, A.get_dtype())
+    cloned_mat = Tensor(sizeO, A.get_dtype(), None, A.context_handle)
     func = check_function("diopiTriangularSolve")
     ret = func(input.context_handle, out.tensor_handle, cloned_mat.tensor_handle, input.tensor_handle,
                A.tensor_handle, c_bool(upper), c_bool(transpose), c_bool(unitriangular))
@@ -3408,7 +3408,7 @@ def repeat(input, repeats):
     sizeO = Sizes(output_size)
     repeats_size = Sizes(repeats)
 
-    out = Tensor(output_size, input.get_dtype())
+    out = Tensor(output_size, input.get_dtype(), None, input.context_handle)
     func = check_function("diopiRepeat")
     ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, repeats_size)
     check_returncode(ret)
@@ -3419,17 +3419,17 @@ def normal(mean, std, size=None):
     call = "diopiNormal"
     if isinstance(mean, Tensor) and isinstance(std, Tensor):
         assert mean.numel() == std.numel(), 'the total number of elements in each tensor need to be the same.'
-        out = Tensor(mean.size(), mean.get_dtype())
+        out = Tensor(mean.size(), mean.get_dtype(), None, mean.context_handle)
         call += "Tensor"
     elif isinstance(mean, Tensor):
-        out = Tensor(mean.size(), mean.get_dtype())
+        out = Tensor(mean.size(), mean.get_dtype(), None, mean.context_handle)
         call += "TensorScalar"
     elif isinstance(std, Tensor):
-        out = Tensor(std.size(), std.get_dtype())
+        out = Tensor(std.size(), std.get_dtype(), None, std.context_handle)
         call += "ScalarTensor"
     else:
         assert size is not None, "need the shape of output while both mean and std are scalar"
-        out = Tensor(size, Dtype.float32)
+        out = Tensor(size, Dtype.float32, None, default_context.context_handle)
 
     arg_mean = mean.tensor_handle if isinstance(mean, Tensor) else c_double(mean)
     arg_std = std.tensor_handle if isinstance(std, Tensor) else c_double(std)
@@ -3460,7 +3460,7 @@ def meshgrid(tensors, shape=None):
         c_tensors.append(tensor.tensor_handle)
         dims.append(tensor.size()[0])
     c_tensors = (c_void_p * inputsNum)(*c_tensors)
-    out = [Tensor(dims, tensors[0].get_dtype()) for i in range(inputsNum)]
+    out = [Tensor(dims, tensors[0].get_dtype(), None, tensors[0].context_handle) for i in range(inputsNum)]
     for tensor in out:
         co_tensors.append(tensor.tensor_handle)
     co_tensors = (c_void_p * inputsNum)(*co_tensors)
diff --git a/python/conformance/diopi_runtime.py b/python/conformance/diopi_runtime.py
index 5131cdc..aaeccbe 100644
--- a/python/conformance/diopi_runtime.py
+++ b/python/conformance/diopi_runtime.py
@@ -209,8 +209,8 @@ def __init__(
         self,
         size,
         dtype,
-        stride=None,
-        context_handle=default_context.get_handle(),
+        stride,
+        context_handle,
         tensor_handle=None,
     ):
         if tensor_handle is not None and size is None:
@@ -237,7 +237,7 @@ def from_handle(cls, tensor_handle):
         ctx_handle = ContextHandle()
         diopirt_lib._diopiTensorGetCtxHandle(tensor_handle, byref(ctx_handle))
         skip_tensors_for_del_in_ctx.add(ctx_handle.value)
-        return cls(size=None, dtype=None, context_handle=ctx_handle, tensor_handle=tensor_handle)
+        return cls(size=None, dtype=None, stride=None, context_handle=ctx_handle, tensor_handle=tensor_handle)
 
     def __del__(self):
         # skip the deletion because of maybe causing double destructions

From a8cf5dcd2a5d425ef0adefb5b9ec981a038d01e2 Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Sun, 23 Apr 2023 13:22:57 +0800
Subject: [PATCH 14/15] fix bug: pass the now-required stride/context args to Tensor() in max_pool3d and ctc_loss

---
 python/conformance/diopi_functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/conformance/diopi_functions.py b/python/conformance/diopi_functions.py
index 174dc9c..bc70753 100644
--- a/python/conformance/diopi_functions.py
+++ b/python/conformance/diopi_functions.py
@@ -2868,7 +2868,7 @@ def max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1,
         return out
     else:
         func = check_function("diopiMaxPool3dWithIndices")
-        indices = Tensor(sizeO, glob_vars.int_type)
+        indices = Tensor(sizeO, glob_vars.int_type, None, default_context.context_handle)
         ret = func(input.context_handle, out.tensor_handle,
                    indices.tensor_handle, input.tensor_handle,
                    kernel_size, stride, padding, dilation, c_bool(ceil_mode))
@@ -2998,7 +2998,7 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
         sizeO = (sizeI[1], )
     neg_log_likelihood = Tensor((sizeI[1], ), log_probs.get_dtype(), None, default_context.context_handle)
     log_alpha = Tensor((sizeI[1], sizeI[0], max_target_length, None, default_context.context_handle), log_probs.get_dtype())
-    out = Tensor(sizeO, log_probs.get_dtype())
+    out = Tensor(sizeO, log_probs.get_dtype(), None, default_context.context_handle)
 
     func = check_function("diopiCTCLoss")
     ret = func(log_probs.context_handle, out.tensor_handle, neg_log_likelihood.tensor_handle,

From 3e149bab59d5747dc4db011d02f1a33b04acd09e Mon Sep 17 00:00:00 2001
From: yangbofun <yangbofun@163.com>
Date: Sun, 23 Apr 2023 15:46:16 +0800
Subject: [PATCH 15/15] fix bug: move stride/context args out of the log_alpha size tuple in ctc_loss

---
 python/conformance/diopi_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/conformance/diopi_functions.py b/python/conformance/diopi_functions.py
index bb5230a..a455541 100644
--- a/python/conformance/diopi_functions.py
+++ b/python/conformance/diopi_functions.py
@@ -3068,7 +3068,7 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
     if reduction == 'none':
         sizeO = (sizeI[1], )
     neg_log_likelihood = Tensor((sizeI[1], ), log_probs.get_dtype(), None, default_context.context_handle)
-    log_alpha = Tensor((sizeI[1], sizeI[0], max_target_length, None, default_context.context_handle), log_probs.get_dtype())
+    log_alpha = Tensor((sizeI[1], sizeI[0], max_target_length), log_probs.get_dtype(), None, default_context.context_handle)
     out = Tensor(sizeO, log_probs.get_dtype(), None, default_context.context_handle)
 
     func = check_function("diopiCTCLoss")