From 22e2b29750d5fdf6e2236e2ce24958838773dbd2 Mon Sep 17 00:00:00 2001
From: Evangelos Katralis <vkatralis@gmail.com>
Date: Wed, 10 Dec 2025 01:10:11 +0100
Subject: [PATCH 1/6] Add support for ROCm in ContextCupy

---
 tests/test_shared_memory.py  |  2 +-
 xobjects/context_cupy.py     |  7 +++++--
 xobjects/headers/atomicadd.h | 11 ++++++++++-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/test_shared_memory.py b/tests/test_shared_memory.py
index 31156d6..b010cdd 100644
--- a/tests/test_shared_memory.py
+++ b/tests/test_shared_memory.py
@@ -47,7 +47,7 @@ class TestElement(xo.HybridClass):
                   unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x;  // global thread ID: 0,1,2,3
 
                   // init shared memory with chunk of input array
-                  extern __shared__ double sdata[2];
+                  extern __shared__ double sdata[];
                   sdata[tid] = input_arr[gid];
                   __syncthreads();
 
diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py
index 3838376..4de785b 100644
--- a/xobjects/context_cupy.py
+++ b/xobjects/context_cupy.py
@@ -352,15 +352,18 @@ def __invert__(self):
 
 cudaheader: List[SourceType] = [
     """\
-typedef signed long long   int64_t;  //only_for_context cuda
 typedef signed int         int32_t;  //only_for_context cuda
 typedef signed short       int16_t;  //only_for_context cuda
 typedef signed char        int8_t;   //only_for_context cuda
-typedef unsigned long long uint64_t; //only_for_context cuda
 typedef unsigned int       uint32_t; //only_for_context cuda
 typedef unsigned short     uint16_t; //only_for_context cuda
 typedef unsigned char      uint8_t;  //only_for_context cuda
 
+#if defined(__CUDACC__) && !defined(__HIPCC__)
+typedef signed long long   int64_t;
+typedef unsigned long long uint64_t;
+#endif
+
 """
 ]
 
diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h
index 6782705..6e40d4a 100644
--- a/xobjects/headers/atomicadd.h
+++ b/xobjects/headers/atomicadd.h
@@ -101,7 +101,7 @@ DEF_ATOMIC_ADD(double  , f64)
 // -------------------------------------------
 #if defined(XO_CONTEXT_CUDA)
     // CUDA compiler may not have <stdint.h>, so define the types if needed.
-    #ifdef __CUDACC_RTC__
+    #if defined(__CUDACC_RTC__) && !defined(__HIPCC__)
         // NVRTC (CuPy RawModule default) can’t see <stdint.h>, so detect it via __CUDACC_RTC__
         typedef signed char        int8_t;
         typedef short              int16_t;
@@ -111,6 +111,15 @@ DEF_ATOMIC_ADD(double  , f64)
         typedef unsigned short     uint16_t;
         typedef unsigned int       uint32_t;
         typedef unsigned long long uint64_t;
+    #elif defined(__HIPCC__) && !defined(__CUDACC_RTC__)
+        typedef signed char        int8_t;
+        typedef short              int16_t;
+        typedef int                int32_t;
+        // typedef long long          int64_t;
+        typedef unsigned char      uint8_t;
+        typedef unsigned short     uint16_t;
+        typedef unsigned int       uint32_t;
+        // typedef unsigned long long uint64_t;
     #else
         // Alternatively, NVCC path is fine with host headers
         #include <stdint.h>

From 4f87acbb33a0cd372737b957608c313f74a7e439 Mon Sep 17 00:00:00 2001
From: Evangelos Katralis <vkatralis@gmail.com>
Date: Wed, 10 Dec 2025 09:55:26 +0100
Subject: [PATCH 2/6] Add definition for NULL, missing for ROCm

---
 xobjects/context_cupy.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py
index 4de785b..5ba68e6 100644
--- a/xobjects/context_cupy.py
+++ b/xobjects/context_cupy.py
@@ -364,6 +364,10 @@ def __invert__(self):
 typedef unsigned long long uint64_t;
 #endif
 
+#ifndef NULL
+  #define NULL nullptr
+#endif
+
 """
 ]
 

From 19258bdf406e47b157c2d0c5a8f4d7e718168e08 Mon Sep 17 00:00:00 2001
From: Evangelos Katralis <vkatralis@gmail.com>
Date: Mon, 15 Dec 2025 14:47:58 +0100
Subject: [PATCH 3/6] Clean up and add comments

---
 xobjects/headers/atomicadd.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h
index 6e40d4a..89e4f83 100644
--- a/xobjects/headers/atomicadd.h
+++ b/xobjects/headers/atomicadd.h
@@ -112,14 +112,13 @@ DEF_ATOMIC_ADD(double  , f64)
         typedef unsigned int       uint32_t;
         typedef unsigned long long uint64_t;
     #elif defined(__HIPCC__) && !defined(__CUDACC_RTC__)
+        // ROCm appears to have definitions for 64-bit int types
         typedef signed char        int8_t;
         typedef short              int16_t;
         typedef int                int32_t;
-        // typedef long long          int64_t;
         typedef unsigned char      uint8_t;
         typedef unsigned short     uint16_t;
         typedef unsigned int       uint32_t;
-        // typedef unsigned long long uint64_t;
     #else
         // Alternatively, NVCC path is fine with host headers
         #include <stdint.h>

From 947e0f267a3ef374523793fa68fa7de04fe3e10f Mon Sep 17 00:00:00 2001
From: Evangelos Katralis <vkatralis@gmail.com>
Date: Sat, 17 Jan 2026 00:33:55 +0100
Subject: [PATCH 4/6] Fix headers on HIPRTC

---
 xobjects/context_cupy.py     | 2 +-
 xobjects/headers/atomicadd.h | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py
index 5ba68e6..5bc9dc0 100644
--- a/xobjects/context_cupy.py
+++ b/xobjects/context_cupy.py
@@ -359,7 +359,7 @@ def __invert__(self):
 typedef unsigned short     uint16_t; //only_for_context cuda
 typedef unsigned char      uint8_t;  //only_for_context cuda
 
-#if defined(__CUDACC__) && !defined(__HIPCC__)
+#if defined(__CUDACC__) || defined(__HIPCC_RTC__)
 typedef signed long long   int64_t;
 typedef unsigned long long uint64_t;
 #endif
diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h
index 89e4f83..ab4f1f3 100644
--- a/xobjects/headers/atomicadd.h
+++ b/xobjects/headers/atomicadd.h
@@ -101,8 +101,9 @@ DEF_ATOMIC_ADD(double  , f64)
 // -------------------------------------------
 #if defined(XO_CONTEXT_CUDA)
     // CUDA compiler may not have <stdint.h>, so define the types if needed.
-    #if defined(__CUDACC_RTC__) && !defined(__HIPCC__)
-        // NVRTC (CuPy RawModule default) can’t see <stdint.h>, so detect it via __CUDACC_RTC__
+    #if defined(__CUDACC_RTC__) || defined(__HIPCC_RTC__)
+        // NVRTC and HIPRTC (CuPy RawModule default) can’t see <stdint.h> 
+        // We detect via __CUDACC_RTC__ (Nvidia) or __HIPCC_RTC__ (ROCm)
         typedef signed char        int8_t;
         typedef short              int16_t;
         typedef int                int32_t;
@@ -111,8 +112,8 @@ DEF_ATOMIC_ADD(double  , f64)
         typedef unsigned short     uint16_t;
         typedef unsigned int       uint32_t;
         typedef unsigned long long uint64_t;
-    #elif defined(__HIPCC__) && !defined(__CUDACC_RTC__)
-        // ROCm appears to have definitions for 64-bit int types
+    #elif defined(__HIPCC__) && !defined(__HIPCC_RTC__)
+        // ROCm-HIPCC compiler appears to have definitions for 64-bit int types
         typedef signed char        int8_t;
         typedef short              int16_t;
         typedef int                int32_t;

From ebc7fa47b06d8f90d5602c175a180b194e2a02a8 Mon Sep 17 00:00:00 2001
From: Evangelos Katralis <vkatralis@gmail.com>
Date: Mon, 19 Jan 2026 10:02:38 +0100
Subject: [PATCH 5/6] Fix formatting for black in CI

---
 examples/ex_unionref_method.py        | 18 ++++++------------
 examples/kernel_basics/kernel_cffi.py |  6 ++----
 examples/sixtracklib.py               |  1 -
 tests/notest_capi.py                  | 15 +++------------
 tests/test_capi.py                    |  4 +---
 tests/test_common.py                  |  6 ++----
 tests/test_kernel.py                  |  6 ++----
 tests/test_ref.py                     | 18 ++++++------------
 tests/test_shared_memory.py           |  6 ++----
 tests/test_struct.py                  | 12 ++++--------
 xobjects/capi.py                      |  6 ++----
 xobjects/context_cupy.py              |  6 ++----
 xobjects/context_pyopencl.py          |  6 ++----
 xobjects/scalar.py                    |  1 -
 xobjects/struct.py                    |  1 +
 15 files changed, 35 insertions(+), 77 deletions(-)

diff --git a/examples/ex_unionref_method.py b/examples/ex_unionref_method.py
index 8eac05b..05d71cb 100644
--- a/examples/ex_unionref_method.py
+++ b/examples/ex_unionref_method.py
@@ -10,30 +10,26 @@ class Triangle(xo.Struct):
     b = xo.Float64
     h = xo.Float64
 
-    _extra_c_sources = [
-        """
+    _extra_c_sources = ["""
     /*gpufun*/
     double Triangle_compute_area(Triangle tr, double scale){
         double b = Triangle_get_b(tr);
         double h = Triangle_get_h(tr);
         return 0.5*b*h*scale;
     }
-    """
-    ]
+    """]
 
 
 class Square(xo.Struct):
     a = xo.Float64
 
-    _extra_c_sources = [
-        """
+    _extra_c_sources = ["""
     /*gpufun*/
     double Square_compute_area(Square sq, double scale){
         double a = Square_get_a(sq);
         return a*a*scale;
     }
-    """
-    ]
+    """]
 
 
 class Base(xo.UnionRef):
@@ -52,8 +48,7 @@ class Prism(xo.Struct):
     height = xo.Float64
     volume = xo.Float64
 
-    _extra_c_sources = [
-        """
+    _extra_c_sources = ["""
     /*gpukern*/
     void Prism_compute_volume(Prism pr){
         Base base = Prism_getp_base(pr);
@@ -61,8 +56,7 @@ class Prism(xo.Struct):
         double base_area = Base_compute_area(base, 3.);
         Prism_set_volume(pr, base_area*height);
     }
-    """
-    ]
+    """]
 
 
 context = xo.ContextCpu()
diff --git a/examples/kernel_basics/kernel_cffi.py b/examples/kernel_basics/kernel_cffi.py
index 8febef6..956a415 100644
--- a/examples/kernel_basics/kernel_cffi.py
+++ b/examples/kernel_basics/kernel_cffi.py
@@ -23,12 +23,10 @@
 }
 """
 
-ffi_interface.cdef(
-    """
+ffi_interface.cdef("""
    void mymul(int n,
        double* x1, double* x2,
-       double* y);"""
-)
+       double* y);""")
 
 
 ffi_interface.set_source(
diff --git a/examples/sixtracklib.py b/examples/sixtracklib.py
index 2389e27..c9c690c 100644
--- a/examples/sixtracklib.py
+++ b/examples/sixtracklib.py
@@ -24,7 +24,6 @@
 array Elements Element :
 """
 
-
 import xobject as xo
 
 
diff --git a/tests/notest_capi.py b/tests/notest_capi.py
index cb30ba6..37b4f49 100644
--- a/tests/notest_capi.py
+++ b/tests/notest_capi.py
@@ -56,21 +56,16 @@ def test_gen_get():
     path = [Multipole.order]
 
     source, _ = capi.gen_method_get(Multipole, path, default_conf)
-    assert (
-        source
-        == """\
+    assert source == """\
 /*gpufun*/ int8_t Multipole_get_order(const Multipole/*restrict*/ obj){
   int64_t offset=0;
   offset+=8;
   return *((/*gpuglmem*/int8_t*) obj+offset);
 }"""
-    )
 
     path = [Multipole.field, Field_N, Field.skew]
     source, _ = capi.gen_method_get(Multipole, path, default_conf)
-    assert (
-        source
-        == """\
+    assert source == """\
 /*gpufun*/ double Multipole_get_field_skew(const Multipole/*restrict*/ obj, int64_t i0){
   int64_t offset=0;
   offset+=32;
@@ -78,7 +73,6 @@ def test_gen_get():
   offset+=8;
   return *(/*gpuglmem*/double*)((/*gpuglmem*/char*) obj+offset);
 }"""
-    )
 
 
 def test_gen_set():
@@ -86,15 +80,12 @@ def test_gen_set():
     path = [Multipole.order]
 
     source, _ = capi.gen_method_set(Multipole, path, default_conf)
-    assert (
-        source
-        == """\
+    assert source == """\
 /*gpufun*/ void Multipole_set_order(Multipole/*restrict*/ obj, int8_t value){
   int64_t offset=0;
   offset+=8;
   *((/*gpuglmem*/int8_t*) obj+offset)=value;
 }"""
-    )
 
 
 def test_gen_c_api():
diff --git a/tests/test_capi.py b/tests/test_capi.py
index 096750d..72f16c1 100644
--- a/tests/test_capi.py
+++ b/tests/test_capi.py
@@ -179,9 +179,7 @@ def test_array_get_shape(test_context, array_type):
             *out_nd = ARRAY_TYPE_nd(arr);
             ARRAY_TYPE_shape(arr, out_shape);
         }
-    """.replace(
-        "ARRAY_TYPE", array_type.__name__
-    )
+    """.replace("ARRAY_TYPE", array_type.__name__)
 
     kernels = {
         "get_nd_and_shape": xo.Kernel(
diff --git a/tests/test_common.py b/tests/test_common.py
index c1a741c..0bbca21 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -87,8 +87,7 @@ def test_atomic(overload, ctype, test_context):
 
     class TestAtomic(xo.Struct):
         val = ctype
-        _extra_c_sources = [
-            f"""
+        _extra_c_sources = [f"""
             #include "xobjects/headers/common.h"
             #include "xobjects/headers/atomicadd.h"
 
@@ -101,8 +100,7 @@ class TestAtomic(xo.Struct):
                     retvals[ii] = ret;
                 END_VECTORIZE;
             }}
-            """
-        ]
+            """]
 
     kernels = {
         "run_atomic_test": xo.Kernel(
diff --git a/tests/test_kernel.py b/tests/test_kernel.py
index 6f47413..e8547bc 100644
--- a/tests/test_kernel.py
+++ b/tests/test_kernel.py
@@ -216,15 +216,13 @@ class TestClass(xo.HybridClass):
             "x": xo.Float64,
             "y": xo.Float64,
         }
-        _extra_c_sources = [
-            """
+        _extra_c_sources = ["""
             /*gpufun*/ double myfun(TestClassData tc){
                 double x = TestClassData_get_x(tc);
                 double y = TestClassData_get_y(tc);
                 return x * y;
             }
-        """
-        ]
+        """]
         _kernels = {
             "myfun": xo.Kernel(
                 args=[
diff --git a/tests/test_ref.py b/tests/test_ref.py
index e36adfa..538f3e3 100644
--- a/tests/test_ref.py
+++ b/tests/test_ref.py
@@ -220,29 +220,25 @@ class Triangle(xo.Struct):
         b = xo.Float64
         h = xo.Float64
 
-        _extra_c_sources = [
-            """
+        _extra_c_sources = ["""
             /*gpufun*/
             double Triangle_compute_area(Triangle tr, double scale){
                 double b = Triangle_get_b(tr);
                 double h = Triangle_get_h(tr);
                 return 0.5*b*h*scale;
             }
-            """
-        ]
+            """]
 
     class Square(xo.Struct):
         a = xo.Float64
 
-        _extra_c_sources = [
-            """
+        _extra_c_sources = ["""
             /*gpufun*/
             double Square_compute_area(Square sq, double scale){
                 double a = Square_get_a(sq);
                 return a*a*scale;
             }
-            """
-        ]
+            """]
 
     class Base(xo.UnionRef):
         _reftypes = (Triangle, Square)
@@ -259,8 +255,7 @@ class Prism(xo.Struct):
         height = xo.Float64
         volume = xo.Float64
 
-        _extra_c_sources = [
-            """
+        _extra_c_sources = ["""
             /*gpukern*/
             void Prism_compute_volume(Prism pr){
                 Base base = Prism_getp_base(pr);
@@ -269,8 +264,7 @@ class Prism(xo.Struct):
                 printf("base_area = %e", base_area);
                 Prism_set_volume(pr, base_area*height);
             }
-            """
-        ]
+            """]
 
     test_context.add_kernels(
         kernels={
diff --git a/tests/test_shared_memory.py b/tests/test_shared_memory.py
index b010cdd..7a62675 100644
--- a/tests/test_shared_memory.py
+++ b/tests/test_shared_memory.py
@@ -35,8 +35,7 @@ def test_shared_memory():
 
         class TestElement(xo.HybridClass):
             _xofields = {}
-            _extra_c_sources = [
-                """
+            _extra_c_sources = ["""
                 __global__ void test_shared_memory(const double* input_arr, double* result, const int n) {
                   // simple kernel to test shared memory
                   // reduction with an array of 4 doubles using 2 blocks each 2 threads
@@ -59,8 +58,7 @@ class TestElement(xo.HybridClass):
                     atomicAdd(&result[tid], sdata[tid]);
                   }
                 }
-                """
-            ]
+                """]
             _kernels = _test_shared_memory_kernels
 
             def __init__(
diff --git a/tests/test_struct.py b/tests/test_struct.py
index 5f62ecb..19f763a 100644
--- a/tests/test_struct.py
+++ b/tests/test_struct.py
@@ -222,8 +222,7 @@ class MyStruct(xo.Struct):
         var_mult_3 = xo.Float64[:]
         var_mult_4 = xo.Float64[:]
 
-        _extra_c_sources = [
-            r"""
+        _extra_c_sources = [r"""
 double mul(MyStruct stru) {
     int32_t n = MyStruct_get_n(stru);
     double* var_mult_1 = MyStruct_getp1_var_mult_1(stru, 0);
@@ -258,8 +257,7 @@ class MyStruct(xo.Struct):
         y+= var_mult_1[tid] * var_mult_2[tid] * var_mult_3[tid] * var_mult_4[tid];
         }
     return y;
-    }"""
-        ]
+    }"""]
 
     kernel_descriptions = {
         "mul": xo.Kernel(
@@ -316,15 +314,13 @@ class TestClass(xo.HybridClass):
             "x": xo.Float64,
             "y": xo.Float64,
         }
-        _extra_c_sources = [
-            """
+        _extra_c_sources = ["""
             /*gpufun*/ double myfun(TestClassData tc){
                 double x = TestClassData_get_x(tc);
                 double y = TestClassData_get_y(tc);
                 return x * y;
             }
-        """
-        ]
+        """]
         _kernels = {
             "myfun": xo.Kernel(
                 args=[
diff --git a/xobjects/capi.py b/xobjects/capi.py
index ec09ea6..af07f9b 100644
--- a/xobjects/capi.py
+++ b/xobjects/capi.py
@@ -525,14 +525,12 @@ def gen_method_switch(cls, path, conf, method):
         for arg in kernel.args[1:]:
             targs.append(f"{arg.name}")
         targs = ",".join(targs)
-        lst.append(
-            f"""\
+        lst.append(f"""\
         #ifndef {refname.upper()}_SKIP_{atname.upper()}
         case {refname}_{atname}_t:
             return {atname}_{method.c_name}({targs});
             break;
-        #endif"""
-        )
+        #endif""")
     lst.append("  }")
     lst.append(f"  return{'' if method.ret is None else ' 0'};")
     lst.append("}")
diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py
index 5bc9dc0..acf7686 100644
--- a/xobjects/context_cupy.py
+++ b/xobjects/context_cupy.py
@@ -350,8 +350,7 @@ def __invert__(self):
             return cupy.ndarray.__invert__(self._as_cupy())
 
 
-cudaheader: List[SourceType] = [
-    """\
+cudaheader: List[SourceType] = ["""\
 typedef signed int         int32_t;  //only_for_context cuda
 typedef signed short       int16_t;  //only_for_context cuda
 typedef signed char        int8_t;   //only_for_context cuda
@@ -368,8 +367,7 @@ def __invert__(self):
   #define NULL nullptr
 #endif
 
-"""
-]
+"""]
 
 
 def nplike_to_cupy(arr):
diff --git a/xobjects/context_pyopencl.py b/xobjects/context_pyopencl.py
index c61e21e..13bac43 100644
--- a/xobjects/context_pyopencl.py
+++ b/xobjects/context_pyopencl.py
@@ -44,8 +44,7 @@
 
 from ._patch_pyopencl_array import _patch_pyopencl_array
 
-openclheader: List[SourceType] = [
-    """\
+openclheader: List[SourceType] = ["""\
 #ifndef XOBJ_STDINT
 typedef long           int64_t;
 typedef int            int32_t;
@@ -59,8 +58,7 @@
 #ifndef NULL
 #define NULL 0L
 #endif
-"""
-]
+"""]
 
 if _enabled:
     # order of base classes matters as it defines which __setitem__ is used
diff --git a/xobjects/scalar.py b/xobjects/scalar.py
index a67c402..ee6f4e0 100644
--- a/xobjects/scalar.py
+++ b/xobjects/scalar.py
@@ -14,7 +14,6 @@
 import logging
 from .typeutils import Info
 
-
 log = logging.getLogger(__name__)
 
 
diff --git a/xobjects/struct.py b/xobjects/struct.py
index 7a1a98c..64b7482 100644
--- a/xobjects/struct.py
+++ b/xobjects/struct.py
@@ -45,6 +45,7 @@
 
 
 """
+
 import logging
 from typing import Callable, Optional
 

From 901dde368d77ab84a6ecb5870d3d0b26805b2e83 Mon Sep 17 00:00:00 2001
From: Evangelos Katralis <vkatralis@gmail.com>
Date: Mon, 19 Jan 2026 10:03:54 +0100
Subject: [PATCH 6/6] Pin black version in CI

---
 .github/workflows/black.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
index b04fb15..42e277a 100644
--- a/.github/workflows/black.yml
+++ b/.github/workflows/black.yml
@@ -7,4 +7,4 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - uses: psf/black@stable
+      - uses: psf/black@26.1.0