diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index b04fb15..42e277a 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -7,4 +7,4 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: psf/black@stable + - uses: psf/black@26.1.0 diff --git a/examples/ex_unionref_method.py b/examples/ex_unionref_method.py index 8eac05b..05d71cb 100644 --- a/examples/ex_unionref_method.py +++ b/examples/ex_unionref_method.py @@ -10,30 +10,26 @@ class Triangle(xo.Struct): b = xo.Float64 h = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Triangle_compute_area(Triangle tr, double scale){ double b = Triangle_get_b(tr); double h = Triangle_get_h(tr); return 0.5*b*h*scale; } - """ - ] + """] class Square(xo.Struct): a = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Square_compute_area(Square sq, double scale){ double a = Square_get_a(sq); return a*a*scale; } - """ - ] + """] class Base(xo.UnionRef): @@ -52,8 +48,7 @@ class Prism(xo.Struct): height = xo.Float64 volume = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); @@ -61,8 +56,7 @@ class Prism(xo.Struct): double base_area = Base_compute_area(base, 3.); Prism_set_volume(pr, base_area*height); } - """ - ] + """] context = xo.ContextCpu() diff --git a/examples/kernel_basics/kernel_cffi.py b/examples/kernel_basics/kernel_cffi.py index 8febef6..956a415 100644 --- a/examples/kernel_basics/kernel_cffi.py +++ b/examples/kernel_basics/kernel_cffi.py @@ -23,12 +23,10 @@ } """ -ffi_interface.cdef( - """ +ffi_interface.cdef(""" void mymul(int n, double* x1, double* x2, - double* y);""" -) + double* y);""") ffi_interface.set_source( diff --git a/examples/sixtracklib.py b/examples/sixtracklib.py index 2389e27..c9c690c 100644 --- a/examples/sixtracklib.py +++ b/examples/sixtracklib.py @@ -24,7 +24,6 @@ array 
Elements Element : """ - import xobject as xo diff --git a/tests/notest_capi.py b/tests/notest_capi.py index cb30ba6..37b4f49 100644 --- a/tests/notest_capi.py +++ b/tests/notest_capi.py @@ -56,21 +56,16 @@ def test_gen_get(): path = [Multipole.order] source, _ = capi.gen_method_get(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ int8_t Multipole_get_order(const Multipole/*restrict*/ obj){ int64_t offset=0; offset+=8; return *((/*gpuglmem*/int8_t*) obj+offset); }""" - ) path = [Multipole.field, Field_N, Field.skew] source, _ = capi.gen_method_get(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ double Multipole_get_field_skew(const Multipole/*restrict*/ obj, int64_t i0){ int64_t offset=0; offset+=32; @@ -78,7 +73,6 @@ def test_gen_get(): offset+=8; return *(/*gpuglmem*/double*)((/*gpuglmem*/char*) obj+offset); }""" - ) def test_gen_set(): @@ -86,15 +80,12 @@ def test_gen_set(): path = [Multipole.order] source, _ = capi.gen_method_set(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ void Multipole_set_order(Multipole/*restrict*/ obj, int8_t value){ int64_t offset=0; offset+=8; *((/*gpuglmem*/int8_t*) obj+offset)=value; }""" - ) def test_gen_c_api(): diff --git a/tests/test_capi.py b/tests/test_capi.py index 096750d..72f16c1 100644 --- a/tests/test_capi.py +++ b/tests/test_capi.py @@ -179,9 +179,7 @@ def test_array_get_shape(test_context, array_type): *out_nd = ARRAY_TYPE_nd(arr); ARRAY_TYPE_shape(arr, out_shape); } - """.replace( - "ARRAY_TYPE", array_type.__name__ - ) + """.replace("ARRAY_TYPE", array_type.__name__) kernels = { "get_nd_and_shape": xo.Kernel( diff --git a/tests/test_common.py b/tests/test_common.py index c1a741c..0bbca21 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -87,8 +87,7 @@ def test_atomic(overload, ctype, test_context): class TestAtomic(xo.Struct): val = ctype - _extra_c_sources = [ - 
f""" + _extra_c_sources = [f""" #include "xobjects/headers/common.h" #include "xobjects/headers/atomicadd.h" @@ -101,8 +100,7 @@ class TestAtomic(xo.Struct): retvals[ii] = ret; END_VECTORIZE; }} - """ - ] + """] kernels = { "run_atomic_test": xo.Kernel( diff --git a/tests/test_kernel.py b/tests/test_kernel.py index 6f47413..e8547bc 100644 --- a/tests/test_kernel.py +++ b/tests/test_kernel.py @@ -216,15 +216,13 @@ class TestClass(xo.HybridClass): "x": xo.Float64, "y": xo.Float64, } - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double myfun(TestClassData tc){ double x = TestClassData_get_x(tc); double y = TestClassData_get_y(tc); return x * y; } - """ - ] + """] _kernels = { "myfun": xo.Kernel( args=[ diff --git a/tests/test_ref.py b/tests/test_ref.py index e36adfa..538f3e3 100644 --- a/tests/test_ref.py +++ b/tests/test_ref.py @@ -220,29 +220,25 @@ class Triangle(xo.Struct): b = xo.Float64 h = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Triangle_compute_area(Triangle tr, double scale){ double b = Triangle_get_b(tr); double h = Triangle_get_h(tr); return 0.5*b*h*scale; } - """ - ] + """] class Square(xo.Struct): a = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Square_compute_area(Square sq, double scale){ double a = Square_get_a(sq); return a*a*scale; } - """ - ] + """] class Base(xo.UnionRef): _reftypes = (Triangle, Square) @@ -259,8 +255,7 @@ class Prism(xo.Struct): height = xo.Float64 volume = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); @@ -269,8 +264,7 @@ class Prism(xo.Struct): printf("base_area = %e", base_area); Prism_set_volume(pr, base_area*height); } - """ - ] + """] test_context.add_kernels( kernels={ diff --git a/tests/test_shared_memory.py b/tests/test_shared_memory.py index 31156d6..7a62675 100644 --- a/tests/test_shared_memory.py +++ 
b/tests/test_shared_memory.py @@ -35,8 +35,7 @@ def test_shared_memory(): class TestElement(xo.HybridClass): _xofields = {} - _extra_c_sources = [ - """ + _extra_c_sources = [""" __global__ void test_shared_memory(const double* input_arr, double* result, const int n) { // simple kernel to test shared memory // reduction with an array of 4 doubles using 2 blocks each 2 threads @@ -47,7 +46,7 @@ class TestElement(xo.HybridClass): unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x; // global thread ID: 0,1,2,3 // init shared memory with chunk of input array - extern __shared__ double sdata[2]; + extern __shared__ double sdata[]; sdata[tid] = input_arr[gid]; __syncthreads(); @@ -59,8 +58,7 @@ class TestElement(xo.HybridClass): atomicAdd(&result[tid], sdata[tid]); } } - """ - ] + """] _kernels = _test_shared_memory_kernels def __init__( diff --git a/tests/test_struct.py b/tests/test_struct.py index 5f62ecb..19f763a 100644 --- a/tests/test_struct.py +++ b/tests/test_struct.py @@ -222,8 +222,7 @@ class MyStruct(xo.Struct): var_mult_3 = xo.Float64[:] var_mult_4 = xo.Float64[:] - _extra_c_sources = [ - r""" + _extra_c_sources = [r""" double mul(MyStruct stru) { int32_t n = MyStruct_get_n(stru); double* var_mult_1 = MyStruct_getp1_var_mult_1(stru, 0); @@ -258,8 +257,7 @@ class MyStruct(xo.Struct): y+= var_mult_1[tid] * var_mult_2[tid] * var_mult_3[tid] * var_mult_4[tid]; } return y; - }""" - ] + }"""] kernel_descriptions = { "mul": xo.Kernel( @@ -316,15 +314,13 @@ class TestClass(xo.HybridClass): "x": xo.Float64, "y": xo.Float64, } - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double myfun(TestClassData tc){ double x = TestClassData_get_x(tc); double y = TestClassData_get_y(tc); return x * y; } - """ - ] + """] _kernels = { "myfun": xo.Kernel( args=[ diff --git a/xobjects/capi.py b/xobjects/capi.py index ec09ea6..af07f9b 100644 --- a/xobjects/capi.py +++ b/xobjects/capi.py @@ -525,14 +525,12 @@ def gen_method_switch(cls, path, conf, method): for arg 
in kernel.args[1:]: targs.append(f"{arg.name}") targs = ",".join(targs) - lst.append( - f"""\ + lst.append(f"""\ #ifndef {refname.upper()}_SKIP_{atname.upper()} case {refname}_{atname}_t: return {atname}_{method.c_name}({targs}); break; - #endif""" - ) + #endif""") lst.append(" }") lst.append(f" return{'' if method.ret is None else ' 0'};") lst.append("}") diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 3838376..acf7686 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -350,19 +350,24 @@ def __invert__(self): return cupy.ndarray.__invert__(self._as_cupy()) -cudaheader: List[SourceType] = [ - """\ -typedef signed long long int64_t; //only_for_context cuda +cudaheader: List[SourceType] = ["""\ typedef signed int int32_t; //only_for_context cuda typedef signed short int16_t; //only_for_context cuda typedef signed char int8_t; //only_for_context cuda -typedef unsigned long long uint64_t; //only_for_context cuda typedef unsigned int uint32_t; //only_for_context cuda typedef unsigned short uint16_t; //only_for_context cuda typedef unsigned char uint8_t; //only_for_context cuda -""" -] +#if defined(__CUDACC__) || defined(__HIPCC_RTC__) +typedef signed long long int64_t; +typedef unsigned long long uint64_t; +#endif + +#ifndef NULL + #define NULL nullptr +#endif + +"""] def nplike_to_cupy(arr): diff --git a/xobjects/context_pyopencl.py b/xobjects/context_pyopencl.py index c61e21e..13bac43 100644 --- a/xobjects/context_pyopencl.py +++ b/xobjects/context_pyopencl.py @@ -44,8 +44,7 @@ from ._patch_pyopencl_array import _patch_pyopencl_array -openclheader: List[SourceType] = [ - """\ +openclheader: List[SourceType] = ["""\ #ifndef XOBJ_STDINT typedef long int64_t; typedef int int32_t; @@ -59,8 +58,7 @@ #ifndef NULL #define NULL 0L #endif -""" -] +"""] if _enabled: # order of base classes matters as it defines which __setitem__ is used diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h index 6782705..ab4f1f3 
100644 --- a/xobjects/headers/atomicadd.h +++ b/xobjects/headers/atomicadd.h @@ -101,8 +101,9 @@ DEF_ATOMIC_ADD(double , f64) // ------------------------------------------- #if defined(XO_CONTEXT_CUDA) // CUDA compiler may not have <stdint.h>, so define the types if needed. - #ifdef __CUDACC_RTC__ - // NVRTC (CuPy RawModule default) can’t see <stdint.h>, so detect it via __CUDACC_RTC__ + #if defined(__CUDACC_RTC__) || defined(__HIPCC_RTC__) + // NVRTC and HIPRTC (CuPy RawModule default) can’t see <stdint.h> + // We detect via __CUDACC_RTC__ (Nvidia) or __HIPCC_RTC__ (ROCm) typedef signed char int8_t; typedef short int16_t; typedef int int32_t; @@ -111,6 +112,14 @@ DEF_ATOMIC_ADD(double , f64) typedef unsigned short uint16_t; typedef unsigned int uint32_t; typedef unsigned long long uint64_t; + #elif defined(__HIPCC__) && !defined(__HIPCC_RTC__) + // ROCm-HIPCC compiler appears to have definitions for 64-bit int types + typedef signed char int8_t; + typedef short int16_t; + typedef int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; #else // Alternatively, NVCC path is fine with host headers #include <stdint.h> diff --git a/xobjects/scalar.py b/xobjects/scalar.py index a67c402..ee6f4e0 100644 --- a/xobjects/scalar.py +++ b/xobjects/scalar.py @@ -14,7 +14,6 @@ import logging from .typeutils import Info - log = logging.getLogger(__name__) diff --git a/xobjects/struct.py b/xobjects/struct.py index 7a1a98c..64b7482 100644 --- a/xobjects/struct.py +++ b/xobjects/struct.py @@ -45,6 +45,7 @@ """ + import logging from typing import Callable, Optional