From 22e2b29750d5fdf6e2236e2ce24958838773dbd2 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Wed, 10 Dec 2025 01:10:11 +0100 Subject: [PATCH 1/6] Add support for ROCm in ContextCupy --- tests/test_shared_memory.py | 2 +- xobjects/context_cupy.py | 7 +++++-- xobjects/headers/atomicadd.h | 11 ++++++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/test_shared_memory.py b/tests/test_shared_memory.py index 31156d6..b010cdd 100644 --- a/tests/test_shared_memory.py +++ b/tests/test_shared_memory.py @@ -47,7 +47,7 @@ class TestElement(xo.HybridClass): unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x; // global thread ID: 0,1,2,3 // init shared memory with chunk of input array - extern __shared__ double sdata[2]; + extern __shared__ double sdata[]; sdata[tid] = input_arr[gid]; __syncthreads(); diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 3838376..4de785b 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -352,15 +352,18 @@ def __invert__(self): cudaheader: List[SourceType] = [ """\ -typedef signed long long int64_t; //only_for_context cuda typedef signed int int32_t; //only_for_context cuda typedef signed short int16_t; //only_for_context cuda typedef signed char int8_t; //only_for_context cuda -typedef unsigned long long uint64_t; //only_for_context cuda typedef unsigned int uint32_t; //only_for_context cuda typedef unsigned short uint16_t; //only_for_context cuda typedef unsigned char uint8_t; //only_for_context cuda +#if defined(__CUDACC__) && !defined(__HIPCC__) +typedef signed long long int64_t; +typedef unsigned long long uint64_t; +#endif + """ ] diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h index 6782705..6e40d4a 100644 --- a/xobjects/headers/atomicadd.h +++ b/xobjects/headers/atomicadd.h @@ -101,7 +101,7 @@ DEF_ATOMIC_ADD(double , f64) // ------------------------------------------- #if defined(XO_CONTEXT_CUDA) // CUDA compiler may not have , so define the types if needed. - #ifdef __CUDACC_RTC__ + #if defined(__CUDACC_RTC__) && !defined(__HIPCC__) // NVRTC (CuPy RawModule default) can’t see , so detect it via __CUDACC_RTC__ typedef signed char int8_t; typedef short int16_t; @@ -111,6 +111,15 @@ DEF_ATOMIC_ADD(double , f64) typedef unsigned short uint16_t; typedef unsigned int uint32_t; typedef unsigned long long uint64_t; + #elif defined(__HIPCC__) && !defined(__CUDACC_RTC__) + typedef signed char int8_t; + typedef short int16_t; + typedef int int32_t; + // typedef long long int64_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + // typedef unsigned long long uint64_t; #else // Alternatively, NVCC path is fine with host headers #include From 4f87acbb33a0cd372737b957608c313f74a7e439 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Wed, 10 Dec 2025 09:55:26 +0100 Subject: [PATCH 2/6] Add definition for NULL, missing for ROCm --- xobjects/context_cupy.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 4de785b..5ba68e6 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -364,6 +364,10 @@ def __invert__(self): typedef unsigned long long uint64_t; #endif +#ifndef NULL + #define NULL nullptr +#endif + """ ] From 19258bdf406e47b157c2d0c5a8f4d7e718168e08 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Mon, 15 Dec 2025 14:47:58 +0100 Subject: [PATCH 3/6] Clean up and add comments --- xobjects/headers/atomicadd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h index 6e40d4a..89e4f83 100644 --- a/xobjects/headers/atomicadd.h +++ b/xobjects/headers/atomicadd.h @@ -112,14 +112,13 @@ DEF_ATOMIC_ADD(double , f64) typedef unsigned int uint32_t; typedef unsigned long long uint64_t; #elif defined(__HIPCC__) && !defined(__CUDACC_RTC__) + // ROCm appears to have definitions for 64-bit int types typedef signed char int8_t; typedef short int16_t; typedef int int32_t; - // typedef long long int64_t; typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; - // typedef unsigned long long uint64_t; #else // Alternatively, NVCC path is fine with host headers #include From 947e0f267a3ef374523793fa68fa7de04fe3e10f Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Sat, 17 Jan 2026 00:33:55 +0100 Subject: [PATCH 4/6] Fix headers on HIPRTC --- xobjects/context_cupy.py | 2 +- xobjects/headers/atomicadd.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 5ba68e6..5bc9dc0 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -359,7 +359,7 @@ def __invert__(self): typedef unsigned short uint16_t; //only_for_context cuda typedef unsigned char uint8_t; //only_for_context cuda -#if defined(__CUDACC__) && !defined(__HIPCC__) +#if defined(__CUDACC__) || defined(__HIPCC_RTC__) typedef signed long long int64_t; typedef unsigned long long uint64_t; #endif diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h index 89e4f83..ab4f1f3 100644 --- a/xobjects/headers/atomicadd.h +++ b/xobjects/headers/atomicadd.h @@ -101,8 +101,9 @@ DEF_ATOMIC_ADD(double , f64) // ------------------------------------------- #if defined(XO_CONTEXT_CUDA) // CUDA compiler may not have , so define the types if needed. - #if defined(__CUDACC_RTC__) && !defined(__HIPCC__) - // NVRTC (CuPy RawModule default) can’t see , so detect it via __CUDACC_RTC__ + #if defined(__CUDACC_RTC__) || defined(__HIPCC_RTC__) + // NVRTC and HIPRTC (CuPy RawModule default) can’t see + // We detect via __CUDACC_RTC__ (Nvidia) or __HIPCC_RTC__ (ROCm) typedef signed char int8_t; typedef short int16_t; typedef int int32_t; @@ -111,8 +112,8 @@ DEF_ATOMIC_ADD(double , f64) typedef unsigned short uint16_t; typedef unsigned int uint32_t; typedef unsigned long long uint64_t; - #elif defined(__HIPCC__) && !defined(__CUDACC_RTC__) - // ROCm appears to have definitions for 64-bit int types + #elif defined(__HIPCC__) && !defined(__HIPCC_RTC__) + // ROCm-HIPCC compiler appears to have definitions for 64-bit int types typedef signed char int8_t; typedef short int16_t; typedef int int32_t; From ebc7fa47b06d8f90d5602c175a180b194e2a02a8 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Mon, 19 Jan 2026 10:02:38 +0100 Subject: [PATCH 5/6] Fix formatting for black in CI --- examples/ex_unionref_method.py | 18 ++++++------------ examples/kernel_basics/kernel_cffi.py | 6 ++---- examples/sixtracklib.py | 1 - tests/notest_capi.py | 15 +++------------ tests/test_capi.py | 4 +--- tests/test_common.py | 6 ++---- tests/test_kernel.py | 6 ++---- tests/test_ref.py | 18 ++++++------------ tests/test_shared_memory.py | 6 ++---- tests/test_struct.py | 12 ++++-------- xobjects/capi.py | 6 ++---- xobjects/context_cupy.py | 6 ++---- xobjects/context_pyopencl.py | 6 ++---- xobjects/scalar.py | 1 - xobjects/struct.py | 1 + 15 files changed, 35 insertions(+), 77 deletions(-) diff --git a/examples/ex_unionref_method.py b/examples/ex_unionref_method.py index 8eac05b..05d71cb 100644 --- a/examples/ex_unionref_method.py +++ b/examples/ex_unionref_method.py @@ -10,30 +10,26 @@ class Triangle(xo.Struct): b = xo.Float64 h = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Triangle_compute_area(Triangle tr, double scale){ double b = Triangle_get_b(tr); double h = Triangle_get_h(tr); return 0.5*b*h*scale; } - """ - ] + """] class Square(xo.Struct): a = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Square_compute_area(Square sq, double scale){ double a = Square_get_a(sq); return a*a*scale; } - """ - ] + """] class Base(xo.UnionRef): @@ -52,8 +48,7 @@ class Prism(xo.Struct): height = xo.Float64 volume = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); @@ -61,8 +56,7 @@ class Prism(xo.Struct): double base_area = Base_compute_area(base, 3.); Prism_set_volume(pr, base_area*height); } - """ - ] + """] context = xo.ContextCpu() diff --git a/examples/kernel_basics/kernel_cffi.py b/examples/kernel_basics/kernel_cffi.py index 8febef6..956a415 100644 --- a/examples/kernel_basics/kernel_cffi.py +++ b/examples/kernel_basics/kernel_cffi.py @@ -23,12 +23,10 @@ } """ -ffi_interface.cdef( - """ +ffi_interface.cdef(""" void mymul(int n, double* x1, double* x2, - double* y);""" -) + double* y);""") ffi_interface.set_source( diff --git a/examples/sixtracklib.py b/examples/sixtracklib.py index 2389e27..c9c690c 100644 --- a/examples/sixtracklib.py +++ b/examples/sixtracklib.py @@ -24,7 +24,6 @@ array Elements Element : """ - import xobject as xo diff --git a/tests/notest_capi.py b/tests/notest_capi.py index cb30ba6..37b4f49 100644 --- a/tests/notest_capi.py +++ b/tests/notest_capi.py @@ -56,21 +56,16 @@ def test_gen_get(): path = [Multipole.order] source, _ = capi.gen_method_get(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ int8_t Multipole_get_order(const Multipole/*restrict*/ obj){ int64_t offset=0; offset+=8; return *((/*gpuglmem*/int8_t*) obj+offset); }""" - ) path = [Multipole.field, Field_N, Field.skew] source, _ = capi.gen_method_get(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ double Multipole_get_field_skew(const Multipole/*restrict*/ obj, int64_t i0){ int64_t offset=0; offset+=32; @@ -78,7 +73,6 @@ def test_gen_get(): offset+=8; return *(/*gpuglmem*/double*)((/*gpuglmem*/char*) obj+offset); }""" - ) def test_gen_set(): @@ -86,15 +80,12 @@ def test_gen_set(): path = [Multipole.order] source, _ = capi.gen_method_set(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ void Multipole_set_order(Multipole/*restrict*/ obj, int8_t value){ int64_t offset=0; offset+=8; *((/*gpuglmem*/int8_t*) obj+offset)=value; }""" - ) def test_gen_c_api(): diff --git a/tests/test_capi.py b/tests/test_capi.py index 096750d..72f16c1 100644 --- a/tests/test_capi.py +++ b/tests/test_capi.py @@ -179,9 +179,7 @@ def test_array_get_shape(test_context, array_type): *out_nd = ARRAY_TYPE_nd(arr); ARRAY_TYPE_shape(arr, out_shape); } - """.replace( - "ARRAY_TYPE", array_type.__name__ - ) + """.replace("ARRAY_TYPE", array_type.__name__) kernels = { "get_nd_and_shape": xo.Kernel( diff --git a/tests/test_common.py b/tests/test_common.py index c1a741c..0bbca21 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -87,8 +87,7 @@ def test_atomic(overload, ctype, test_context): class TestAtomic(xo.Struct): val = ctype - _extra_c_sources = [ - f""" + _extra_c_sources = [f""" #include "xobjects/headers/common.h" #include "xobjects/headers/atomicadd.h" @@ -101,8 +100,7 @@ class TestAtomic(xo.Struct): retvals[ii] = ret; END_VECTORIZE; }} - """ - ] + """] kernels = { "run_atomic_test": xo.Kernel( diff --git a/tests/test_kernel.py b/tests/test_kernel.py index 6f47413..e8547bc 100644 --- a/tests/test_kernel.py +++ b/tests/test_kernel.py @@ -216,15 +216,13 @@ class TestClass(xo.HybridClass): "x": xo.Float64, "y": xo.Float64, } - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double myfun(TestClassData tc){ double x = TestClassData_get_x(tc); double y = TestClassData_get_y(tc); return x * y; } - """ - ] + """] _kernels = { "myfun": xo.Kernel( args=[ diff --git a/tests/test_ref.py b/tests/test_ref.py index e36adfa..538f3e3 100644 --- a/tests/test_ref.py +++ b/tests/test_ref.py @@ -220,29 +220,25 @@ class Triangle(xo.Struct): b = xo.Float64 h = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Triangle_compute_area(Triangle tr, double scale){ double b = Triangle_get_b(tr); double h = Triangle_get_h(tr); return 0.5*b*h*scale; } - """ - ] + """] class Square(xo.Struct): a = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Square_compute_area(Square sq, double scale){ double a = Square_get_a(sq); return a*a*scale; } - """ - ] + """] class Base(xo.UnionRef): _reftypes = (Triangle, Square) @@ -259,8 +255,7 @@ class Prism(xo.Struct): height = xo.Float64 volume = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); @@ -269,8 +264,7 @@ class Prism(xo.Struct): printf("base_area = %e", base_area); Prism_set_volume(pr, base_area*height); } - """ - ] + """] test_context.add_kernels( kernels={ diff --git a/tests/test_shared_memory.py b/tests/test_shared_memory.py index b010cdd..7a62675 100644 --- a/tests/test_shared_memory.py +++ b/tests/test_shared_memory.py @@ -35,8 +35,7 @@ def test_shared_memory(): class TestElement(xo.HybridClass): _xofields = {} - _extra_c_sources = [ - """ + _extra_c_sources = [""" __global__ void test_shared_memory(const double* input_arr, double* result, const int n) { // simple kernel to test shared memory // reduction with an array of 4 doubles using 2 blocks each 2 threads @@ -59,8 +58,7 @@ class TestElement(xo.HybridClass): atomicAdd(&result[tid], sdata[tid]); } } - """ - ] + """] _kernels = _test_shared_memory_kernels def __init__( diff --git a/tests/test_struct.py b/tests/test_struct.py index 5f62ecb..19f763a 100644 --- a/tests/test_struct.py +++ b/tests/test_struct.py @@ -222,8 +222,7 @@ class MyStruct(xo.Struct): var_mult_3 = xo.Float64[:] var_mult_4 = xo.Float64[:] - _extra_c_sources = [ - r""" + _extra_c_sources = [r""" double mul(MyStruct stru) { int32_t n = MyStruct_get_n(stru); double* var_mult_1 = MyStruct_getp1_var_mult_1(stru, 0); @@ -258,8 +257,7 @@ class MyStruct(xo.Struct): y+= var_mult_1[tid] * var_mult_2[tid] * var_mult_3[tid] * var_mult_4[tid]; } return y; - }""" - ] + }"""] kernel_descriptions = { "mul": xo.Kernel( @@ -316,15 +314,13 @@ class TestClass(xo.HybridClass): "x": xo.Float64, "y": xo.Float64, } - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double myfun(TestClassData tc){ double x = TestClassData_get_x(tc); double y = TestClassData_get_y(tc); return x * y; } - """ - ] + """] _kernels = { "myfun": xo.Kernel( args=[ diff --git a/xobjects/capi.py b/xobjects/capi.py index ec09ea6..af07f9b 100644 --- a/xobjects/capi.py +++ b/xobjects/capi.py @@ -525,14 +525,12 @@ def gen_method_switch(cls, path, conf, method): for arg in kernel.args[1:]: targs.append(f"{arg.name}") targs = ",".join(targs) - lst.append( - f"""\ + lst.append(f"""\ #ifndef {refname.upper()}_SKIP_{atname.upper()} case {refname}_{atname}_t: return {atname}_{method.c_name}({targs}); break; - #endif""" - ) + #endif""") lst.append(" }") lst.append(f" return{'' if method.ret is None else ' 0'};") lst.append("}") diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 5bc9dc0..acf7686 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -350,8 +350,7 @@ def __invert__(self): return cupy.ndarray.__invert__(self._as_cupy()) -cudaheader: List[SourceType] = [ - """\ +cudaheader: List[SourceType] = ["""\ typedef signed int int32_t; //only_for_context cuda typedef signed short int16_t; //only_for_context cuda typedef signed char int8_t; //only_for_context cuda @@ -368,8 +367,7 @@ def __invert__(self): #define NULL nullptr #endif -""" -] +"""] def nplike_to_cupy(arr): diff --git a/xobjects/context_pyopencl.py b/xobjects/context_pyopencl.py index c61e21e..13bac43 100644 --- a/xobjects/context_pyopencl.py +++ b/xobjects/context_pyopencl.py @@ -44,8 +44,7 @@ from ._patch_pyopencl_array import _patch_pyopencl_array -openclheader: List[SourceType] = [ - """\ +openclheader: List[SourceType] = ["""\ #ifndef XOBJ_STDINT typedef long int64_t; typedef int int32_t; @@ -59,8 +58,7 @@ #ifndef NULL #define NULL 0L #endif -""" -] +"""] if _enabled: # order of base classes matters as it defines which __setitem__ is used diff --git a/xobjects/scalar.py b/xobjects/scalar.py index a67c402..ee6f4e0 100644 --- a/xobjects/scalar.py +++ b/xobjects/scalar.py @@ -14,7 +14,6 @@ import logging from .typeutils import Info - log = logging.getLogger(__name__) diff --git a/xobjects/struct.py b/xobjects/struct.py index 7a1a98c..64b7482 100644 --- a/xobjects/struct.py +++ b/xobjects/struct.py @@ -45,6 +45,7 @@ """ + import logging from typing import Callable, Optional From 901dde368d77ab84a6ecb5870d3d0b26805b2e83 Mon Sep 17 00:00:00 2001 From: Evangelos Katralis Date: Mon, 19 Jan 2026 10:03:54 +0100 Subject: [PATCH 6/6] Pin black version in CI --- .github/workflows/black.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index b04fb15..42e277a 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -7,4 +7,4 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: psf/black@stable + - uses: psf/black@26.1.0