diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index b04fb15..42e277a 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -7,4 +7,4 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: psf/black@stable + - uses: psf/black@26.1.0 diff --git a/examples/ex_unionref_method.py b/examples/ex_unionref_method.py index 8eac05b..05d71cb 100644 --- a/examples/ex_unionref_method.py +++ b/examples/ex_unionref_method.py @@ -10,30 +10,26 @@ class Triangle(xo.Struct): b = xo.Float64 h = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Triangle_compute_area(Triangle tr, double scale){ double b = Triangle_get_b(tr); double h = Triangle_get_h(tr); return 0.5*b*h*scale; } - """ - ] + """] class Square(xo.Struct): a = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Square_compute_area(Square sq, double scale){ double a = Square_get_a(sq); return a*a*scale; } - """ - ] + """] class Base(xo.UnionRef): @@ -52,8 +48,7 @@ class Prism(xo.Struct): height = xo.Float64 volume = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); @@ -61,8 +56,7 @@ class Prism(xo.Struct): double base_area = Base_compute_area(base, 3.); Prism_set_volume(pr, base_area*height); } - """ - ] + """] context = xo.ContextCpu() diff --git a/examples/kernel_basics/kernel_cffi.py b/examples/kernel_basics/kernel_cffi.py index 8febef6..956a415 100644 --- a/examples/kernel_basics/kernel_cffi.py +++ b/examples/kernel_basics/kernel_cffi.py @@ -23,12 +23,10 @@ } """ -ffi_interface.cdef( - """ +ffi_interface.cdef(""" void mymul(int n, double* x1, double* x2, - double* y);""" -) + double* y);""") ffi_interface.set_source( diff --git a/examples/sixtracklib.py b/examples/sixtracklib.py index 2389e27..c9c690c 100644 --- a/examples/sixtracklib.py +++ b/examples/sixtracklib.py @@ -24,7 +24,6 @@ array 
Elements Element : """ - import xobject as xo diff --git a/tests/notest_capi.py b/tests/notest_capi.py index cb30ba6..37b4f49 100644 --- a/tests/notest_capi.py +++ b/tests/notest_capi.py @@ -56,21 +56,16 @@ def test_gen_get(): path = [Multipole.order] source, _ = capi.gen_method_get(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ int8_t Multipole_get_order(const Multipole/*restrict*/ obj){ int64_t offset=0; offset+=8; return *((/*gpuglmem*/int8_t*) obj+offset); }""" - ) path = [Multipole.field, Field_N, Field.skew] source, _ = capi.gen_method_get(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ double Multipole_get_field_skew(const Multipole/*restrict*/ obj, int64_t i0){ int64_t offset=0; offset+=32; @@ -78,7 +73,6 @@ def test_gen_get(): offset+=8; return *(/*gpuglmem*/double*)((/*gpuglmem*/char*) obj+offset); }""" - ) def test_gen_set(): @@ -86,15 +80,12 @@ def test_gen_set(): path = [Multipole.order] source, _ = capi.gen_method_set(Multipole, path, default_conf) - assert ( - source - == """\ + assert source == """\ /*gpufun*/ void Multipole_set_order(Multipole/*restrict*/ obj, int8_t value){ int64_t offset=0; offset+=8; *((/*gpuglmem*/int8_t*) obj+offset)=value; }""" - ) def test_gen_c_api(): diff --git a/tests/test_capi.py b/tests/test_capi.py index 096750d..72f16c1 100644 --- a/tests/test_capi.py +++ b/tests/test_capi.py @@ -179,9 +179,7 @@ def test_array_get_shape(test_context, array_type): *out_nd = ARRAY_TYPE_nd(arr); ARRAY_TYPE_shape(arr, out_shape); } - """.replace( - "ARRAY_TYPE", array_type.__name__ - ) + """.replace("ARRAY_TYPE", array_type.__name__) kernels = { "get_nd_and_shape": xo.Kernel( diff --git a/tests/test_common.py b/tests/test_common.py index c1a741c..0bbca21 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -87,8 +87,7 @@ def test_atomic(overload, ctype, test_context): class TestAtomic(xo.Struct): val = ctype - _extra_c_sources = [ - 
f""" + _extra_c_sources = [f""" #include "xobjects/headers/common.h" #include "xobjects/headers/atomicadd.h" @@ -101,8 +100,7 @@ class TestAtomic(xo.Struct): retvals[ii] = ret; END_VECTORIZE; }} - """ - ] + """] kernels = { "run_atomic_test": xo.Kernel( diff --git a/tests/test_kernel.py b/tests/test_kernel.py index 6f47413..e8547bc 100644 --- a/tests/test_kernel.py +++ b/tests/test_kernel.py @@ -216,15 +216,13 @@ class TestClass(xo.HybridClass): "x": xo.Float64, "y": xo.Float64, } - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double myfun(TestClassData tc){ double x = TestClassData_get_x(tc); double y = TestClassData_get_y(tc); return x * y; } - """ - ] + """] _kernels = { "myfun": xo.Kernel( args=[ diff --git a/tests/test_ref.py b/tests/test_ref.py index e36adfa..538f3e3 100644 --- a/tests/test_ref.py +++ b/tests/test_ref.py @@ -220,29 +220,25 @@ class Triangle(xo.Struct): b = xo.Float64 h = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Triangle_compute_area(Triangle tr, double scale){ double b = Triangle_get_b(tr); double h = Triangle_get_h(tr); return 0.5*b*h*scale; } - """ - ] + """] class Square(xo.Struct): a = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double Square_compute_area(Square sq, double scale){ double a = Square_get_a(sq); return a*a*scale; } - """ - ] + """] class Base(xo.UnionRef): _reftypes = (Triangle, Square) @@ -259,8 +255,7 @@ class Prism(xo.Struct): height = xo.Float64 volume = xo.Float64 - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); @@ -269,8 +264,7 @@ class Prism(xo.Struct): printf("base_area = %e", base_area); Prism_set_volume(pr, base_area*height); } - """ - ] + """] test_context.add_kernels( kernels={ diff --git a/tests/test_shared_memory.py b/tests/test_shared_memory.py index 31156d6..7a62675 100644 --- a/tests/test_shared_memory.py +++ 
b/tests/test_shared_memory.py @@ -35,8 +35,7 @@ def test_shared_memory(): class TestElement(xo.HybridClass): _xofields = {} - _extra_c_sources = [ - """ + _extra_c_sources = [""" __global__ void test_shared_memory(const double* input_arr, double* result, const int n) { // simple kernel to test shared memory // reduction with an array of 4 doubles using 2 blocks each 2 threads @@ -47,7 +46,7 @@ class TestElement(xo.HybridClass): unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x; // global thread ID: 0,1,2,3 // init shared memory with chunk of input array - extern __shared__ double sdata[2]; + extern __shared__ double sdata[]; sdata[tid] = input_arr[gid]; __syncthreads(); @@ -59,8 +58,7 @@ class TestElement(xo.HybridClass): atomicAdd(&result[tid], sdata[tid]); } } - """ - ] + """] _kernels = _test_shared_memory_kernels def __init__( diff --git a/tests/test_struct.py b/tests/test_struct.py index 5f62ecb..19f763a 100644 --- a/tests/test_struct.py +++ b/tests/test_struct.py @@ -222,8 +222,7 @@ class MyStruct(xo.Struct): var_mult_3 = xo.Float64[:] var_mult_4 = xo.Float64[:] - _extra_c_sources = [ - r""" + _extra_c_sources = [r""" double mul(MyStruct stru) { int32_t n = MyStruct_get_n(stru); double* var_mult_1 = MyStruct_getp1_var_mult_1(stru, 0); @@ -258,8 +257,7 @@ class MyStruct(xo.Struct): y+= var_mult_1[tid] * var_mult_2[tid] * var_mult_3[tid] * var_mult_4[tid]; } return y; - }""" - ] + }"""] kernel_descriptions = { "mul": xo.Kernel( @@ -316,15 +314,13 @@ class TestClass(xo.HybridClass): "x": xo.Float64, "y": xo.Float64, } - _extra_c_sources = [ - """ + _extra_c_sources = [""" /*gpufun*/ double myfun(TestClassData tc){ double x = TestClassData_get_x(tc); double y = TestClassData_get_y(tc); return x * y; } - """ - ] + """] _kernels = { "myfun": xo.Kernel( args=[ diff --git a/xobjects/capi.py b/xobjects/capi.py index ec09ea6..af07f9b 100644 --- a/xobjects/capi.py +++ b/xobjects/capi.py @@ -525,14 +525,12 @@ def gen_method_switch(cls, path, conf, method): for arg 
in kernel.args[1:]: targs.append(f"{arg.name}") targs = ",".join(targs) - lst.append( - f"""\ + lst.append(f"""\ #ifndef {refname.upper()}_SKIP_{atname.upper()} case {refname}_{atname}_t: return {atname}_{method.c_name}({targs}); break; - #endif""" - ) + #endif""") lst.append(" }") lst.append(f" return{'' if method.ret is None else ' 0'};") lst.append("}") diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 3838376..acf7686 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -350,19 +350,24 @@ def __invert__(self): return cupy.ndarray.__invert__(self._as_cupy()) -cudaheader: List[SourceType] = [ - """\ -typedef signed long long int64_t; //only_for_context cuda +cudaheader: List[SourceType] = ["""\ typedef signed int int32_t; //only_for_context cuda typedef signed short int16_t; //only_for_context cuda typedef signed char int8_t; //only_for_context cuda -typedef unsigned long long uint64_t; //only_for_context cuda typedef unsigned int uint32_t; //only_for_context cuda typedef unsigned short uint16_t; //only_for_context cuda typedef unsigned char uint8_t; //only_for_context cuda -""" -] +#if defined(__CUDACC__) || defined(__HIPCC_RTC__) +typedef signed long long int64_t; +typedef unsigned long long uint64_t; +#endif + +#ifndef NULL + #define NULL nullptr +#endif + +"""] def nplike_to_cupy(arr): diff --git a/xobjects/context_pyopencl.py b/xobjects/context_pyopencl.py index c61e21e..13bac43 100644 --- a/xobjects/context_pyopencl.py +++ b/xobjects/context_pyopencl.py @@ -44,8 +44,7 @@ from ._patch_pyopencl_array import _patch_pyopencl_array -openclheader: List[SourceType] = [ - """\ +openclheader: List[SourceType] = ["""\ #ifndef XOBJ_STDINT typedef long int64_t; typedef int int32_t; @@ -59,8 +58,7 @@ #ifndef NULL #define NULL 0L #endif -""" -] +"""] if _enabled: # order of base classes matters as it defines which __setitem__ is used diff --git a/xobjects/headers/atomicadd.h b/xobjects/headers/atomicadd.h index 6782705..ab4f1f3 
100644 --- a/xobjects/headers/atomicadd.h +++ b/xobjects/headers/atomicadd.h @@ -101,8 +101,9 @@ DEF_ATOMIC_ADD(double , f64) // ------------------------------------------- #if defined(XO_CONTEXT_CUDA) // CUDA compiler may not have <stdint.h>, so define the types if needed. - #ifdef __CUDACC_RTC__ - // NVRTC (CuPy RawModule default) can’t see <stdint.h>, so detect it via __CUDACC_RTC__ + #if defined(__CUDACC_RTC__) || defined(__HIPCC_RTC__) + // NVRTC and HIPRTC (CuPy RawModule default) can’t see <stdint.h> + // We detect via __CUDACC_RTC__ (Nvidia) or __HIPCC_RTC__ (ROCm) typedef signed char int8_t; typedef short int16_t; typedef int int32_t; @@ -111,6 +112,14 @@ DEF_ATOMIC_ADD(double , f64) typedef unsigned short uint16_t; typedef unsigned int uint32_t; typedef unsigned long long uint64_t; + #elif defined(__HIPCC__) && !defined(__HIPCC_RTC__) + // ROCm-HIPCC compiler appears to have definitions for 64-bit int types + typedef signed char int8_t; + typedef short int16_t; + typedef int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; #else // Alternatively, NVCC path is fine with host headers #include <stdint.h> diff --git a/xobjects/scalar.py b/xobjects/scalar.py index a67c402..ee6f4e0 100644 --- a/xobjects/scalar.py +++ b/xobjects/scalar.py @@ -14,7 +14,6 @@ import logging from .typeutils import Info - log = logging.getLogger(__name__) diff --git a/xobjects/struct.py b/xobjects/struct.py index 7a1a98c..64b7482 100644 --- a/xobjects/struct.py +++ b/xobjects/struct.py @@ -45,6 +45,7 @@ """ + import logging from typing import Callable, Optional