From 019e27c0e25174a2766c924af24f651537631b8e Mon Sep 17 00:00:00 2001 From: Akira Naruse Date: Wed, 24 Nov 2021 23:18:13 +0900 Subject: [PATCH 01/57] Fixed cupy_thrust.cu for CUDA 11.6 --- cupy/cuda/cupy_thrust.cu | 108 +++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/cupy/cuda/cupy_thrust.cu b/cupy/cuda/cupy_thrust.cu index 70307cafdd9..9980f5b0bd9 100644 --- a/cupy/cuda/cupy_thrust.cu +++ b/cupy/cuda/cupy_thrust.cu @@ -23,15 +23,15 @@ #include "cupy_thrust.h" -using namespace thrust; - - #if CUPY_USE_HIP typedef hipStream_t cudaStream_t; namespace cuda { using thrust::hip::par; } - +#else // #if CUPY_USE_HIP +namespace cuda { + using thrust::cuda::par; +} #endif // #if CUPY_USE_HIP @@ -77,13 +77,13 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2)) THRUST_OPTIONAL_CPP11_CONSTEXPR #endif -bool _tuple_less(const tuple& lhs, - const tuple& rhs) { +bool _tuple_less(const thrust::tuple& lhs, + const thrust::tuple& rhs) { const size_t& lhs_k = lhs.template get<0>(); const size_t& rhs_k = rhs.template get<0>(); const T& lhs_v = lhs.template get<1>(); const T& rhs_v = rhs.template get<1>(); - const less _less; + const thrust::less _less; // tuple's comparison rule: compare the 1st member, then 2nd, then 3rd, ..., // which should be respected @@ -152,7 +152,7 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less>::operator() ( +bool thrust::less>::operator() ( const complex& lhs, const complex& rhs) const { return _cmp_less>(lhs, rhs); @@ -164,7 +164,7 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less>::operator() ( +bool thrust::less>::operator() ( const complex& lhs, const complex& rhs) const { return _cmp_less>(lhs, rhs); @@ -176,8 +176,8 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less< tuple> >::operator() ( - const tuple>& lhs, const tuple>& rhs) const { +bool thrust::less< thrust::tuple> >::operator() ( + const thrust::tuple>& lhs, const thrust::tuple>& rhs) const { return _tuple_less>(lhs, rhs); } @@ -188,8 +188,8 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less< tuple> >::operator() ( - const tuple>& lhs, const tuple>& rhs) const { +bool thrust::less< thrust::tuple> >::operator() ( + const thrust::tuple>& lhs, const thrust::tuple>& rhs) const { return _tuple_less>(lhs, rhs); } @@ -228,7 +228,7 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less::operator() ( +bool thrust::less::operator() ( const float& lhs, const float& rhs) const { return _real_less(lhs, rhs); @@ -240,7 +240,7 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less::operator() ( +bool thrust::less::operator() ( const double& lhs, const double& rhs) const { return _real_less(lhs, rhs); @@ -252,8 +252,8 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less< tuple >::operator() ( - const tuple& lhs, const tuple& rhs) const { +bool thrust::less< thrust::tuple >::operator() ( + const thrust::tuple& lhs, const thrust::tuple& rhs) const { return _tuple_less(lhs, rhs); } @@ -264,8 +264,8 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less< tuple >::operator() ( - const tuple& lhs, const tuple& rhs) const { +bool thrust::less< thrust::tuple >::operator() ( + const thrust::tuple& lhs, const thrust::tuple& rhs) const { return _tuple_less(lhs, rhs); } @@ -292,7 +292,7 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less<__half>::operator() (const __half& lhs, const __half& rhs) const { +bool thrust::less<__half>::operator() (const __half& lhs, const __half& rhs) const { return _real_less<__half>(lhs, rhs); } @@ -302,8 +302,8 @@ __host__ __device__ __forceinline__ #if (__CUDACC_VER_MAJOR__ >11 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ >= 2) || HIP_VERSION >= 402) THRUST_OPTIONAL_CPP11_CONSTEXPR_LESS #endif -bool less< tuple >::operator() ( - const tuple& lhs, const tuple& rhs) const { +bool thrust::less< thrust::tuple >::operator() ( + const thrust::tuple& lhs, const thrust::tuple& rhs) const { return _tuple_less<__half>(lhs, rhs); } @@ -326,8 +326,8 @@ struct _sort { void* memory) { size_t ndim = shape.size(); ptrdiff_t size; - device_ptr dp_data_first, dp_data_last; - device_ptr dp_keys_first, dp_keys_last; + thrust::device_ptr dp_data_first, dp_data_last; + thrust::device_ptr dp_keys_first, dp_keys_last; cudaStream_t stream_ = (cudaStream_t)stream; cupy_allocator alloc(memory); @@ -337,27 +337,27 @@ struct _sort { size *= shape[i]; } - dp_data_first = device_pointer_cast(static_cast(data_start)); - dp_data_last = device_pointer_cast(static_cast(data_start) + size); + dp_data_first = thrust::device_pointer_cast(static_cast(data_start)); + dp_data_last = thrust::device_pointer_cast(static_cast(data_start) + size); if (ndim == 1) { - stable_sort(cuda::par(alloc).on(stream_), dp_data_first, dp_data_last, less()); + stable_sort(cuda::par(alloc).on(stream_), dp_data_first, dp_data_last, thrust::less()); } else { // Generate key indices. - dp_keys_first = device_pointer_cast(keys_start); - dp_keys_last = device_pointer_cast(keys_start + size); + dp_keys_first = thrust::device_pointer_cast(keys_start); + dp_keys_last = thrust::device_pointer_cast(keys_start + size); transform(cuda::par(alloc).on(stream_), - make_counting_iterator(0), - make_counting_iterator(size), - make_constant_iterator(shape[ndim-1]), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + thrust::make_constant_iterator(shape[ndim-1]), dp_keys_first, - divides()); + thrust::divides()); stable_sort( cuda::par(alloc).on(stream_), make_zip_iterator(make_tuple(dp_keys_first, dp_data_first)), make_zip_iterator(make_tuple(dp_keys_last, dp_data_last)), - less< tuple >()); + thrust::less< thrust::tuple >()); } } }; @@ -372,7 +372,7 @@ class elem_less { public: elem_less(const T *data):_data(data) {} __device__ __forceinline__ bool operator()(size_t i, size_t j) const { - return less()(_data[i], _data[j]); + return thrust::less()(_data[i], _data[j]); } private: const T *_data; @@ -385,8 +385,8 @@ struct _lexsort { /* idx_start is the beginning of the output array where the indexes that would sort the data will be placed. The original contents of idx_start will be destroyed. */ - device_ptr dp_first = device_pointer_cast(idx_start); - device_ptr dp_last = device_pointer_cast(idx_start + n); + thrust::device_ptr dp_first = thrust::device_pointer_cast(idx_start); + thrust::device_ptr dp_last = thrust::device_pointer_cast(idx_start + n); cudaStream_t stream_ = (cudaStream_t)stream; cupy_allocator alloc(memory); sequence(cuda::par(alloc).on(stream_), dp_first, dp_last); @@ -422,9 +422,9 @@ struct _argsort { cudaStream_t stream_ = (cudaStream_t)stream; cupy_allocator alloc(memory); - device_ptr dp_idx_first, dp_idx_last; - device_ptr dp_data_first, dp_data_last; - device_ptr dp_keys_first, dp_keys_last; + thrust::device_ptr dp_idx_first, dp_idx_last; + thrust::device_ptr dp_data_first, dp_data_last; + thrust::device_ptr dp_keys_first, dp_keys_last; // Compute the total size of the data array. size = shape[0]; @@ -433,18 +433,18 @@ struct _argsort { } // Cast device pointers of data. - dp_data_first = device_pointer_cast(static_cast(data_start)); - dp_data_last = device_pointer_cast(static_cast(data_start) + size); + dp_data_first = thrust::device_pointer_cast(static_cast(data_start)); + dp_data_last = thrust::device_pointer_cast(static_cast(data_start) + size); // Generate an index sequence. - dp_idx_first = device_pointer_cast(static_cast(idx_start)); - dp_idx_last = device_pointer_cast(static_cast(idx_start) + size); + dp_idx_first = thrust::device_pointer_cast(static_cast(idx_start)); + dp_idx_last = thrust::device_pointer_cast(static_cast(idx_start) + size); transform(cuda::par(alloc).on(stream_), - make_counting_iterator(0), - make_counting_iterator(size), - make_constant_iterator(shape[ndim-1]), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + thrust::make_constant_iterator(shape[ndim-1]), dp_idx_first, - modulus()); + thrust::modulus()); if (ndim == 1) { // Sort the index sequence by data. @@ -454,14 +454,14 @@ struct _argsort { dp_idx_first); } else { // Generate key indices. - dp_keys_first = device_pointer_cast(static_cast(keys_start)); - dp_keys_last = device_pointer_cast(static_cast(keys_start) + size); + dp_keys_first = thrust::device_pointer_cast(static_cast(keys_start)); + dp_keys_last = thrust::device_pointer_cast(static_cast(keys_start) + size); transform(cuda::par(alloc).on(stream_), - make_counting_iterator(0), - make_counting_iterator(size), - make_constant_iterator(shape[ndim-1]), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + thrust::make_constant_iterator(shape[ndim-1]), dp_keys_first, - divides()); + thrust::divides()); stable_sort_by_key( cuda::par(alloc).on(stream_), From 43503bb62ed78d6522b4fbbb12da4a65ecf06ba9 Mon Sep 17 00:00:00 2001 From: hubertlu-tw Date: Tue, 4 Oct 2022 23:03:45 +0000 Subject: [PATCH 02/57] Fix missing rocprim identifier --- cupy/cuda/cupy_thrust.cu | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cupy/cuda/cupy_thrust.cu b/cupy/cuda/cupy_thrust.cu index 9980f5b0bd9..b703f077d02 100644 --- a/cupy/cuda/cupy_thrust.cu +++ b/cupy/cuda/cupy_thrust.cu @@ -349,7 +349,11 @@ struct _sort { transform(cuda::par(alloc).on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), + #ifndef __HIP_PLATFORM_HCC__ thrust::make_constant_iterator(shape[ndim-1]), + #else + rocprim::make_constant_iterator(shape[ndim-1]), + #endif dp_keys_first, thrust::divides()); @@ -442,7 +446,11 @@ struct _argsort { transform(cuda::par(alloc).on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), - thrust::make_constant_iterator(shape[ndim-1]), + #ifndef __HIP_PLATFORM_HCC__ + thrust::make_constant_iterator(shape[ndim-1]), + #else + rocprim::make_constant_iterator(shape[ndim-1]), + #endif dp_idx_first, thrust::modulus()); @@ -459,7 +467,11 @@ struct _argsort { transform(cuda::par(alloc).on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), + #ifndef __HIP_PLATFORM_HCC__ thrust::make_constant_iterator(shape[ndim-1]), + #else + rocprim::make_constant_iterator(shape[ndim-1]), + #endif dp_keys_first, thrust::divides()); From 2bb6d49f5c47df498a2d7c9b2a8aa3c4e52906ac Mon Sep 17 00:00:00 2001 From: Aswin John Mathews Date: Fri, 14 Oct 2022 18:57:31 +0000 Subject: [PATCH 03/57] Cherry picked "Fix missing ROCPrim identifier in cupy_thrust.cu" --- cupy/cuda/cupy_thrust.cu | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cupy/cuda/cupy_thrust.cu b/cupy/cuda/cupy_thrust.cu index 9980f5b0bd9..b703f077d02 100644 --- a/cupy/cuda/cupy_thrust.cu +++ b/cupy/cuda/cupy_thrust.cu @@ -349,7 +349,11 @@ struct _sort { transform(cuda::par(alloc).on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), + #ifndef __HIP_PLATFORM_HCC__ thrust::make_constant_iterator(shape[ndim-1]), + #else + rocprim::make_constant_iterator(shape[ndim-1]), + #endif dp_keys_first, thrust::divides()); @@ -442,7 +446,11 @@ struct _argsort { transform(cuda::par(alloc).on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), - thrust::make_constant_iterator(shape[ndim-1]), + #ifndef __HIP_PLATFORM_HCC__ + thrust::make_constant_iterator(shape[ndim-1]), + #else + rocprim::make_constant_iterator(shape[ndim-1]), + #endif dp_idx_first, thrust::modulus()); @@ -459,7 +467,11 @@ struct _argsort { transform(cuda::par(alloc).on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), + #ifndef __HIP_PLATFORM_HCC__ thrust::make_constant_iterator(shape[ndim-1]), + #else + rocprim::make_constant_iterator(shape[ndim-1]), + #endif dp_keys_first, thrust::divides()); From 1e2f8fc8b88c19fc061cf64d7d04944a432bce7c Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 24 Jan 2023 20:51:27 +0000 Subject: [PATCH 04/57] skipped all multi-gpu tests due to non-support --- tests/cupy_tests/fft_tests/test_cache.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/cupy_tests/fft_tests/test_cache.py b/tests/cupy_tests/fft_tests/test_cache.py index 7c821a61b4f..bf048ce1fa2 100644 --- a/tests/cupy_tests/fft_tests/test_cache.py +++ b/tests/cupy_tests/fft_tests/test_cache.py @@ -146,6 +146,8 @@ def test_LRU_cache4(self): cache[next(iterator)[0]] @testing.multi_gpu(2) + @pytest.mark.skipif(runtime.is_hip, + reason="hipFFT doesn't support multi-GPU") def test_LRU_cache5(self): # test if the LRU cache is thread-local @@ -198,6 +200,8 @@ def thread_init_caches(gpus, queue): assert stdout.count('uninitialized') == n_devices - 2 @testing.multi_gpu(2) + @pytest.mark.skipif(runtime.is_hip, + reason="hipFFT doesn't support multi-GPU") def test_LRU_cache6(self): # test if each device has a separate cache cache0 = self.caches[0] From 3b27b08691bdd4ef4a27e1e8cb59c10ab0b66e57 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 24 Jan 2023 21:17:35 +0000 Subject: [PATCH 05/57] skipped multi-gpu nccl tests due to non-multigpu hang --- tests/cupy_tests/cuda_tests/test_nccl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/cupy_tests/cuda_tests/test_nccl.py b/tests/cupy_tests/cuda_tests/test_nccl.py index 12f66c15180..3a7e0b826e2 100644 --- a/tests/cupy_tests/cuda_tests/test_nccl.py +++ b/tests/cupy_tests/cuda_tests/test_nccl.py @@ -1,11 +1,12 @@ import pickle import unittest +import pytest import cupy from cupy import cuda from cupy.cuda import nccl from cupy import testing - +from cupy.cuda import runtime nccl_available = nccl.available @@ -70,6 +71,8 @@ def test_comm_size(self): @testing.multi_gpu(2) @unittest.skipUnless(nccl_version >= 2700, 'Using old NCCL') + @pytest.mark.skipif(runtime.is_hip, + reason="rccl doesn't support multi-GPU") def test_send_recv(self): devs = [0, 1] comms = nccl.NcclCommunicator.initAll(devs) From 57f4c6d70aebc85e7d714ed12d1da0b80bfe9086 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Mon, 20 Feb 2023 20:37:43 +0000 Subject: [PATCH 06/57] fix merge issue --- cupy/cuda/cupy_thrust.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cupy/cuda/cupy_thrust.cu b/cupy/cuda/cupy_thrust.cu index 3834a7def47..7be3a6c38b2 100644 --- a/cupy/cuda/cupy_thrust.cu +++ b/cupy/cuda/cupy_thrust.cu @@ -355,7 +355,6 @@ struct _sort { #else thrust::make_counting_iterator(0), thrust::make_counting_iterator(size), - #ifndef __HIP_PLATFORM_HCC__ thrust::make_constant_iterator(shape[ndim-1]), #endif dp_keys_first, From a83638afbde72346bbca1f2dd7664901131b9e3e Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Thu, 16 Mar 2023 13:24:24 +0000 Subject: [PATCH 07/57] xfail failed tests IFU_2023_02_20 --- tests/cupy_tests/random_tests/test_generator.py | 1 + tests/cupy_tests/random_tests/test_sample.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/cupy_tests/random_tests/test_generator.py b/tests/cupy_tests/random_tests/test_generator.py index 9fd81a804a3..99574110cf4 100644 --- a/tests/cupy_tests/random_tests/test_generator.py +++ b/tests/cupy_tests/random_tests/test_generator.py @@ -854,6 +854,7 @@ class TestChoiceChi(RandomGeneratorTestCase): target_method = 'choice' @_condition.repeat(3, 10) + @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_goodness_of_fit(self): trial = 100 vals = self.generate_many(3, 1, True, [0.3, 0.3, 0.4], _count=trial) diff --git a/tests/cupy_tests/random_tests/test_sample.py b/tests/cupy_tests/random_tests/test_sample.py index 2a6751aa1d4..adc0f91197b 100644 --- a/tests/cupy_tests/random_tests/test_sample.py +++ b/tests/cupy_tests/random_tests/test_sample.py @@ -100,7 +100,6 @@ def test_goodness_of_fit(self): assert _hypothesis.chi_square_test(counts, expected) @_condition.repeat(3, 10) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_goodness_of_fit_2(self): mx = 5 vals = random.randint(mx, size=(5, 20)).get() @@ -196,7 +195,6 @@ def test_goodness_of_fit(self): assert _hypothesis.chi_square_test(counts, expected) @_condition.repeat(3, 10) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_goodness_of_fit_2(self): mx = 5 vals = random.randint(0, mx, (5, 20)).get() From 9acc02d3ed765946a9f132a86c077479271c69cf Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 17 Mar 2023 13:20:56 +0000 Subject: [PATCH 08/57] xfailed tests that fail on rocm --- tests/cupy_tests/core_tests/test_ndarray_reduction.py | 2 ++ tests/cupy_tests/manipulation_tests/test_basic.py | 2 ++ tests/cupy_tests/math_tests/test_sumprod.py | 3 +++ tests/cupy_tests/random_tests/test_sample.py | 2 -- tests/cupy_tests/sorting_tests/test_search.py | 3 +++ tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py | 1 + 6 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/cupy_tests/core_tests/test_ndarray_reduction.py b/tests/cupy_tests/core_tests/test_ndarray_reduction.py index 608d9c0508f..4de5a5915fe 100644 --- a/tests/cupy_tests/core_tests/test_ndarray_reduction.py +++ b/tests/cupy_tests/core_tests/test_ndarray_reduction.py @@ -7,6 +7,7 @@ import cupy._core._accelerator as _acc from cupy._core import _cub_reduction from cupy import testing +from cupy.cuda import runtime @testing.parameterize(*testing.product({ @@ -412,6 +413,7 @@ def setUp(self): @testing.for_all_dtypes(no_bool=True) @testing.numpy_cupy_allclose( contiguous_check=False, accept_error=ValueError) + @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_min(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype, order=self.order) diff --git a/tests/cupy_tests/manipulation_tests/test_basic.py b/tests/cupy_tests/manipulation_tests/test_basic.py index 30eadbd9d0b..d563f9215fb 100644 --- a/tests/cupy_tests/manipulation_tests/test_basic.py +++ b/tests/cupy_tests/manipulation_tests/test_basic.py @@ -143,6 +143,7 @@ def get_numpy(): @testing.multi_gpu(2) @testing.for_all_dtypes() + @pytest.mark.xfail(cuda.runtime.is_hip, reason='ROCm/HIP may have a bug') def test_copyto_where_multigpu_raises(self, dtype): self._check_copyto_where_multigpu_raises(dtype, 2) @@ -169,6 +170,7 @@ def test_copyto_multigpu(self, xp, dtype): @testing.multi_gpu(2) @testing.for_all_dtypes() + @pytest.mark.xfail(cuda.runtime.is_hip, reason='ROCm/HIP may have a bug') def test_copyto_multigpu_noncontinguous(self, dtype): with cuda.Device(0): src = testing.shaped_arange((2, 3, 4), cupy, dtype) diff --git a/tests/cupy_tests/math_tests/test_sumprod.py b/tests/cupy_tests/math_tests/test_sumprod.py index 1e8e5a8f100..5b275a7b7e7 100644 --- a/tests/cupy_tests/math_tests/test_sumprod.py +++ b/tests/cupy_tests/math_tests/test_sumprod.py @@ -8,6 +8,7 @@ import cupy.cuda.cutensor from cupy._core import _cub_reduction from cupy import testing +from cupy.cuda import runtime @testing.gpu @@ -237,6 +238,7 @@ def setUp(self): # sum supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes('qQfdFD') @testing.numpy_cupy_allclose(rtol=1E-5) + @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_sum(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype) if self.order in ('c', 'C'): @@ -287,6 +289,7 @@ def test_cub_sum_empty_axis(self, xp, dtype): # prod supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes('qQfdFD') @testing.numpy_cupy_allclose(rtol=1E-5) + @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_prod(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype) if self.order in ('c', 'C'): diff --git a/tests/cupy_tests/random_tests/test_sample.py b/tests/cupy_tests/random_tests/test_sample.py index adc0f91197b..2b4a96f56f7 100644 --- a/tests/cupy_tests/random_tests/test_sample.py +++ b/tests/cupy_tests/random_tests/test_sample.py @@ -2,11 +2,9 @@ from unittest import mock import numpy -import pytest import cupy from cupy import cuda -from cupy.cuda import runtime from cupy import random from cupy import testing from cupy.testing import _condition diff --git a/tests/cupy_tests/sorting_tests/test_search.py b/tests/cupy_tests/sorting_tests/test_search.py index 9422f3d2001..ea2fe5a7982 100644 --- a/tests/cupy_tests/sorting_tests/test_search.py +++ b/tests/cupy_tests/sorting_tests/test_search.py @@ -5,6 +5,7 @@ import cupy._core._accelerator as _acc from cupy._core import _cub_reduction from cupy import testing +from cupy.cuda import runtime @testing.gpu @@ -197,6 +198,7 @@ def setUp(self): @testing.for_dtypes('bhilBHILefdFD') @testing.numpy_cupy_allclose(rtol=1E-5, contiguous_check=False) + @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_argmin(self, xp, dtype): _skip_cuda90(dtype) a = testing.shaped_random(self.shape, xp, dtype) @@ -232,6 +234,7 @@ def test_cub_argmin(self, xp, dtype): @testing.for_dtypes('bhilBHILefdFD') @testing.numpy_cupy_allclose(rtol=1E-5, contiguous_check=False) + @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_argmax(self, xp, dtype): _skip_cuda90(dtype) a = testing.shaped_random(self.shape, xp, dtype) diff --git a/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py b/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py index c86bd2caf40..b162a064c6f 100644 --- a/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py +++ b/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py @@ -325,6 +325,7 @@ class TestOrderFilter: @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) @testing.numpy_cupy_allclose(atol=1e-8, rtol=1e-8, scipy_name='scp', accept_error=ValueError) # for even kernels + @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_order_filter(self, xp, scp, dtype): a = testing.shaped_random(self.a, xp, dtype) d = self.domain From 92d2243fe562a90a226651c5889050cf101af66a Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Sat, 18 Mar 2023 07:01:37 +0000 Subject: [PATCH 09/57] skip ndimage_tests and skip tests flaky --- .../core_tests/test_ndarray_reduction.py | 2 +- tests/cupy_tests/math_tests/test_sumprod.py | 4 ++-- tests/cupy_tests/sorting_tests/test_search.py | 4 ++-- .../scipy_tests/ndimage_tests/test_filters.py | 16 ++++++++++++++ .../scipy_tests/ndimage_tests/test_fourier.py | 6 ++++++ .../ndimage_tests/test_interpolation.py | 21 +++++++++++++++++++ .../ndimage_tests/test_measurements.py | 6 ++++++ .../ndimage_tests/test_morphology.py | 13 ++++++++++++ .../signal_tests/test_signaltools.py | 3 ++- 9 files changed, 69 insertions(+), 6 deletions(-) diff --git a/tests/cupy_tests/core_tests/test_ndarray_reduction.py b/tests/cupy_tests/core_tests/test_ndarray_reduction.py index 4de5a5915fe..c2fa3e3e3b0 100644 --- a/tests/cupy_tests/core_tests/test_ndarray_reduction.py +++ b/tests/cupy_tests/core_tests/test_ndarray_reduction.py @@ -413,7 +413,7 @@ def setUp(self): @testing.for_all_dtypes(no_bool=True) @testing.numpy_cupy_allclose( contiguous_check=False, accept_error=ValueError) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_min(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype, order=self.order) diff --git a/tests/cupy_tests/math_tests/test_sumprod.py b/tests/cupy_tests/math_tests/test_sumprod.py index 5b275a7b7e7..7470b1e5752 100644 --- a/tests/cupy_tests/math_tests/test_sumprod.py +++ b/tests/cupy_tests/math_tests/test_sumprod.py @@ -238,7 +238,7 @@ def setUp(self): # sum supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes('qQfdFD') @testing.numpy_cupy_allclose(rtol=1E-5) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_sum(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype) if self.order in ('c', 'C'): @@ -289,7 +289,7 @@ def test_cub_sum_empty_axis(self, xp, dtype): # prod supports less dtypes; don't test float16 as it's not as accurate? @testing.for_dtypes('qQfdFD') @testing.numpy_cupy_allclose(rtol=1E-5) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_prod(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype) if self.order in ('c', 'C'): diff --git a/tests/cupy_tests/sorting_tests/test_search.py b/tests/cupy_tests/sorting_tests/test_search.py index ea2fe5a7982..0617e64496e 100644 --- a/tests/cupy_tests/sorting_tests/test_search.py +++ b/tests/cupy_tests/sorting_tests/test_search.py @@ -198,7 +198,7 @@ def setUp(self): @testing.for_dtypes('bhilBHILefdFD') @testing.numpy_cupy_allclose(rtol=1E-5, contiguous_check=False) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_argmin(self, xp, dtype): _skip_cuda90(dtype) a = testing.shaped_random(self.shape, xp, dtype) @@ -234,7 +234,7 @@ def test_cub_argmin(self, xp, dtype): @testing.for_dtypes('bhilBHILefdFD') @testing.numpy_cupy_allclose(rtol=1E-5, contiguous_check=False) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_argmax(self, xp, dtype): _skip_cuda90(dtype) a = testing.shaped_random(self.shape, xp, dtype) diff --git a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_filters.py b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_filters.py index ccc0b1bb61b..33ed91f7c7c 100644 --- a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_filters.py +++ b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_filters.py @@ -192,6 +192,7 @@ def _kshape(self): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFilter(FilterTestCaseBase): def _hip_skip_invalid_condition(self): @@ -229,6 +230,7 @@ def test_filter(self, xp, scp): @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestNearestFilterEdgeCase: @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp') @@ -305,6 +307,7 @@ def dummy_deriv_func(input, axis, output, mode, cval, *args, **kwargs): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFilterFast(FilterTestCaseBase): def _hip_skip_invalid_condition(self): @@ -370,6 +373,7 @@ def test_filter(self, xp, scp): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFilterComplexFast(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp') @@ -451,6 +455,7 @@ def lt_pyfunc(x): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestGenericFilter(FilterTestCaseBase): _func_or_kernels = { @@ -517,6 +522,7 @@ def shift_pyfunc(src, dst): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestGeneric1DFilter(FilterTestCaseBase): _func_or_kernels = { 'shift_raw': shift_raw, @@ -559,6 +565,7 @@ def test_filter(self, xp, scp): @testing.gpu # SciPy behavior fixed in 1.5.0: https://github.com/scipy/scipy/issues/11661 @testing.with_requires('scipy>=1.5.0') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestMirrorWithDim1(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp') def test_filter(self, xp, scp): @@ -586,6 +593,7 @@ def test_filter(self, xp, scp): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestShellSort(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp') def test_filter(self, xp, scp): @@ -612,6 +620,7 @@ def test_filter(self, xp, scp): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFortranOrder(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp') def test_filter(self, xp, scp): @@ -639,6 +648,7 @@ def test_filter(self, xp, scp): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestWeightDtype(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp') def test_filter(self, xp, scp): @@ -670,6 +680,7 @@ def test_filter(self, xp, scp): )) @testing.gpu @testing.with_requires('scipy>=1.5.9') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestWeightComplexDtype(FilterTestCaseBase): def _skip_noncomplex(self): @@ -718,6 +729,7 @@ def test_filter_complex_output_dtype_warns(self): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestSpecialWeightCases(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp', accept_error=ValueError) @@ -756,6 +768,7 @@ def test_replace_dim_with_0(self, xp, scp): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestSpecialCases1D(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp', accept_error=RuntimeError) @@ -772,6 +785,7 @@ def test_0_dim(self, xp, scp): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestInvalidAxis(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp', accept_error=ValueError) @@ -806,6 +820,7 @@ def test_invalid_axis_neg(self, xp, scp): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestInvalidMode(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp', accept_error=RuntimeError) @@ -825,6 +840,7 @@ def test_invalid_mode(self, xp, scp): @testing.gpu # SciPy behavior fixed in 1.2.0: https://github.com/scipy/scipy/issues/822 @testing.with_requires('scipy>=1.2.0') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestInvalidOrigin(FilterTestCaseBase): @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp', accept_error=ValueError) diff --git a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_fourier.py b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_fourier.py index dd3b051d458..8888b69f2de 100644 --- a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_fourier.py +++ b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_fourier.py @@ -7,6 +7,7 @@ import cupyx.scipy.fft # NOQA import cupyx.scipy.fftpack # NOQA import cupyx.scipy.ndimage # NOQA +from cupy.cuda import runtime try: # scipy.fft only available since SciPy 1.4.0 @@ -54,6 +55,7 @@ ) @testing.gpu @testing.with_requires("scipy") +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFourierShift: def _test_real_nd(self, xp, scp, x, real_axis): @@ -142,6 +144,7 @@ def test_complex_fft_with_output(self, xp, scp): ) @testing.gpu @testing.with_requires("scipy") +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFourierGaussian: def _test_real_nd(self, xp, scp, x, real_axis): @@ -230,6 +233,7 @@ def test_complex_fft_with_output(self, xp, scp): ) @testing.gpu @testing.with_requires("scipy") +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFourierUniform: def _test_real_nd(self, xp, scp, x, real_axis): @@ -312,6 +316,7 @@ def test_complex_fft_with_output(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFourierEllipsoid(): def _test_real_nd(self, xp, scp, x, real_axis): if x.ndim == 1 and scipy_version < '1.5.3': @@ -383,6 +388,7 @@ def test_complex_fft_with_output(self, xp, scp, dtype): @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestFourierEllipsoidInvalid(): # SciPy < 1.5 raises ValueError instead of AxisError diff --git a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py index b94a46a275b..e3919c684e0 100644 --- a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py +++ b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py @@ -42,6 +42,7 @@ def _conditional_scipy_version_skip(mode, order): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestMapCoordinates: _multiprocess_can_split = True @@ -124,6 +125,7 @@ def test_map_coordinates_int(self, xp, scp, dtype): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestMapCoordinatesHalfInteger: def _map_coordinates(self, xp, scp, a, coordinates): @@ -153,6 +155,7 @@ def test_map_coordinates_float(self, xp, scp, dtype): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestAffineTransform: _multiprocess_can_split = True @@ -238,6 +241,7 @@ def test_affine_transform_int(self, xp, scp, dtype): @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestAffineExceptions: def test_invalid_affine_ndim(self): @@ -326,6 +330,7 @@ def test_invalid_texture_arguments(self): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestAffineTransformTextureMemory: _multiprocess_can_split = True @@ -412,6 +417,7 @@ def test_affine_transform_texture_memory(self, xp, scp): @testing.gpu @testing.with_requires('opencv-python') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestAffineTransformOpenCV: _multiprocess_can_split = True @@ -453,6 +459,7 @@ def test_affine_transform_opencv(self, xp, dtype): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestRotate: _multiprocess_can_split = True @@ -531,6 +538,7 @@ def test_rotate_int(self, xp, scp, dtype): @testing.gpu # Scipy older than 1.3.0 raises IndexError instead of ValueError @testing.with_requires('scipy>=1.3.0') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestRotateExceptions: def test_rotate_invalid_plane(self): @@ -552,6 +560,7 @@ def test_rotate_invalid_plane(self): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestRotateAxes: _multiprocess_can_split = True @@ -566,6 +575,7 @@ def test_rotate_axes(self, xp, scp, dtype): @testing.gpu @testing.with_requires('opencv-python') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestRotateOpenCV: _multiprocess_can_split = True @@ -601,6 +611,7 @@ def test_rotate_opencv(self, xp, dtype): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestShift: _multiprocess_can_split = True @@ -681,6 +692,7 @@ def test_shift_int(self, xp, scp, dtype): 'cval': [cupy.nan, cupy.inf, -cupy.inf], })) @testing.gpu +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestInterpolationInvalidCval: def _prep_output(self, a): @@ -760,6 +772,7 @@ def test_map_coordinates(self, dtype): @testing.gpu @testing.with_requires('opencv-python') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestShiftOpenCV: _multiprocess_can_split = True @@ -787,6 +800,7 @@ def test_shift_opencv(self, xp, dtype): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestZoom: _multiprocess_can_split = True @@ -850,6 +864,7 @@ def test_zoom_int(self, xp, scp, dtype): 'mode': ['nearest', 'reflect', 'mirror', 'grid-wrap', 'grid-constant'], })) @testing.gpu +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestZoomOrder0IntegerGrid(): def test_zoom_grid_by_int_order0(self): @@ -874,6 +889,7 @@ def test_zoom_grid_by_int_order0(self): 'grid_mode': [False, True], })) @testing.gpu +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestZoomOutputSize1(): @testing.for_float_dtypes(no_float16=True) @@ -892,6 +908,7 @@ def test_zoom_output_size1(self, xp, scp, dtype): ) @testing.gpu @testing.with_requires('opencv-python') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestZoomOpenCV: _multiprocess_can_split = True @@ -930,6 +947,7 @@ def test_zoom_opencv_output_size1(self, xp, dtype): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestSplineFilter1d: @testing.numpy_cupy_allclose(atol=1e-5, rtol=1e-5, scipy_name='scp') def test_spline_filter1d(self, xp, scp): @@ -955,6 +973,7 @@ def test_spline_filter1d_output(self, xp, scp, array_order): # See #5537 @testing.slow @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestSplineFilter1dLargeArray: @pytest.mark.parametrize('mode', ['mirror', 'grid-wrap', 'reflect']) @@ -981,6 +1000,7 @@ def test_spline_filter1d_large_array(self, xp, scp, mode): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestSplineFilter: @testing.numpy_cupy_allclose(atol=1e-4, rtol=1e-4, scipy_name='scp') def test_spline_filter(self, xp, scp): @@ -1021,6 +1041,7 @@ def test_spline_filter_with_output(self, xp, scp, array_order): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestSplineFilterComplex: @testing.with_requires('scipy>=1.6') diff --git a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_measurements.py b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_measurements.py index b59e9be9ea7..84e2f2a013a 100644 --- a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_measurements.py +++ b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_measurements.py @@ -46,6 +46,7 @@ def _generate_binary_structure(rank, connectivity): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestLabel: @testing.numpy_cupy_array_equal(scipy_name='scp') @@ -71,6 +72,7 @@ def test_label(self, xp, scp): @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestLabelSpecialCases: @testing.numpy_cupy_array_equal(scipy_name='scp') @@ -116,6 +118,7 @@ def test_label_swirl(self, xp, scp): 'op': stats_ops, })) @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestStats: def _make_image(self, shape, xp, dtype): @@ -298,6 +301,7 @@ def test_no_values(self, xp, scp, dtype): 'enable_cub': [True, False], })) @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestMeasurementsSelect: @pytest.fixture(autouse=True) @@ -376,6 +380,7 @@ def test_measurements_select(self, xp, scp, dtype): 'shape': [(200,), (16, 20)], })) @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestHistogram(): def _make_image(self, shape, xp, dtype, scale): @@ -418,6 +423,7 @@ def test_histogram(self, xp, scp, dtype): 'pass_positions': [True, False], })) @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestLabeledComprehension(): def _make_image(self, shape, xp, dtype, scale): diff --git a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py index b96eb1db61c..7f299e261ec 100644 --- a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py +++ b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py @@ -24,6 +24,7 @@ {'rank': 3, 'connectivity': 500}) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestGenerateBinaryStructure: @testing.numpy_cupy_array_equal(scipy_name='scp') @@ -34,6 +35,7 @@ def test_generate_binary_structure(self, xp, scp): @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestIterateStructure: @testing.numpy_cupy_array_equal(scipy_name='scp') @@ -74,6 +76,7 @@ def test_iterate_structure3(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestBinaryErosionAndDilation1d: def _filter(self, xp, scp, x): filter = getattr(scp.ndimage, self.filter) @@ -128,6 +131,7 @@ def test_binary_erosion_and_dilation_1d(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy>=1.1.0') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestBinaryOpeningAndClosing: def _filter(self, xp, scp, x): filter = getattr(scp.ndimage, self.filter) @@ -179,6 +183,7 @@ def test_binary_opening_and_closing(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestBinaryFillHoles: def _filter(self, xp, scp, x): filter = scp.ndimage.binary_fill_holes @@ -228,6 +233,7 @@ def test_binary_fill_holes(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestBinaryHitOrMiss: def _filter(self, xp, scp, x): filter = scp.ndimage.binary_hit_or_miss @@ -301,6 +307,7 @@ def test_binary_hit_or_miss(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestBinaryPropagation: def _filter(self, xp, scp, x): filter = scp.ndimage.binary_propagation @@ -333,6 +340,7 @@ def test_binary_propagation(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestBinaryErosionAndDilation: def _filter(self, xp, scp, x): filter = getattr(scp.ndimage, self.filter) @@ -370,6 +378,7 @@ def test_binary_erosion_and_dilation(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestBinaryErosionAndDilationContiguity: def _filter(self, xp, scp, x): filter = getattr(scp.ndimage, self.filter) @@ -429,6 +438,7 @@ def test_binary_erosion_and_dilation_input_contiguity(self, xp, scp): )) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestGreyErosionAndDilation: def _filter(self, xp, scp, x): @@ -475,6 +485,7 @@ def test_grey_erosion_and_dilation(self, xp, scp): })) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestGreyClosingAndOpening: shape = (4, 5) @@ -525,6 +536,7 @@ def test_grey_closing_and_opening(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestMorphologicalGradientAndLaplace: def _filter(self, xp, scp, x): @@ -588,6 +600,7 @@ def test_morphological_gradient_and_laplace(self, xp, scp): ) @testing.gpu @testing.with_requires('scipy') +@pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestWhiteTophatAndBlackTopHat: def _filter(self, xp, scp, x): diff --git a/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py b/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py index b162a064c6f..2cd31a92d9a 100644 --- a/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py +++ b/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py @@ -325,7 +325,7 @@ class TestOrderFilter: @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) @testing.numpy_cupy_allclose(atol=1e-8, rtol=1e-8, scipy_name='scp', accept_error=ValueError) # for even kernels - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_order_filter(self, xp, scp, dtype): a = testing.shaped_random(self.a, xp, dtype) d = self.domain @@ -345,6 +345,7 @@ class TestMedFilt: @testing.for_all_dtypes() @testing.numpy_cupy_allclose(atol=1e-8, rtol=1e-8, scipy_name='scp', accept_error=ValueError) # for even kernels + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_medfilt(self, xp, scp, dtype): if sys.platform == 'win32': pytest.xfail('medfilt broken for Scipy 1.7.0 in windows') From 7fa7b87654b6abb0ad73805d0f3ac9af562d0238 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Sat, 18 Mar 2023 07:17:25 +0000 Subject: [PATCH 10/57] missing import --- tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py index 7f299e261ec..2a6d5e7335a 100644 --- a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py +++ b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_morphology.py @@ -2,6 +2,7 @@ import pytest from cupy import testing +from cupy.cuda import runtime import cupyx.scipy.ndimage # NOQA try: From 9d53554b4b1d257f90e2b713245c645be24392b2 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 18 Mar 2023 21:46:26 +0000 Subject: [PATCH 11/57] Skip manipulation tests causing hang --- .../manipulation_tests/test_basic.py | 9 ++++-- .../manipulation_tests/test_join.py | 28 +++++++++++++++++-- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/tests/cupy_tests/manipulation_tests/test_basic.py b/tests/cupy_tests/manipulation_tests/test_basic.py index d563f9215fb..d1344a06779 100644 --- a/tests/cupy_tests/manipulation_tests/test_basic.py +++ b/tests/cupy_tests/manipulation_tests/test_basic.py @@ -7,7 +7,7 @@ import cupy from cupy import cuda from cupy import testing - +from cupy.cuda import runtime @testing.gpu class TestBasic: @@ -143,23 +143,26 @@ def get_numpy(): @testing.multi_gpu(2) @testing.for_all_dtypes() - @pytest.mark.xfail(cuda.runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_copyto_where_multigpu_raises(self, dtype): self._check_copyto_where_multigpu_raises(dtype, 2) @testing.multi_gpu(4) @testing.for_all_dtypes() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_copyto_where_multigpu_raises_4(self, dtype): self._check_copyto_where_multigpu_raises(dtype, 4) @testing.multi_gpu(6) @testing.for_all_dtypes() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_copyto_where_multigpu_raises_6(self, dtype): self._check_copyto_where_multigpu_raises(dtype, 6) @testing.multi_gpu(2) @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_copyto_multigpu(self, xp, dtype): with cuda.Device(0): a = testing.shaped_arange((2, 3, 4), xp, dtype) @@ -170,7 +173,7 @@ def test_copyto_multigpu(self, xp, dtype): @testing.multi_gpu(2) @testing.for_all_dtypes() - @pytest.mark.xfail(cuda.runtime.is_hip, reason='ROCm/HIP may have a bug') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_copyto_multigpu_noncontinguous(self, dtype): with cuda.Device(0): src = testing.shaped_arange((2, 3, 4), cupy, dtype) diff --git a/tests/cupy_tests/manipulation_tests/test_join.py b/tests/cupy_tests/manipulation_tests/test_join.py index 6fd40b9b3a4..5cc249b6698 100644 --- a/tests/cupy_tests/manipulation_tests/test_join.py +++ b/tests/cupy_tests/manipulation_tests/test_join.py @@ -4,7 +4,7 @@ import cupy from cupy import testing from cupy import cuda - +from cupy.cuda import runtime class TestJoin: @@ -37,6 +37,7 @@ def test_column_stack_wrong_shape(self): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate1(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_reverse_arange((2, 3, 2), xp, dtype) @@ -45,6 +46,7 @@ def test_concatenate1(self, xp, dtype): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate2(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_reverse_arange((2, 3, 2), xp, dtype) @@ -53,6 +55,7 @@ def test_concatenate2(self, xp, dtype): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_axis_none(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) b = testing.shaped_reverse_arange((3, 5, 2), xp, dtype) @@ -61,6 +64,7 @@ def test_concatenate_axis_none(self, xp, dtype): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_large_2(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_reverse_arange((2, 3, 2), xp, dtype) @@ -71,6 +75,7 @@ def test_concatenate_large_2(self, xp, dtype): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_large_3(self, xp, dtype): a = testing.shaped_arange((2, 3, 1), xp, dtype) b = testing.shaped_reverse_arange((2, 3, 1), xp, dtype) @@ -78,6 +83,7 @@ def test_concatenate_large_3(self, xp, dtype): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_large_4(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_reverse_arange((2, 3, 4), xp, dtype) @@ -85,12 +91,14 @@ def test_concatenate_large_4(self, xp, dtype): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_large_5(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_reverse_arange((2, 3, 4), xp, 'i') return xp.concatenate((a, b) * 10, axis=-1) @testing.multi_gpu(2) + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_large_different_devices(self): arrs = [] for i in range(10): @@ -105,6 +113,7 @@ def test_concatenate_large_different_devices(self): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_f_contiguous(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_arange((2, 3, 2), xp, dtype).T @@ -113,6 +122,7 @@ def test_concatenate_f_contiguous(self, xp, dtype): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_large_f_contiguous(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) b = testing.shaped_arange((2, 3, 2), xp, dtype).T @@ -122,12 +132,14 @@ def test_concatenate_large_f_contiguous(self, xp, dtype): return xp.concatenate((a, b, c, d, e) * 2, axis=-1) @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_many_multi_dtype(self, xp): a = testing.shaped_arange((2, 1), xp, 'i') b = testing.shaped_arange((2, 1), xp, 'f') return xp.concatenate((a, b) * 1024, axis=1) @testing.slow + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_32bit_boundary(self): a = cupy.zeros((2 ** 30,), dtype=cupy.int8) b = cupy.zeros((2 ** 30,), dtype=cupy.int8) @@ -138,12 +150,14 @@ def test_concatenate_32bit_boundary(self): # Free huge memory for slow test cupy.get_default_memory_pool().free_all_blocks() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_wrong_ndim(self): a = cupy.empty((2, 3)) b = cupy.empty((2,)) with pytest.raises(ValueError): cupy.concatenate((a, b)) + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_wrong_shape(self): a = cupy.empty((2, 3, 4)) b = cupy.empty((3, 3, 4)) @@ -153,6 +167,7 @@ def test_concatenate_wrong_shape(self): @testing.for_all_dtypes(name='dtype') @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_out(self, xp, dtype): a = testing.shaped_arange((3, 4), xp, dtype) b = testing.shaped_reverse_arange((3, 4), xp, dtype) @@ -162,6 +177,7 @@ def test_concatenate_out(self, xp, dtype): return out @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_out_same_kind(self, xp): a = testing.shaped_arange((3, 4), xp, xp.float64) b = testing.shaped_reverse_arange((3, 4), xp, xp.float64) @@ -170,6 +186,7 @@ def test_concatenate_out_same_kind(self, xp): xp.concatenate((a, b, c), axis=1, out=out) return out + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_out_invalid_shape(self): for xp in (numpy, cupy): a = testing.shaped_arange((3, 4), xp, xp.float64) @@ -178,7 +195,8 @@ def test_concatenate_out_invalid_shape(self): out = xp.zeros((4, 10), dtype=xp.float64) with pytest.raises(ValueError): xp.concatenate((a, b, c), axis=1, out=out) - + + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_out_invalid_shape_2(self): for xp in (numpy, cupy): a = testing.shaped_arange((3, 4), xp, xp.float64) @@ -188,6 +206,7 @@ def test_concatenate_out_invalid_shape_2(self): with pytest.raises(ValueError): xp.concatenate((a, b, c), axis=1, out=out) + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_out_invalid_dtype(self): for xp in (numpy, cupy): a = testing.shaped_arange((3, 4), xp, xp.float64) @@ -199,6 +218,7 @@ def test_concatenate_out_invalid_dtype(self): @testing.for_all_dtypes_combination(names=['dtype1', 'dtype2']) @testing.numpy_cupy_array_equal() + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_different_dtype(self, xp, dtype1, dtype2): a = testing.shaped_arange((3, 4), xp, dtype1) b = testing.shaped_arange((3, 4), xp, dtype2) @@ -206,6 +226,7 @@ def test_concatenate_different_dtype(self, xp, dtype1, dtype2): @testing.for_all_dtypes_combination(names=['dtype1', 'dtype2']) @testing.numpy_cupy_array_equal(accept_error=TypeError) + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_out_different_dtype(self, xp, dtype1, dtype2): a = testing.shaped_arange((3, 4), xp, dtype1) b = testing.shaped_arange((3, 4), xp, dtype1) @@ -215,12 +236,14 @@ def test_concatenate_out_different_dtype(self, xp, dtype1, dtype2): @testing.with_requires('numpy>=1.20.0') @testing.for_all_dtypes_combination(names=['dtype1', 'dtype2']) @testing.numpy_cupy_array_equal(accept_error=TypeError) + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_dtype(self, xp, dtype1, dtype2): a = testing.shaped_arange((3, 4), xp, dtype1) b = testing.shaped_arange((3, 4), xp, dtype1) return xp.concatenate((a, b), dtype=dtype2) @testing.with_requires('numpy>=1.20.0') + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_dtype_invalid_out(self): for xp in (numpy, cupy): a = testing.shaped_arange((3, 4), xp, xp.float64) @@ -241,6 +264,7 @@ def test_concatenate_dtype_invalid_out(self): @testing.for_all_dtypes_combination(names=['dtype1', 'dtype2']) @testing.numpy_cupy_array_equal( accept_error=(TypeError, numpy.ComplexWarning)) + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_casting(self, xp, dtype1, dtype2, casting): a = testing.shaped_arange((3, 4), xp, dtype1) b = testing.shaped_arange((3, 4), xp, dtype1) From dcedab970122bb649260e972e5ca5796e582ab9b Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Sat, 18 Mar 2023 21:52:00 +0000 Subject: [PATCH 12/57] add c++14 for cubreduction --- cupy/_core/_cub_reduction.pyx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cupy/_core/_cub_reduction.pyx b/cupy/_core/_cub_reduction.pyx index 7c8980893b7..1009ae641b4 100644 --- a/cupy/_core/_cub_reduction.pyx +++ b/cupy/_core/_cub_reduction.pyx @@ -28,9 +28,13 @@ cdef function.Function _create_cub_reduction_function( _kernel._TypeMap type_map, preamble, options): # A (incomplete) list of internal variables: # _J : the index of an element in the array - - # static_assert needs at least C++11 in NVRTC - options += ('--std=c++11',) + + # ROCm5.3 and above requires c++14 + if runtime._is_hip_environment: + options += ('--std=c++14',) + else: + # static_assert needs at least C++11 in NVRTC + options += ('--std=c++11',) cdef str backend if runtime._is_hip_environment: From 43f7134885ef1de7f3ced53bfe57e349144a01de Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Sun, 19 Mar 2023 15:10:23 +0000 Subject: [PATCH 13/57] skipping flaky tests --- tests/cupy_tests/core_tests/test_ndarray_reduction.py | 1 + tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/cupy_tests/core_tests/test_ndarray_reduction.py b/tests/cupy_tests/core_tests/test_ndarray_reduction.py index c2fa3e3e3b0..bee64902728 100644 --- a/tests/cupy_tests/core_tests/test_ndarray_reduction.py +++ b/tests/cupy_tests/core_tests/test_ndarray_reduction.py @@ -459,6 +459,7 @@ def test_cub_min_empty_axis(self, xp, dtype, contiguous_check=False): @testing.for_all_dtypes(no_bool=True) @testing.numpy_cupy_allclose( contiguous_check=False, accept_error=ValueError) + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_cub_max(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype, order=self.order) diff --git a/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py b/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py index 2cd31a92d9a..995ee4cb592 100644 --- a/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py +++ b/tests/cupyx_tests/scipy_tests/signal_tests/test_signaltools.py @@ -366,6 +366,7 @@ class TestMedFilt2d: @testing.for_all_dtypes() @testing.numpy_cupy_allclose(atol=1e-8, rtol=1e-8, scipy_name='scp', accept_error=ValueError) # for even kernels + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') def test_medfilt2d(self, xp, scp, dtype): if sys.platform == 'win32': pytest.xfail('medfilt2d broken for Scipy 1.7.0 in windows') From 3c1df4f0cca60afe92cd89ffaf971513838fa2d1 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Sun, 19 Mar 2023 18:39:42 +0000 Subject: [PATCH 14/57] Skip ndarray tests --- tests/cupy_tests/core_tests/test_ndarray_reduction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cupy_tests/core_tests/test_ndarray_reduction.py b/tests/cupy_tests/core_tests/test_ndarray_reduction.py index c2fa3e3e3b0..bf8cce2c3bb 100644 --- a/tests/cupy_tests/core_tests/test_ndarray_reduction.py +++ b/tests/cupy_tests/core_tests/test_ndarray_reduction.py @@ -459,6 +459,7 @@ def test_cub_min_empty_axis(self, xp, dtype, contiguous_check=False): @testing.for_all_dtypes(no_bool=True) @testing.numpy_cupy_allclose( contiguous_check=False, accept_error=ValueError) + @pytest.mark.skipif(runtime.is_hip,reason="rocm/hip may have a bug") def test_cub_max(self, xp, dtype, axis): a = testing.shaped_random(self.shape, xp, dtype, order=self.order) From ba59d26dea4102f20910214b4113a3b316daff85 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 19 Mar 2023 23:31:02 +0000 Subject: [PATCH 15/57] Skip build hip check --- tests/install_tests/test_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/install_tests/test_build.py b/tests/install_tests/test_build.py index 44dfdb3ddf5..b1ab45dc7f8 100644 --- a/tests/install_tests/test_build.py +++ b/tests/install_tests/test_build.py @@ -21,7 +21,7 @@ def setUp(self): self.settings = build.get_compiler_setting(ctx, False) @pytest.mark.gpu - @pytest.mark.skipif(not test_hip, reason='For ROCm/HIP environment') + @pytest.mark.skipif(test_hip, reason='For ROCm/HIP environment') def test_check_hip_version(self): with self.assertRaises(RuntimeError): build.get_hip_version() From edcc5731b22b62a483d9a584f3df4c626f63aa14 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Mon, 20 Mar 2023 09:06:08 +0000 Subject: [PATCH 16/57] flake8 white trailing spaces --- cupy/_core/_cub_reduction.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cupy/_core/_cub_reduction.pyx b/cupy/_core/_cub_reduction.pyx index 1009ae641b4..4489dc09ab6 100644 --- a/cupy/_core/_cub_reduction.pyx +++ b/cupy/_core/_cub_reduction.pyx @@ -28,13 +28,13 @@ cdef function.Function _create_cub_reduction_function( _kernel._TypeMap type_map, preamble, options): # A (incomplete) list of internal variables: # _J : the index of an element in the array - + # ROCm5.3 and above requires c++14 if runtime._is_hip_environment: options += ('--std=c++14',) else: # static_assert needs at least C++11 in NVRTC - options += ('--std=c++11',) + options += ('--std=c++11',) cdef str backend if runtime._is_hip_environment: From 741d4bb2e0429d92c426bac4e388917c1110e51b Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Mon, 20 Mar 2023 11:54:03 +0000 Subject: [PATCH 17/57] trailing spaces --- tests/cupy_tests/manipulation_tests/test_basic.py | 1 + tests/cupy_tests/manipulation_tests/test_join.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cupy_tests/manipulation_tests/test_basic.py b/tests/cupy_tests/manipulation_tests/test_basic.py index d1344a06779..b4d5fef2334 100644 --- a/tests/cupy_tests/manipulation_tests/test_basic.py +++ b/tests/cupy_tests/manipulation_tests/test_basic.py @@ -9,6 +9,7 @@ from cupy import testing from cupy.cuda import runtime + @testing.gpu class TestBasic: diff --git a/tests/cupy_tests/manipulation_tests/test_join.py b/tests/cupy_tests/manipulation_tests/test_join.py index 5cc249b6698..6a9df508a1a 100644 --- a/tests/cupy_tests/manipulation_tests/test_join.py +++ b/tests/cupy_tests/manipulation_tests/test_join.py @@ -6,6 +6,7 @@ from cupy import cuda from cupy.cuda import runtime + class TestJoin: @testing.for_all_dtypes(name='dtype1') @@ -195,7 +196,7 @@ def test_concatenate_out_invalid_shape(self): out = xp.zeros((4, 10), dtype=xp.float64) with pytest.raises(ValueError): xp.concatenate((a, b, c), axis=1, out=out) - + @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug ') def test_concatenate_out_invalid_shape_2(self): for xp in (numpy, cupy): From 739e81fa760bcdcd76e4f93dbc8d81596ee36f74 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 27 Mar 2023 20:11:21 +0000 Subject: [PATCH 18/57] Add xfail for test_cumprod_huge_array --- tests/cupy_tests/math_tests/test_sumprod.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cupy_tests/math_tests/test_sumprod.py b/tests/cupy_tests/math_tests/test_sumprod.py index 7470b1e5752..3e6bdf71be0 100644 --- a/tests/cupy_tests/math_tests/test_sumprod.py +++ b/tests/cupy_tests/math_tests/test_sumprod.py @@ -733,6 +733,7 @@ def test_ndarray_cumprod_2dim_with_axis(self, xp, dtype): return a.cumprod(axis=1) @testing.slow + @pytest.mark.xfail(runtime.is_hip, reason='Workload size is bigger than what ROCm/CUDA supports') def test_cumprod_huge_array(self): size = 2 ** 32 # Free huge memory for slow test From f2381ddf003c4d8fb1261914a077cfabae76c1d6 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Wed, 12 Apr 2023 05:52:09 +0000 Subject: [PATCH 19/57] fix compilation issues in rocm5.6 --- cupy/cuda/cupy_cufft.h | 2 +- cupy/random/cupy_distributions.cuh | 2 +- cupy_backends/hip/cupy_hip_common.h | 4 ++-- cupy_backends/hip/cupy_hipblas.h | 2 +- cupy_backends/hip/cupy_hipsparse.h | 2 +- cupy_backends/hip/cupy_rccl.h | 2 +- install/cupy_builder/_features.py | 10 +++++----- install/cupy_builder/install_build.py | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cupy/cuda/cupy_cufft.h b/cupy/cuda/cupy_cufft.h index 1ccecdefb34..9d1ece6978d 100644 --- a/cupy/cuda/cupy_cufft.h +++ b/cupy/cuda/cupy_cufft.h @@ -12,7 +12,7 @@ #include #elif defined(CUPY_USE_HIP) -#include +#include extern "C" { diff --git a/cupy/random/cupy_distributions.cuh b/cupy/random/cupy_distributions.cuh index be3e43b1b1b..48b5e5c2dbe 100644 --- a/cupy/random/cupy_distributions.cuh +++ b/cupy/random/cupy_distributions.cuh @@ -35,7 +35,7 @@ struct rk_binomial_state { // gcc will be used, but the hiprand_kernel can only be compiled with llvm // so we need to explicitly declare stubs for the functions #if HIP_VERSION > 400 -#include +#include #else #include typedef struct {} hiprandState; diff --git a/cupy_backends/hip/cupy_hip_common.h b/cupy_backends/hip/cupy_hip_common.h index 37b96cd1122..672406ce5c6 100644 --- a/cupy_backends/hip/cupy_hip_common.h +++ b/cupy_backends/hip/cupy_hip_common.h @@ -2,8 +2,8 @@ #define INCLUDE_GUARD_HIP_CUPY_COMMON_H #include -#include -#include +#include +#include #define CUDA_VERSION 0 diff --git a/cupy_backends/hip/cupy_hipblas.h b/cupy_backends/hip/cupy_hipblas.h index 627bc8681d8..fe166d7e3cd 100644 --- a/cupy_backends/hip/cupy_hipblas.h +++ b/cupy_backends/hip/cupy_hipblas.h @@ -2,7 +2,7 @@ #define INCLUDE_GUARD_HIP_CUPY_HIPBLAS_H #include "cupy_hip_common.h" -#include +#include #include // for HIP_VERSION #include // for gcc 10 diff --git a/cupy_backends/hip/cupy_hipsparse.h b/cupy_backends/hip/cupy_hipsparse.h index dc8a8a92214..271c8bae895 100644 --- a/cupy_backends/hip/cupy_hipsparse.h +++ b/cupy_backends/hip/cupy_hipsparse.h @@ -2,7 +2,7 @@ #ifndef INCLUDE_GUARD_HIP_CUPY_HIPSPARSE_H #define INCLUDE_GUARD_HIP_CUPY_HIPSPARSE_H -#include +#include #include // for HIP_VERSION #include // for hipDataType #include // for gcc 10.0 diff --git a/cupy_backends/hip/cupy_rccl.h b/cupy_backends/hip/cupy_rccl.h index 94dde35054b..06af0461de9 100644 --- a/cupy_backends/hip/cupy_rccl.h +++ b/cupy_backends/hip/cupy_rccl.h @@ -1,7 +1,7 @@ #ifndef INCLUDE_GUARD_HIP_CUPY_RCCL_H #define INCLUDE_GUARD_HIP_CUPY_RCCL_H -#include +#include typedef hipStream_t cudaStream_t; #endif diff --git a/install/cupy_builder/_features.py b/install/cupy_builder/_features.py index d12de78c3a8..6031c8c7cfa 100644 --- a/install/cupy_builder/_features.py +++ b/install/cupy_builder/_features.py @@ -159,12 +159,12 @@ def get_features(ctx: Context) -> Dict[str, Feature]: 'include': [ 'hip/hip_runtime_api.h', 'hip/hiprtc.h', - 'hipblas.h', + 'hipblas/hipblas.h', 'hiprand/hiprand.h', - 'hipsparse.h', - 'hipfft.h', + 'hipsparse/hipsparse.h', + 'hipfft/hipfft.h', 'roctx.h', - 'rocsolver.h', + 'rocsolver/rocsolver.h', ], 'libraries': [ 'amdhip64', # was hiprtc and hip_hcc before ROCm 3.8.0 @@ -358,7 +358,7 @@ def get_features(ctx: Context) -> Dict[str, Feature]: 'cupy_backends.cuda.libs.nccl', ], 'include': [ - 'rccl.h', + 'rccl/rccl.h', ], 'libraries': [ 'rccl', diff --git a/install/cupy_builder/install_build.py b/install/cupy_builder/install_build.py index d0317e32422..51b640bad72 100644 --- a/install/cupy_builder/install_build.py +++ b/install/cupy_builder/install_build.py @@ -448,7 +448,7 @@ def check_nccl_version(compiler, settings): #ifndef CUPY_USE_HIP #include #else - #include + #include #endif #include #ifdef NCCL_MAJOR From 70dc62a9ee64f34b5c3ca63808f61b2a9f694a59 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Wed, 12 Apr 2023 11:41:45 +0000 Subject: [PATCH 20/57] refactor unit test suite automation --- tests/run_tests_rocm.py | 134 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 tests/run_tests_rocm.py diff --git a/tests/run_tests_rocm.py b/tests/run_tests_rocm.py new file mode 100644 index 00000000000..c9455df26dc --- /dev/null +++ b/tests/run_tests_rocm.py @@ -0,0 +1,134 @@ +import os +import sys +import argparse +import re + +TEST_ROOT = os.path.dirname(os.path.abspath(__file__)) +CUPY_TESTS = [ + 'array_api_tests', + 'binary_tests', + 'core_tests', + 'creation_tests', + 'cuda_tests', + 'fft_tests', + 'functional_tests', + 'indexing_tests', + 'io_tests', + 'lib_tests', + 'linalg_tests', + 'logic_tests', + 'manipulation_tests', + 'math_tests', + 'misc_tests', + 'padding_tests', + 'polynomial_tests', + 'prof_tests', + 'random_tests', + 'sorting_tests', + 'statistics_tests', + 'test_cublas.py', + 'testing_tests', + 'test_init.py', + 'test_ndim.py', + 'test_numpy_interop.py', + 'test_type_routines.py', + 'test_typing.py', +] + +CUPYX_TESTS = [ + 'distributed_tests', + 'fallback_mode_tests', + 'jit_tests', + 'linalg_tests', + 'profiler_tests', + 'scipy_tests/fftpack_tests', + 'scipy_tests/fft_tests', + 'scipy_tests/interpolate_tests', + 'scipy_tests/linalg_tests', + 'scipy_tests/ndimage_tests', + 'scipy_tests/signal_tests', + 'scipy_tests/sparse_tests', + 'scipy_tests/spatial_tests', + 'scipy_tests/special_tests', + 'scipy_tests/stats_tests', + 'scipy_tests/test_get_array_module.py', + 'test_cudnn.py', + 'test_cupyx.py', + 'test_cusolver.py', + 'test_cusparse.py', + 'test_cutensor.py', + 'test_lapack.py', + 'test_optimize.py', + 'test_pinned_array.py', + 'test_rsqrt.py', + 'test_runtime.py', + 'test_time.py', + 'tools_tests', +] + +TEST_SUITES = [ + 'cupy_tests', + 'cupyx_tests', + 'example_tests', + 'install_tests', +] + +def parse_test_log_and_get_summary(test_name): + fs = open("/root/.cache/cupy-tests/cupy_test.log", 'r') + lines = fs.readlines() + fs.close() + + count = "" + summary = "" + pattern = "^=*" + for j in range(len(lines)): + line = lines[j].rstrip() + if ("collecting ..." in line): + count = line.split("collected")[1].split("items")[0].strip() + if ("==" in line): + summary = re.split(pattern, line)[1].split("=")[0] + test_data = test_name + "|" + count + "|" + summary + return test_data + +def run_all_tests(): + initial_cmd = 'CUPY_TEST_GPU_LIMIT=4 CUPY_INSTALL_USE_HIP=1 pytest -vvv -k "not compile_cuda and not fft_allocate" -m "not slow" ' + os.system("mkdir -p ~/.cache/cupy-tests") + test_summary = [] + for test_suite in TEST_SUITES: + if test_suite == "cupy_tests": + for cupy_test in CUPY_TESTS: + cmd = initial_cmd + TEST_ROOT + "/cupy_tests/" + cupy_test + " | tee ~/.cache/cupy-tests/cupy_test.log" + print ("Running : " + cmd) + os.system(cmd) + test_name = "tests/cupy_tests/" + cupy_test + test_summary.append(parse_test_log_and_get_summary(test_name)) + elif test_suite == "cupyx_tests": + for cupyx_test in CUPYX_TESTS: + cmd = initial_cmd + TEST_ROOT + "/cupyx_tests/" + cupyx_test + " | tee ~/.cache/cupy-tests/cupy_test.log" + print ("Running : " + cmd) + os.system(cmd) + test_name = "tests/cupyx_tests/" + cupyx_test + test_summary.append(parse_test_log_and_get_summary(test_name)) + else: + cmd = initial_cmd + TEST_ROOT + "/" + test_suite + " | tee ~/.cache/cupy-tests/cupy_test.log" + print (cmd) + os.system(cmd) + test_name = "tests/" + test_suite + test_summary.append(parse_test_log_and_get_summary(test_name)) + + return test_summary + +def main(): + all_tests = args.all_tests + if all_tests: + test_summary = run_all_tests() + print ("---------------------- TEST SUMMARY ------------------") + for j in range(len(test_summary)): + print (test_summary[j]) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--all-tests", action="store_true", default=True, required=False, help="Run all tests"); + args = parser.parse_args() + + main() From 71cabdbe5a328e41152c206235f8b3542beb1bfc Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 14 Apr 2023 02:24:04 +0000 Subject: [PATCH 21/57] fix lint issues --- tests/cupy_tests/math_tests/test_sumprod.py | 2 +- tests/run_tests_rocm.py | 26 ++++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/tests/cupy_tests/math_tests/test_sumprod.py b/tests/cupy_tests/math_tests/test_sumprod.py index 3e6bdf71be0..0b551ba9c4b 100644 --- a/tests/cupy_tests/math_tests/test_sumprod.py +++ b/tests/cupy_tests/math_tests/test_sumprod.py @@ -733,7 +733,7 @@ def test_ndarray_cumprod_2dim_with_axis(self, xp, dtype): return a.cumprod(axis=1) @testing.slow - @pytest.mark.xfail(runtime.is_hip, reason='Workload size is bigger than what ROCm/CUDA supports') + @pytest.mark.xfail(runtime.is_hip, reason='Workload size is bigger than what ROCm/CUDA supports') def test_cumprod_huge_array(self): size = 2 ** 32 # Free huge memory for slow test diff --git a/tests/run_tests_rocm.py b/tests/run_tests_rocm.py index c9455df26dc..d956f5e9bdc 100644 --- a/tests/run_tests_rocm.py +++ b/tests/run_tests_rocm.py @@ -73,6 +73,7 @@ 'install_tests', ] + def parse_test_log_and_get_summary(test_name): fs = open("/root/.cache/cupy-tests/cupy_test.log", 'r') lines = fs.readlines() @@ -90,6 +91,7 @@ def parse_test_log_and_get_summary(test_name): test_data = test_name + "|" + count + "|" + summary return test_data + def run_all_tests(): initial_cmd = 'CUPY_TEST_GPU_LIMIT=4 CUPY_INSTALL_USE_HIP=1 pytest -vvv -k "not compile_cuda and not fft_allocate" -m "not slow" ' os.system("mkdir -p ~/.cache/cupy-tests") @@ -97,38 +99,44 @@ def run_all_tests(): for test_suite in TEST_SUITES: if test_suite == "cupy_tests": for cupy_test in CUPY_TESTS: - cmd = initial_cmd + TEST_ROOT + "/cupy_tests/" + cupy_test + " | tee ~/.cache/cupy-tests/cupy_test.log" - print ("Running : " + cmd) + cmd = initial_cmd + TEST_ROOT + "/cupy_tests/" + \ + cupy_test + " | tee ~/.cache/cupy-tests/cupy_test.log" + print("Running : " + cmd) os.system(cmd) test_name = "tests/cupy_tests/" + cupy_test test_summary.append(parse_test_log_and_get_summary(test_name)) elif test_suite == "cupyx_tests": for cupyx_test in CUPYX_TESTS: - cmd = initial_cmd + TEST_ROOT + "/cupyx_tests/" + cupyx_test + " | tee ~/.cache/cupy-tests/cupy_test.log" - print ("Running : " + cmd) + cmd = initial_cmd + TEST_ROOT + "/cupyx_tests/" + \ + cupyx_test + " | tee ~/.cache/cupy-tests/cupy_test.log" + print("Running : " + cmd) os.system(cmd) test_name = "tests/cupyx_tests/" + cupyx_test test_summary.append(parse_test_log_and_get_summary(test_name)) else: - cmd = initial_cmd + TEST_ROOT + "/" + test_suite + " | tee ~/.cache/cupy-tests/cupy_test.log" - print (cmd) + cmd = initial_cmd + TEST_ROOT + "/" + test_suite + \ + " | tee ~/.cache/cupy-tests/cupy_test.log" + print(cmd) os.system(cmd) test_name = "tests/" + test_suite test_summary.append(parse_test_log_and_get_summary(test_name)) return test_summary + def main(): all_tests = args.all_tests if all_tests: test_summary = run_all_tests() - print ("---------------------- TEST SUMMARY ------------------") + print("---------------------- TEST SUMMARY ------------------") for j in range(len(test_summary)): - print (test_summary[j]) + print(test_summary[j]) + if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("--all-tests", action="store_true", default=True, required=False, help="Run all tests"); + parser.add_argument("--all-tests", action="store_true", + default=True, required=False, help="Run all tests") args = parser.parse_args() main() From 2b40f532441df40bf73bbc75bb2ed287767414fd Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 14 Apr 2023 02:53:05 +0000 Subject: [PATCH 22/57] fix more lint errors --- tests/run_tests_rocm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/run_tests_rocm.py b/tests/run_tests_rocm.py index d956f5e9bdc..cae5cbc1afb 100644 --- a/tests/run_tests_rocm.py +++ b/tests/run_tests_rocm.py @@ -1,5 +1,4 @@ import os -import sys import argparse import re @@ -93,7 +92,8 @@ def parse_test_log_and_get_summary(test_name): def run_all_tests(): - initial_cmd = 'CUPY_TEST_GPU_LIMIT=4 CUPY_INSTALL_USE_HIP=1 pytest -vvv -k "not compile_cuda and not fft_allocate" -m "not slow" ' + initial_cmd = 'CUPY_TEST_GPU_LIMIT=4 CUPY_INSTALL_USE_HIP=1 ' + \ + 'pytest -vvv -k "not compile_cuda and not fft_allocate" -m "not slow" ' os.system("mkdir -p ~/.cache/cupy-tests") test_summary = [] for test_suite in TEST_SUITES: @@ -116,7 +116,7 @@ def run_all_tests(): else: cmd = initial_cmd + TEST_ROOT + "/" + test_suite + \ " | tee ~/.cache/cupy-tests/cupy_test.log" - print(cmd) + print("Running : " + cmd) os.system(cmd) test_name = "tests/" + test_suite test_summary.append(parse_test_log_and_get_summary(test_name)) From 503729dfc3fe60dce56e0602a9a1cac0196b6a4e Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 14 Apr 2023 08:47:30 +0000 Subject: [PATCH 23/57] add version conditions --- cupy/cuda/cupy_cufft.h | 5 +++++ cupy/random/cupy_distributions.cuh | 4 +++- cupy_backends/hip/cupy_hip_common.h | 5 +++++ cupy_backends/hip/cupy_hipblas.h | 4 ++++ cupy_backends/hip/cupy_hipsparse.h | 4 ++++ cupy_backends/hip/cupy_rccl.h | 6 +++++- install/cupy_builder/_features.py | 12 +++++++----- 7 files changed, 33 insertions(+), 7 deletions(-) diff --git a/cupy/cuda/cupy_cufft.h b/cupy/cuda/cupy_cufft.h index 9d1ece6978d..cbe17321b40 100644 --- a/cupy/cuda/cupy_cufft.h +++ b/cupy/cuda/cupy_cufft.h @@ -12,7 +12,12 @@ #include #elif defined(CUPY_USE_HIP) +#include //for HIP_VERSION +#if HIP_VERSION >= 505 #include +#else +#include +#endif extern "C" { diff --git a/cupy/random/cupy_distributions.cuh b/cupy/random/cupy_distributions.cuh index 48b5e5c2dbe..eb2b442f199 100644 --- a/cupy/random/cupy_distributions.cuh +++ b/cupy/random/cupy_distributions.cuh @@ -34,8 +34,10 @@ struct rk_binomial_state { // When compiling cython extensions with hip 4.0 // gcc will be used, but the hiprand_kernel can only be compiled with llvm // so we need to explicitly declare stubs for the functions -#if HIP_VERSION > 400 +#if HIP_VERSION >= 505 #include +#elif HIP_VERSION > 400 +#include #else #include typedef struct {} hiprandState; diff --git a/cupy_backends/hip/cupy_hip_common.h b/cupy_backends/hip/cupy_hip_common.h index 672406ce5c6..8699cb9e391 100644 --- a/cupy_backends/hip/cupy_hip_common.h +++ b/cupy_backends/hip/cupy_hip_common.h @@ -2,8 +2,13 @@ #define INCLUDE_GUARD_HIP_CUPY_COMMON_H #include +#if HIP_VERSION >= 50530600 #include #include +#else +#include +#include +#endif #define CUDA_VERSION 0 diff --git a/cupy_backends/hip/cupy_hipblas.h b/cupy_backends/hip/cupy_hipblas.h index fe166d7e3cd..731bf2e1cb5 100644 --- a/cupy_backends/hip/cupy_hipblas.h +++ b/cupy_backends/hip/cupy_hipblas.h @@ -2,7 +2,11 @@ #define INCLUDE_GUARD_HIP_CUPY_HIPBLAS_H #include "cupy_hip_common.h" +#if HIP_VERSION >= 505 #include +#else +#include +#endif #include // for HIP_VERSION #include // for gcc 10 diff --git a/cupy_backends/hip/cupy_hipsparse.h b/cupy_backends/hip/cupy_hipsparse.h index 271c8bae895..e4bcf1ead93 100644 --- a/cupy_backends/hip/cupy_hipsparse.h +++ b/cupy_backends/hip/cupy_hipsparse.h @@ -2,7 +2,11 @@ #ifndef INCLUDE_GUARD_HIP_CUPY_HIPSPARSE_H #define INCLUDE_GUARD_HIP_CUPY_HIPSPARSE_H +#if HIP_VERSION >= 505 #include +#else +#include +#endif #include // for HIP_VERSION #include // for hipDataType #include // for gcc 10.0 diff --git a/cupy_backends/hip/cupy_rccl.h b/cupy_backends/hip/cupy_rccl.h index 06af0461de9..a1bda03fcfc 100644 --- a/cupy_backends/hip/cupy_rccl.h +++ b/cupy_backends/hip/cupy_rccl.h @@ -1,7 +1,11 @@ #ifndef INCLUDE_GUARD_HIP_CUPY_RCCL_H #define INCLUDE_GUARD_HIP_CUPY_RCCL_H - +#include +#if HIP_VERSION >= 505 #include +#else +#include +#endif typedef hipStream_t cudaStream_t; #endif diff --git a/install/cupy_builder/_features.py b/install/cupy_builder/_features.py index 6031c8c7cfa..259977e7210 100644 --- a/install/cupy_builder/_features.py +++ b/install/cupy_builder/_features.py @@ -3,6 +3,7 @@ import cupy_builder.install_build as build import cupy_builder.install_utils as utils from cupy_builder import Context +from cupy.cuda.runtime import runtimeGetVersion class Feature: @@ -147,6 +148,7 @@ def get_features(ctx: Context) -> Dict[str, Feature]: # the HIP stubs (hip/cupy_*.h) would cause many symbols # to leak into all these modules even if unused. It's easier for all of # them to link to the same set of shared libraries. + runtime_hip_version = runtimeGetVersion() HIP_cuda_nvtx_cusolver = { # TODO(leofang): call this "rocm" or "hip" to avoid confusion? 'name': 'cuda', @@ -159,12 +161,12 @@ def get_features(ctx: Context) -> Dict[str, Feature]: 'include': [ 'hip/hip_runtime_api.h', 'hip/hiprtc.h', - 'hipblas/hipblas.h', + 'hipblas/hipblas.h' if runtime_hip_version >= 50530600 else 'hipblas.h', 'hiprand/hiprand.h', - 'hipsparse/hipsparse.h', - 'hipfft/hipfft.h', + 'hipsparse/hipsparse.h' if runtime_hip_version >= 50530600 else 'hipsparse.h', + 'hipfft/hipfft.h' if runtime_hip_version >= 50530600 else 'hipfft.h', 'roctx.h', - 'rocsolver/rocsolver.h', + 'rocsolver/rocsolver.h' if runtime_hip_version >= 50530600 else 'rocsolver.h', ], 'libraries': [ 'amdhip64', # was hiprtc and hip_hcc before ROCm 3.8.0 @@ -358,7 +360,7 @@ def get_features(ctx: Context) -> Dict[str, Feature]: 'cupy_backends.cuda.libs.nccl', ], 'include': [ - 'rccl/rccl.h', + 'rccl/rccl.h' if runtime_hip_version >= 50530600 else 'rccl.h', ], 'libraries': [ 'rccl', From a11a0ce0f48501ef4a9e16912b1bfb982e51f574 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 14 Apr 2023 13:07:21 +0000 Subject: [PATCH 24/57] retrieve rocm version before installation --- install/cupy_builder/_features.py | 13 ++++++------- install/cupy_builder/install_utils.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/install/cupy_builder/_features.py b/install/cupy_builder/_features.py index 259977e7210..52c7b91b019 100644 --- a/install/cupy_builder/_features.py +++ b/install/cupy_builder/_features.py @@ -3,7 +3,6 @@ import cupy_builder.install_build as build import cupy_builder.install_utils as utils from cupy_builder import Context -from cupy.cuda.runtime import runtimeGetVersion class Feature: @@ -148,7 +147,7 @@ def get_features(ctx: Context) -> Dict[str, Feature]: # the HIP stubs (hip/cupy_*.h) would cause many symbols # to leak into all these modules even if unused. It's easier for all of # them to link to the same set of shared libraries. - runtime_hip_version = runtimeGetVersion() + rocm_version = utils.get_rocm_version() HIP_cuda_nvtx_cusolver = { # TODO(leofang): call this "rocm" or "hip" to avoid confusion? 'name': 'cuda', @@ -161,12 +160,12 @@ def get_features(ctx: Context) -> Dict[str, Feature]: 'include': [ 'hip/hip_runtime_api.h', 'hip/hiprtc.h', - 'hipblas/hipblas.h' if runtime_hip_version >= 50530600 else 'hipblas.h', + 'hipblas/hipblas.h' if rocm_version >= 560 else 'hipblas.h', 'hiprand/hiprand.h', - 'hipsparse/hipsparse.h' if runtime_hip_version >= 50530600 else 'hipsparse.h', - 'hipfft/hipfft.h' if runtime_hip_version >= 50530600 else 'hipfft.h', + 'hipsparse/hipsparse.h' if rocm_version >= 560 else 'hipsparse.h', + 'hipfft/hipfft.h' if rocm_version >= 560 else 'hipfft.h', 'roctx.h', - 'rocsolver/rocsolver.h' if runtime_hip_version >= 50530600 else 'rocsolver.h', + 'rocsolver/rocsolver.h' if rocm_version >= 560 else 'rocsolver.h', ], 'libraries': [ 'amdhip64', # was hiprtc and hip_hcc before ROCm 3.8.0 @@ -360,7 +359,7 @@ def get_features(ctx: Context) -> Dict[str, Feature]: 'cupy_backends.cuda.libs.nccl', ], 'include': [ - 'rccl/rccl.h' if runtime_hip_version >= 50530600 else 'rccl.h', + 'rccl/rccl.h' if rocm_version >= 560 else 'rccl.h', ], 'libraries': [ 'rccl', diff --git a/install/cupy_builder/install_utils.py b/install/cupy_builder/install_utils.py index b7dd550b6bc..3b27864c793 100644 --- a/install/cupy_builder/install_utils.py +++ b/install/cupy_builder/install_utils.py @@ -20,3 +20,14 @@ def search_on_path(filenames: List[str]) -> Optional[str]: if os.path.exists(full): return os.path.abspath(full) return None + + +def get_rocm_version() -> int: + rocm_version = -1 + if os.getenv("ROCM_HOME"): + rocm_home = os.getenv("ROCM_HOME") + version_path = os.path.join(rocm_home, ".info") + version_path = os.path.join(version_path, "version") + rocm_version = int( + open(version_path).read().split("-")[0].replace(".", "")) + return rocm_version From c486e0e8e9662e4748a156eea9a9595688411d30 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 14 Apr 2023 13:18:10 +0000 Subject: [PATCH 25/57] fix lint issues --- tests/cupy_tests/math_tests/test_sumprod.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cupy_tests/math_tests/test_sumprod.py b/tests/cupy_tests/math_tests/test_sumprod.py index 0b551ba9c4b..ef0205c3381 100644 --- a/tests/cupy_tests/math_tests/test_sumprod.py +++ b/tests/cupy_tests/math_tests/test_sumprod.py @@ -733,7 +733,9 @@ def test_ndarray_cumprod_2dim_with_axis(self, xp, dtype): return a.cumprod(axis=1) @testing.slow - @pytest.mark.xfail(runtime.is_hip, reason='Workload size is bigger than what ROCm/CUDA supports') + @pytest.mark.xfail( + runtime.is_hip, + reason='Workload size is bigger than what ROCm/CUDA supports') def test_cumprod_huge_array(self): size = 2 ** 32 # Free huge memory for slow test From 9fd4cdb3384341ebe2fe943cb7ec989ba2670b26 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 14 Apr 2023 13:39:38 +0000 Subject: [PATCH 26/57] version rccl.h in install build --- install/cupy_builder/install_build.py | 5 +++++ install/cupy_builder/install_utils.py | 5 ++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/install/cupy_builder/install_build.py b/install/cupy_builder/install_build.py index 51b640bad72..a1fc667c403 100644 --- a/install/cupy_builder/install_build.py +++ b/install/cupy_builder/install_build.py @@ -448,7 +448,12 @@ def check_nccl_version(compiler, settings): #ifndef CUPY_USE_HIP #include #else + #include + #if HIP_VERSION >= 50530600 #include + #else + #include + #endif #endif #include #ifdef NCCL_MAJOR diff --git a/install/cupy_builder/install_utils.py b/install/cupy_builder/install_utils.py index 3b27864c793..caf39c574f5 100644 --- a/install/cupy_builder/install_utils.py +++ b/install/cupy_builder/install_utils.py @@ -25,9 +25,8 @@ def search_on_path(filenames: List[str]) -> Optional[str]: def get_rocm_version() -> int: rocm_version = -1 if os.getenv("ROCM_HOME"): - rocm_home = os.getenv("ROCM_HOME") - version_path = os.path.join(rocm_home, ".info") - version_path = os.path.join(version_path, "version") + rocm_home = str(os.getenv("ROCM_HOME")) + version_path = os.path.join(rocm_home, ".info", "version") rocm_version = int( open(version_path).read().split("-")[0].replace(".", "")) return rocm_version From e77a6a8be13aa24a468b19b79c0ed1d2aa2a344c Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 14 Apr 2023 14:26:32 +0000 Subject: [PATCH 27/57] fixes for backward compatibility --- cupy/cuda/cupy_cufft.h | 2 +- cupy/random/cupy_distributions.cuh | 2 +- cupy_backends/hip/cupy_hipblas.h | 2 +- cupy_backends/hip/cupy_hipsparse.h | 2 +- cupy_backends/hip/cupy_rccl.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cupy/cuda/cupy_cufft.h b/cupy/cuda/cupy_cufft.h index cbe17321b40..b893ce5c7db 100644 --- a/cupy/cuda/cupy_cufft.h +++ b/cupy/cuda/cupy_cufft.h @@ -13,7 +13,7 @@ #elif defined(CUPY_USE_HIP) #include //for HIP_VERSION -#if HIP_VERSION >= 505 +#if HIP_VERSION >= 50530600 #include #else #include diff --git a/cupy/random/cupy_distributions.cuh b/cupy/random/cupy_distributions.cuh index eb2b442f199..72ee0381e5e 100644 --- a/cupy/random/cupy_distributions.cuh +++ b/cupy/random/cupy_distributions.cuh @@ -34,7 +34,7 @@ struct rk_binomial_state { // When compiling cython extensions with hip 4.0 // gcc will be used, but the hiprand_kernel can only be compiled with llvm // so we need to explicitly declare stubs for the functions -#if HIP_VERSION >= 505 +#if HIP_VERSION >= 50530600 #include #elif HIP_VERSION > 400 #include diff --git a/cupy_backends/hip/cupy_hipblas.h b/cupy_backends/hip/cupy_hipblas.h index 731bf2e1cb5..6a6e139524a 100644 --- a/cupy_backends/hip/cupy_hipblas.h +++ b/cupy_backends/hip/cupy_hipblas.h @@ -2,7 +2,7 @@ #define INCLUDE_GUARD_HIP_CUPY_HIPBLAS_H #include "cupy_hip_common.h" -#if HIP_VERSION >= 505 +#if HIP_VERSION >= 50530600 #include #else #include diff --git a/cupy_backends/hip/cupy_hipsparse.h b/cupy_backends/hip/cupy_hipsparse.h index e4bcf1ead93..d8bbd620528 100644 --- a/cupy_backends/hip/cupy_hipsparse.h +++ b/cupy_backends/hip/cupy_hipsparse.h @@ -2,7 +2,7 @@ #ifndef INCLUDE_GUARD_HIP_CUPY_HIPSPARSE_H #define INCLUDE_GUARD_HIP_CUPY_HIPSPARSE_H -#if HIP_VERSION >= 505 +#if HIP_VERSION >= 50530600 #include #else #include diff --git a/cupy_backends/hip/cupy_rccl.h b/cupy_backends/hip/cupy_rccl.h index a1bda03fcfc..428ade01321 100644 --- a/cupy_backends/hip/cupy_rccl.h +++ b/cupy_backends/hip/cupy_rccl.h @@ -1,7 +1,7 @@ #ifndef INCLUDE_GUARD_HIP_CUPY_RCCL_H #define INCLUDE_GUARD_HIP_CUPY_RCCL_H #include -#if HIP_VERSION >= 505 +#if HIP_VERSION >= 50530600 #include #else #include From 3ba93bcea3a7c59f11da042f4139f20b0bf2f06a Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Mon, 17 Apr 2023 06:53:39 +0000 Subject: [PATCH 28/57] unskip test_maxit_None for rocm 5.6 --- tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py b/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py index 3cd8bcb5bd8..755820ec67c 100644 --- a/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py +++ b/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py @@ -1198,7 +1198,9 @@ def test_random_initial_float32(self, xp, sp): return eigvals, _eigen_vec_transform(vecs, xp) @pytest.mark.xfail( - runtime.is_hip and driver.get_build_version() >= 5_00_00000, + runtime.is_hip and + (driver.get_build_version() >= 5_00_00000 and + driver.get_build_version() < 50530600), reason='ROCm 5.0+ may have a bug') def test_maxit_None(self): """Check lobpcg if maxit=None runs 20 iterations (the default) From 70ca71f81ebbb2e1038ead0f60aa9d4fa0359e43 Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Wed, 19 Apr 2023 16:32:51 +0000 Subject: [PATCH 29/57] version for test_methods --- tests/cupy_tests/random_tests/test_generator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cupy_tests/random_tests/test_generator.py b/tests/cupy_tests/random_tests/test_generator.py index 99574110cf4..d6ebf009ca4 100644 --- a/tests/cupy_tests/random_tests/test_generator.py +++ b/tests/cupy_tests/random_tests/test_generator.py @@ -14,6 +14,7 @@ from cupy.testing import _attr from cupy.testing import _condition from cupy.testing import _hypothesis +from cupy.cuda import driver from cupy_tests.random_tests import common_distributions @@ -143,7 +144,8 @@ def test_methods(self): for method in methods: if (runtime.is_hip and - method == cupy.cuda.curand.CURAND_RNG_PSEUDO_MT19937): + method == cupy.cuda.curand.CURAND_RNG_PSEUDO_MT19937 + and driver.get_build_version() < 50530600): # hipRAND fails for MT19937 with the status code 1000, # HIPRAND_STATUS_NOT_IMPLEMENTED. We use `pytest.raises` here # so that we will be able to find it once hipRAND implement From 3ccecd37241c248e17c287ca2e12abfe8474b46e Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Fri, 12 May 2023 14:44:44 +0000 Subject: [PATCH 30/57] update the version number --- tests/cupy_tests/random_tests/test_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cupy_tests/random_tests/test_generator.py b/tests/cupy_tests/random_tests/test_generator.py index d6ebf009ca4..1512a94acf2 100644 --- a/tests/cupy_tests/random_tests/test_generator.py +++ b/tests/cupy_tests/random_tests/test_generator.py @@ -145,7 +145,7 @@ def test_methods(self): for method in methods: if (runtime.is_hip and method == cupy.cuda.curand.CURAND_RNG_PSEUDO_MT19937 - and driver.get_build_version() < 50530600): + and driver.get_build_version() < 50530201): # hipRAND fails for MT19937 with the status code 1000, # HIPRAND_STATUS_NOT_IMPLEMENTED. We use `pytest.raises` here # so that we will be able to find it once hipRAND implement From 3331cb722cc9420be16ed8c4bab4a284b0d3cfb1 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Mon, 15 May 2023 23:35:32 +0000 Subject: [PATCH 31/57] Update version value for ROCm5.5 --- tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py b/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py index 755820ec67c..b5b2839e02d 100644 --- a/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py +++ b/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py @@ -1200,7 +1200,7 @@ def test_random_initial_float32(self, xp, sp): @pytest.mark.xfail( runtime.is_hip and (driver.get_build_version() >= 5_00_00000 and - driver.get_build_version() < 50530600), + driver.get_build_version() < 50530201), reason='ROCm 5.0+ may have a bug') def test_maxit_None(self): """Check lobpcg if maxit=None runs 20 iterations (the default) From 970e28050a1da10a4183ed96d3708c81ebdddbf1 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Thu, 8 Jun 2023 22:32:28 +0000 Subject: [PATCH 32/57] Remove xfail and change test pass condition --- tests/cupy_tests/random_tests/test_generator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/cupy_tests/random_tests/test_generator.py b/tests/cupy_tests/random_tests/test_generator.py index 1512a94acf2..ac55de3e552 100644 --- a/tests/cupy_tests/random_tests/test_generator.py +++ b/tests/cupy_tests/random_tests/test_generator.py @@ -855,8 +855,7 @@ class TestChoiceChi(RandomGeneratorTestCase): target_method = 'choice' - @_condition.repeat(3, 10) - @pytest.mark.xfail(runtime.is_hip, reason='ROCm/HIP may have a bug') + @_condition.repeat_with_success_at_least(10, 9) def test_goodness_of_fit(self): trial = 100 vals = self.generate_many(3, 1, True, [0.3, 0.3, 0.4], _count=trial) From be0beefa7c03c6e8af3a00b212007bf9835da4c2 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 14 Jun 2023 19:29:22 +0000 Subject: [PATCH 33/57] Fixed failing unit tests --- tests/cupy_tests/random_tests/test_generator.py | 4 +++- tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/cupy_tests/random_tests/test_generator.py b/tests/cupy_tests/random_tests/test_generator.py index 192ced3d594..8923ab63b2c 100644 --- a/tests/cupy_tests/random_tests/test_generator.py +++ b/tests/cupy_tests/random_tests/test_generator.py @@ -13,6 +13,7 @@ from cupy import testing from cupy.testing import _condition from cupy.testing import _hypothesis +from cupy.cuda import driver from cupy_tests.random_tests import common_distributions @@ -141,7 +142,8 @@ def test_methods(self): for method in methods: if (runtime.is_hip and - method == cupy.cuda.curand.CURAND_RNG_PSEUDO_MT19937): + method == cupy.cuda.curand.CURAND_RNG_PSEUDO_MT19937 + and driver.get_build_version() < 50530201): # hipRAND fails for MT19937 with the status code 1000, # HIPRAND_STATUS_NOT_IMPLEMENTED. We use `pytest.raises` here # so that we will be able to find it once hipRAND implement diff --git a/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py b/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py index 2fb1c59b931..01cf4693826 100644 --- a/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py +++ b/tests/cupyx_tests/scipy_tests/sparse_tests/test_linalg.py @@ -1191,7 +1191,9 @@ def test_random_initial_float32(self, xp, sp): return eigvals, _eigen_vec_transform(vecs, xp) @pytest.mark.xfail( - runtime.is_hip and driver.get_build_version() >= 5_00_00000, + runtime.is_hip and + driver.get_build_version() >= 5_00_00000 and + driver.get_build_version() < 50530201, reason='ROCm 5.0+ may have a bug') def test_maxit_None(self): """Check lobpcg if maxit=None runs 20 iterations (the default) From 7a18db6c47acd4153f950dde6227735dc7af5113 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 14 Jun 2023 23:58:30 +0000 Subject: [PATCH 34/57] Remove gpu decorator similar to upstream --- .../scipy_tests/ndimage_tests/test_interpolation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py index 2e711aff9d4..6c4e626df1e 100644 --- a/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py +++ b/tests/cupyx_tests/scipy_tests/ndimage_tests/test_interpolation.py @@ -680,7 +680,6 @@ def test_shift_int(self, xp, scp, dtype): 'mode': ['constant', 'nearest'], 'cval': [cupy.nan, cupy.inf, -cupy.inf], })) -@testing.gpu @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestInterpolationInvalidCval: @@ -850,7 +849,6 @@ def test_zoom_int(self, xp, scp, dtype): 'zoom': [(1, 1), (3, 5), (8, 2), (8, 8)], 'mode': ['nearest', 'reflect', 'mirror', 'grid-wrap', 'grid-constant'], })) -@testing.gpu @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestZoomOrder0IntegerGrid(): @@ -875,7 +873,6 @@ def test_zoom_grid_by_int_order0(self): 'order': [0, 1, 2, 3, 4, 5], 'grid_mode': [False, True], })) -@testing.gpu @pytest.mark.skipif(runtime.is_hip, reason='ROCm/HIP may have a bug') class TestZoomOutputSize1(): From 29a0d95377a4fd62c3aef2c9e97ba2b63a059a4b Mon Sep 17 00:00:00 2001 From: lcskrishna Date: Wed, 26 Jul 2023 02:21:22 +0000 Subject: [PATCH 35/57] dummy comment --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index b9005a4417a..e0e8fb58f29 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ if not cupy_builder.preflight_check(ctx): sys.exit(1) +## testing CI # TODO(kmaehashi): migrate to pyproject.toml (see #4727, #4619) setup_requires = [ From 83c0f99c1fbc8ad38a89d3aac3512591bcd21d7d Mon Sep 17 00:00:00 2001 From: Kenichi Maehashi Date: Tue, 18 Jul 2023 02:03:12 +0000 Subject: [PATCH 36/57] remove explicit cython installation --- .github/workflows/pretest-rocm-test.sh | 1 - .github/workflows/pretest.yml | 1 - setup.py | 1 - 3 files changed, 3 deletions(-) diff --git a/.github/workflows/pretest-rocm-test.sh b/.github/workflows/pretest-rocm-test.sh index acb130500ba..d661928d1aa 100644 --- a/.github/workflows/pretest-rocm-test.sh +++ b/.github/workflows/pretest-rocm-test.sh @@ -9,7 +9,6 @@ DEBIAN_FRONTEND=noninteractive apt-get -y install python3-pip python3-dev hipconfig pip3 install -U pip wheel -pip3 install cython export ROCM_HOME="/opt/rocm" export HCC_AMDGPU_TARGET="gfx900" diff --git a/.github/workflows/pretest.yml b/.github/workflows/pretest.yml index 13a91f999cc..ca5339579a9 100644 --- a/.github/workflows/pretest.yml +++ b/.github/workflows/pretest.yml @@ -67,7 +67,6 @@ jobs: - name: Build run: | pip install -U pip wheel - pip install cython READTHEDOCS=True pip install -v -e . ccache --max-size 0.5Gi --cleanup --show-stats diff --git a/setup.py b/setup.py index e0e8fb58f29..b9005a4417a 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,6 @@ if not cupy_builder.preflight_check(ctx): sys.exit(1) -## testing CI # TODO(kmaehashi): migrate to pyproject.toml (see #4727, #4619) setup_requires = [ From c0f81c704e0bc218b4212730687ba413a1cadb3d Mon Sep 17 00:00:00 2001 From: emcastillo Date: Tue, 18 Jul 2023 16:25:43 +0900 Subject: [PATCH 37/57] Merge pull request #7723 from leofang/cuqnt_update Minor updates for cuQuantum/cuTensorNet support --- cupy/linalg/_einsum_cutn.py | 32 +++++++++++++++++++++++++++++--- docs/source/conf.py | 2 +- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/cupy/linalg/_einsum_cutn.py b/cupy/linalg/_einsum_cutn.py index 6bce73f33a9..f710abc43b2 100644 --- a/cupy/linalg/_einsum_cutn.py +++ b/cupy/linalg/_einsum_cutn.py @@ -1,13 +1,31 @@ +import threading import warnings try: + import cuquantum from cuquantum import cutensornet - cutn_handle_cache = {} # type: ignore # noqa except ImportError: - cutensornet = None + cuquantum = cutensornet = None import cupy +from cupy import _util from cupy._core import _accelerator +from cupy.cuda.device import Handle + + +_tls = threading.local() + + +@_util.memoize() +def _is_cuqnt_22_11_or_higher(): + ver = [int(i) for i in cuquantum.__version__.split('.')] + if (ver[0] > 22) or (ver[0] == 22 and ver[1] >= 11): + return True + return False + + +def _is_nonblocking_supported(): + return _is_cuqnt_22_11_or_higher() def _get_einsum_operands(args): @@ -88,11 +106,19 @@ def _try_use_cutensornet(*args, **kwargs): # prepare cutn inputs device = cupy.cuda.runtime.getDevice() + if not hasattr(_tls, "cutn_handle_cache"): + cutn_handle_cache = _tls.cutn_handle_cache = {} + else: + cutn_handle_cache = _tls.cutn_handle_cache handle = cutn_handle_cache.get(device) if handle is None: handle = cutensornet.create() - cutn_handle_cache[device] = handle + cutn_handle_cache[device] = Handle(handle, cutensornet.destroy) + else: + handle = handle.handle cutn_options = {'device_id': device, 'handle': handle} + if _is_nonblocking_supported(): + cutn_options['blocking'] = "auto" # TODO(leofang): support all valid combinations: # - path from user, contract with cutn (done) diff --git a/docs/source/conf.py b/docs/source/conf.py index 0ab3dd19045..6d9ad203676 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -368,7 +368,7 @@ 'numpy': ('https://numpy.org/doc/stable/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/', None), 'numba': ('https://numba.readthedocs.io/en/stable', None), - 'cuquantum': ('https://docs.nvidia.com/cuda/cuquantum/', None), + 'cuquantum': ('https://docs.nvidia.com/cuda/cuquantum/latest', None), # blocked by data-apis/array-api#428 #'array-api': ('https://data-apis.org/array-api/2021.12/', None), } From cebf3fe3fd78f26f887f5f5a3e2b894d0ecd7c56 Mon Sep 17 00:00:00 2001 From: Prasanth Nunna Date: Mon, 28 Aug 2023 18:58:22 +0000 Subject: [PATCH 38/57] Add spaces to fix precommit failures --- cupy/cuda/texture.pyx | 4 ++-- tests/cupy_tests/random_tests/common_distributions.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cupy/cuda/texture.pyx b/cupy/cuda/texture.pyx index 78c9c4b10fe..d66cbb1ad82 100644 --- a/cupy/cuda/texture.pyx +++ b/cupy/cuda/texture.pyx @@ -6,8 +6,8 @@ import numpy from cupy._core.core cimport _ndarray_base from cupy._core.core cimport _internal_ascontiguousarray from cupy_backends.cuda.api cimport runtime -from cupy_backends.cuda.api.runtime cimport Array,\ - ChannelFormatDesc, ChannelFormatKind,\ +from cupy_backends.cuda.api.runtime cimport Array, \ + ChannelFormatDesc, ChannelFormatKind, \ Memcpy3DParms, MemoryKind, PitchedPtr, ResourceDesc, ResourceType, \ TextureAddressMode, TextureDesc, TextureFilterMode, TextureReadMode from cupy.cuda cimport stream as stream_module diff --git a/tests/cupy_tests/random_tests/common_distributions.py b/tests/cupy_tests/random_tests/common_distributions.py index 25a7ad8f0d7..48672fda701 100644 --- a/tests/cupy_tests/random_tests/common_distributions.py +++ b/tests/cupy_tests/random_tests/common_distributions.py @@ -159,9 +159,9 @@ def test_uniform_ks(self): {'a': 1.0, 'b': 3.0}, {'a': 3.0, 'b': 3.0}, {'a': 3.0, 'b': 1.0}, - {'a': [1.0, 3.0, 5.0, 6.0, 9.0], 'b':7.0}, + {'a': [1.0, 3.0, 5.0, 6.0, 9.0], 'b': 7.0}, {'a': 5.0, 'b': [1.0, 5.0, 8.0, 1.0, 3.0]}, - {'a': [8.0, 6.0, 2.0, 4.0, 7.0], 'b':[3.0, 1.0, 2.0, 8.0, 1.0]}] + {'a': [8.0, 6.0, 2.0, 4.0, 7.0], 'b': [3.0, 1.0, 2.0, 8.0, 1.0]}] class Beta: From 2e31c830e1324a2055455d802c9b668fef3c3c1f Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 28 Nov 2023 22:22:28 +0000 Subject: [PATCH 39/57] Fix runtime compile issues for ROCm6.0 --- cupy_backends/cuda/api/runtime.pyx | 7 ++++--- cupy_backends/hip/cupy_hip_runtime.h | 19 ++++++++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cupy_backends/cuda/api/runtime.pyx b/cupy_backends/cuda/api/runtime.pyx index 8faff9fca94..b0d84de9548 100644 --- a/cupy_backends/cuda/api/runtime.pyx +++ b/cupy_backends/cuda/api/runtime.pyx @@ -294,7 +294,6 @@ cpdef getDeviceProperties(int device): properties['clockInstructionRate'] = props.clockInstructionRate properties['maxSharedMemoryPerMultiProcessor'] = ( props.maxSharedMemoryPerMultiProcessor) - properties['gcnArch'] = props.gcnArch properties['hdpMemFlushCntl'] = (props.hdpMemFlushCntl) properties['hdpRegFlushCntl'] = (props.hdpRegFlushCntl) properties['memPitch'] = props.memPitch @@ -327,6 +326,8 @@ cpdef getDeviceProperties(int device): arch['has3dGrid'] = props.arch.has3dGrid arch['hasDynamicParallelism'] = props.arch.hasDynamicParallelism properties['arch'] = arch + IF 0 < CUPY_HIP_VERSION < 310: #gcnArchName used after ROCm 3.1+ + properties['gcnArch'] = props.gcnArch IF CUPY_HIP_VERSION >= 310: properties['gcnArchName'] = props.gcnArchName properties['asicRevision'] = props.asicRevision @@ -696,13 +697,13 @@ cpdef PointerAttributes pointerGetAttributes(intptr_t ptr): cdef _PointerAttributes attrs status = cudaPointerGetAttributes(&attrs, ptr) check_status(status) - IF CUPY_CUDA_VERSION > 0: + IF CUPY_CUDA_VERSION > 0 or CUPY_HIP_VERSION > 60000000: return PointerAttributes( attrs.device, attrs.devicePointer, attrs.hostPointer, attrs.type) - ELIF CUPY_HIP_VERSION > 0: + ELIF 0 < CUPY_HIP_VERSION < 60000000: return PointerAttributes( attrs.device, attrs.devicePointer, diff --git a/cupy_backends/hip/cupy_hip_runtime.h b/cupy_backends/hip/cupy_hip_runtime.h index 51a54323ebc..0d1eb208528 100644 --- a/cupy_backends/hip/cupy_hip_runtime.h +++ b/cupy_backends/hip/cupy_hip_runtime.h @@ -269,6 +269,21 @@ cudaError_t cudaMemPrefetchAsync(const void *devPtr, size_t count, cudaError_t cudaPointerGetAttributes(cudaPointerAttributes *attributes, const void* ptr) { cudaError_t status = hipPointerGetAttributes(attributes, ptr); +#if HIP_VERSION >= 60000000 + if (status == cudaSuccess) { + switch (attributes->type) { + case 0 /* hipMemoryTypeHost */: + attributes->type = (hipMemoryType)1; /* cudaMemoryTypeHost */ + return status; + case 1 /* hipMemoryTypeDevice */: + attributes->type = (hipMemoryType)2; /* cudaMemoryTypeDevice */ + return status; + default: + /* we don't care the rest of possibilities */ + return status; + } + } +#else if (status == cudaSuccess) { switch (attributes->memoryType) { case 0 /* hipMemoryTypeHost */: @@ -281,7 +296,9 @@ cudaError_t cudaPointerGetAttributes(cudaPointerAttributes *attributes, /* we don't care the rest of possibilities */ return status; } - } else { + } +#endif + else { return status; } } From 9a332306e01f869d7bd2505bb1b801f6bdc66b7c Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 28 Nov 2023 22:52:47 +0000 Subject: [PATCH 40/57] Fix conditionals for ROCm6.0 --- cupy_backends/cuda/api/_runtime_typedef.pxi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cupy_backends/cuda/api/_runtime_typedef.pxi b/cupy_backends/cuda/api/_runtime_typedef.pxi index 9560b8260cc..b365f3321cd 100644 --- a/cupy_backends/cuda/api/_runtime_typedef.pxi +++ b/cupy_backends/cuda/api/_runtime_typedef.pxi @@ -131,13 +131,13 @@ cdef extern from *: ctypedef struct _MemPoolProps 'cudaMemPoolProps': pass # for HIP & RTD - IF CUPY_CUDA_VERSION > 0: + IF CUPY_CUDA_VERSION > 0 or CUPY_HIP_VERSION > 60000000: ctypedef struct _PointerAttributes 'cudaPointerAttributes': int type int device void* devicePointer void* hostPointer - ELIF CUPY_HIP_VERSION > 0: + ELIF 0 < CUPY_HIP_VERSION < 60000000: ctypedef struct _PointerAttributes 'cudaPointerAttributes': int memoryType int device From 8543331f7d489a48596c48cd448a2a46e21f47ce Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 28 Nov 2023 23:22:39 +0000 Subject: [PATCH 41/57] fix flake8 cython issues --- cupy_backends/cuda/api/_runtime_typedef.pxi | 2 +- cupy_backends/cuda/api/runtime.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cupy_backends/cuda/api/_runtime_typedef.pxi b/cupy_backends/cuda/api/_runtime_typedef.pxi index b365f3321cd..a4a129e0087 100644 --- a/cupy_backends/cuda/api/_runtime_typedef.pxi +++ b/cupy_backends/cuda/api/_runtime_typedef.pxi @@ -131,7 +131,7 @@ cdef extern from *: ctypedef struct _MemPoolProps 'cudaMemPoolProps': pass # for HIP & RTD - IF CUPY_CUDA_VERSION > 0 or CUPY_HIP_VERSION > 60000000: + IF CUPY_CUDA_VERSION > 0 or CUPY_HIP_VERSION > 60000000: ctypedef struct _PointerAttributes 'cudaPointerAttributes': int type int device diff --git a/cupy_backends/cuda/api/runtime.pyx b/cupy_backends/cuda/api/runtime.pyx index b0d84de9548..1d7732e0174 100644 --- a/cupy_backends/cuda/api/runtime.pyx +++ b/cupy_backends/cuda/api/runtime.pyx @@ -326,7 +326,7 @@ cpdef getDeviceProperties(int device): arch['has3dGrid'] = props.arch.has3dGrid arch['hasDynamicParallelism'] = props.arch.hasDynamicParallelism properties['arch'] = arch - IF 0 < CUPY_HIP_VERSION < 310: #gcnArchName used after ROCm 3.1+ + IF 0 < CUPY_HIP_VERSION < 310: # gcnArchName used after ROCm 3.1+ properties['gcnArch'] = props.gcnArch IF CUPY_HIP_VERSION >= 310: properties['gcnArchName'] = props.gcnArchName From 61e60270bcd0ae8ad3befd081f2882e1957f68c4 Mon Sep 17 00:00:00 2001 From: Kenichi Maehashi Date: Tue, 18 Jul 2023 02:03:12 +0000 Subject: [PATCH 42/57] remove explicit cython installation --- .github/workflows/pretest-rocm-test.sh | 3 +-- .github/workflows/pretest.yml | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/pretest-rocm-test.sh b/.github/workflows/pretest-rocm-test.sh index acb130500ba..9da45c2dcef 100644 --- a/.github/workflows/pretest-rocm-test.sh +++ b/.github/workflows/pretest-rocm-test.sh @@ -8,8 +8,7 @@ DEBIAN_FRONTEND=noninteractive apt-get -y install python3-pip python3-dev hipconfig -pip3 install -U pip wheel -pip3 install cython +python3.9 -m pip install -U pip wheel export ROCM_HOME="/opt/rocm" export HCC_AMDGPU_TARGET="gfx900" diff --git a/.github/workflows/pretest.yml b/.github/workflows/pretest.yml index 13a91f999cc..ca5339579a9 100644 --- a/.github/workflows/pretest.yml +++ b/.github/workflows/pretest.yml @@ -67,7 +67,6 @@ jobs: - name: Build run: | pip install -U pip wheel - pip install cython READTHEDOCS=True pip install -v -e . ccache --max-size 0.5Gi --cleanup --show-stats From 09d53dd8e2620c40e68b8ab553c111faa00fb863 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 29 Nov 2023 04:24:00 +0000 Subject: [PATCH 43/57] Update CI --- .github/workflows/pretest-rocm-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pretest-rocm-test.sh b/.github/workflows/pretest-rocm-test.sh index 9da45c2dcef..d661928d1aa 100644 --- a/.github/workflows/pretest-rocm-test.sh +++ b/.github/workflows/pretest-rocm-test.sh @@ -8,7 +8,7 @@ DEBIAN_FRONTEND=noninteractive apt-get -y install python3-pip python3-dev hipconfig -python3.9 -m pip install -U pip wheel +pip3 install -U pip wheel export ROCM_HOME="/opt/rocm" export HCC_AMDGPU_TARGET="gfx900" From 44fb4e5f56d7a2a214f28428f0d0503e796398d0 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 23 Jan 2024 20:21:43 +0000 Subject: [PATCH 44/57] Inital bringup of hipGraph --- cupy/cuda/stream.pyx | 16 +++-------- cupy_backends/cuda/api/runtime.pyx | 34 ----------------------- cupy_backends/hip/cupy_hip_runtime.h | 4 +++ tests/cupy_tests/cuda_tests/test_graph.py | 2 -- 4 files changed, 8 insertions(+), 48 deletions(-) diff --git a/cupy/cuda/stream.pyx b/cupy/cuda/stream.pyx index 10e7a120c55..ed56c9f8dd6 100644 --- a/cupy/cuda/stream.pyx +++ b/cupy/cuda/stream.pyx @@ -366,8 +366,6 @@ class _BaseStream: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g793d7d4e474388ddfda531603dc34aa3 """ - if runtime._is_hip_environment: - raise RuntimeError('This function is not supported on HIP') if self.ptr == 0 or self.ptr == 1: raise RuntimeError('cannot capture on the default (legacy) stream') if mode is None: @@ -396,8 +394,6 @@ class _BaseStream: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1gf5a0efebc818054ceecd1e3e5e76d93e """ - if runtime._is_hip_environment: - raise RuntimeError('This function is not supported on HIP') cdef intptr_t g = runtime.streamEndCapture(self.ptr) return graph.Graph.from_stream(g) @@ -412,9 +408,6 @@ class _BaseStream: Programming Guide for detail. """ - # TODO(leofang): is it better to be a property? - if runtime._is_hip_environment: - raise RuntimeError('This function is not supported on HIP') try: return runtime.streamIsCapturing(self.ptr) except RuntimeError: # can be RuntimeError or CUDARuntimeError @@ -462,9 +455,9 @@ class Stream(_BaseStream): ptr = 0 device_id = -1 elif ptds: - if runtime._is_hip_environment: - raise ValueError('HIP does not support per-thread ' - 'default stream (ptds)') + #if runtime._is_hip_environment: + # raise ValueError('HIP does not support per-thread ' + # 'default stream (ptds)') ptr = runtime.streamPerThread device_id = -1 elif non_blocking: @@ -520,5 +513,4 @@ class ExternalStream(_BaseStream): Stream.null = Stream(null=True) -if not runtime._is_hip_environment: - Stream.ptds = Stream(ptds=True) +Stream.ptds = Stream(ptds=True) diff --git a/cupy_backends/cuda/api/runtime.pyx b/cupy_backends/cuda/api/runtime.pyx index 63b11edc8c5..a8f8bf56740 100644 --- a/cupy_backends/cuda/api/runtime.pyx +++ b/cupy_backends/cuda/api/runtime.pyx @@ -507,8 +507,6 @@ cpdef intptr_t mallocArray(intptr_t descPtr, size_t width, size_t height, cpdef intptr_t mallocAsync(size_t size, intptr_t stream) except? 0: cdef void* ptr - if _is_hip_environment: - raise RuntimeError('HIP does not support mallocAsync') if runtimeGetVersion() < 11020: raise RuntimeError('mallocAsync is supported since CUDA 11.2') with nogil: @@ -519,8 +517,6 @@ cpdef intptr_t mallocAsync(size_t size, intptr_t stream) except? 0: cpdef intptr_t mallocFromPoolAsync( size_t size, intptr_t pool, intptr_t stream) except? 0: cdef void* ptr - if _is_hip_environment: - raise RuntimeError('HIP does not support mallocFromPoolAsync') if runtimeGetVersion() < 11020: raise RuntimeError('mallocFromPoolAsync is supported since CUDA 11.2') with nogil: @@ -562,8 +558,6 @@ cpdef freeArray(intptr_t ptr): check_status(status) cpdef freeAsync(intptr_t ptr, intptr_t stream): - if _is_hip_environment: - raise RuntimeError('HIP does not support freeAsync') if runtimeGetVersion() < 11020: raise RuntimeError('freeAsync is supported since CUDA 11.2') with nogil: @@ -715,8 +709,6 @@ cpdef PointerAttributes pointerGetAttributes(intptr_t ptr): cpdef intptr_t deviceGetDefaultMemPool(int device) except? 0: '''Get the default mempool on the current device.''' - if _is_hip_environment: - raise RuntimeError('HIP does not support deviceGetDefaultMemPool') if runtimeGetVersion() < 11020: raise RuntimeError('deviceGetDefaultMemPool is supported since ' 'CUDA 11.2') @@ -728,8 +720,6 @@ cpdef intptr_t deviceGetDefaultMemPool(int device) except? 0: cpdef intptr_t deviceGetMemPool(int device) except? 0: '''Get the current mempool on the current device.''' - if _is_hip_environment: - raise RuntimeError('HIP does not support deviceGetMemPool') if runtimeGetVersion() < 11020: raise RuntimeError('deviceGetMemPool is supported since ' 'CUDA 11.2') @@ -741,8 +731,6 @@ cpdef intptr_t deviceGetMemPool(int device) except? 0: cpdef deviceSetMemPool(int device, intptr_t pool): '''Set the current mempool on the current device to pool.''' - if _is_hip_environment: - raise RuntimeError('HIP does not support deviceSetMemPool') if runtimeGetVersion() < 11020: raise RuntimeError('deviceSetMemPool is supported since ' 'CUDA 11.2') @@ -751,8 +739,6 @@ cpdef deviceSetMemPool(int device, intptr_t pool): check_status(status) cpdef intptr_t memPoolCreate(MemPoolProps props) except? 0: - if _is_hip_environment: - raise RuntimeError('HIP does not support memPoolCreate') if runtimeGetVersion() < 11020: raise RuntimeError('memPoolCreate is supported since CUDA 11.2') @@ -770,8 +756,6 @@ cpdef intptr_t memPoolCreate(MemPoolProps props) except? 0: return pool cpdef memPoolDestroy(intptr_t pool): - if _is_hip_environment: - raise RuntimeError('HIP does not support memPoolDestroy') if runtimeGetVersion() < 11020: raise RuntimeError('memPoolDestroy is supported since CUDA 11.2') with nogil: @@ -779,8 +763,6 @@ cpdef memPoolDestroy(intptr_t pool): check_status(status) cpdef memPoolTrimTo(intptr_t pool, size_t size): - if _is_hip_environment: - raise RuntimeError('HIP does not support memPoolTrimTo') if runtimeGetVersion() < 11020: raise RuntimeError('memPoolTrimTo is supported since CUDA 11.2') with nogil: @@ -788,8 +770,6 @@ cpdef memPoolTrimTo(intptr_t pool, size_t size): check_status(status) cpdef memPoolGetAttribute(intptr_t pool, int attr): - if _is_hip_environment: - raise RuntimeError('HIP does not support memPoolGetAttribute') if runtimeGetVersion() < 11020: raise RuntimeError('memPoolGetAttribute is supported since CUDA 11.2') cdef int val1 @@ -805,8 +785,6 @@ cpdef memPoolGetAttribute(intptr_t pool, int attr): return val1 if attr <= 0x3 else val2 cpdef memPoolSetAttribute(intptr_t pool, int attr, object value): - if _is_hip_environment: - raise RuntimeError('HIP does not support memPoolSetAttribute') if runtimeGetVersion() < 11020: raise RuntimeError('memPoolSetAttribute is supported since CUDA 11.2') cdef int val1 @@ -870,9 +848,6 @@ cdef _HostFnFunc(void* func_arg) with gil: cpdef streamAddCallback(intptr_t stream, callback, intptr_t arg, unsigned int flags=0): - if _is_hip_environment and stream == 0: - raise RuntimeError('HIP does not allow adding callbacks to the ' - 'default (null) stream') func_arg = (callback, arg) cpython.Py_INCREF(func_arg) with nogil: @@ -883,9 +858,6 @@ cpdef streamAddCallback(intptr_t stream, callback, intptr_t arg, cpdef launchHostFunc(intptr_t stream, callback, intptr_t arg): - if _is_hip_environment: - raise RuntimeError('This feature is not supported on HIP') - func_arg = (callback, arg) cpython.Py_INCREF(func_arg) with nogil: @@ -907,8 +879,6 @@ cpdef streamWaitEvent(intptr_t stream, intptr_t event, unsigned int flags=0): cpdef streamBeginCapture(intptr_t stream, int mode=streamCaptureModeRelaxed): - if _is_hip_environment: - raise RuntimeError('streamBeginCapture is not supported in ROCm') # TODO(leofang): check and raise if stream == 0? with nogil: status = cudaStreamBeginCapture(stream, @@ -919,8 +889,6 @@ cpdef streamBeginCapture(intptr_t stream, int mode=streamCaptureModeRelaxed): cpdef intptr_t streamEndCapture(intptr_t stream) except? 0: # TODO(leofang): check and raise if stream == 0? cdef Graph g - if _is_hip_environment: - raise RuntimeError('streamEndCapture is not supported in ROCm') with nogil: status = cudaStreamEndCapture(stream, &g) check_status(status) @@ -929,8 +897,6 @@ cpdef intptr_t streamEndCapture(intptr_t stream) except? 0: cpdef bint streamIsCapturing(intptr_t stream) except*: cdef StreamCaptureStatus s - if _is_hip_environment: - raise RuntimeError('streamIsCapturing is not supported in ROCm') with nogil: status = cudaStreamIsCapturing(stream, &s) check_status(status) # cudaErrorStreamCaptureImplicit could be raised here diff --git a/cupy_backends/hip/cupy_hip_runtime.h b/cupy_backends/hip/cupy_hip_runtime.h index 0d1eb208528..e6c3eec0f28 100644 --- a/cupy_backends/hip/cupy_hip_runtime.h +++ b/cupy_backends/hip/cupy_hip_runtime.h @@ -530,7 +530,11 @@ cudaError_t cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) { } cudaError_t cudaGraphUpload(...) { +#if HIP_VERSION < 60000000 return hipErrorUnknown; +#else + return hipSuccess; +#endif } } // extern "C" diff --git a/tests/cupy_tests/cuda_tests/test_graph.py b/tests/cupy_tests/cuda_tests/test_graph.py index 6d70f85806b..56d5f2be56e 100644 --- a/tests/cupy_tests/cuda_tests/test_graph.py +++ b/tests/cupy_tests/cuda_tests/test_graph.py @@ -6,8 +6,6 @@ import cupyx -@pytest.mark.skipif(cuda.runtime.is_hip, - reason='HIP does not support this') @pytest.mark.skipif(cuda.driver.get_build_version() < 10010, reason='Only CUDA 10.1+ supports this') class TestGraph: From f69de2c9bc192fc5ec77e3657447ed93e8b1a906 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 23 Jan 2024 21:33:58 +0000 Subject: [PATCH 45/57] Fix test_null_stream_cannot_capture graph tests --- tests/cupy_tests/cuda_tests/test_graph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/cupy_tests/cuda_tests/test_graph.py b/tests/cupy_tests/cuda_tests/test_graph.py index 56d5f2be56e..ad1d610f6e4 100644 --- a/tests/cupy_tests/cuda_tests/test_graph.py +++ b/tests/cupy_tests/cuda_tests/test_graph.py @@ -143,7 +143,10 @@ def test_null_stream_cannot_capture(self, upload): # cudaStreamLegacy is unhappy when a blocking stream is capturing with pytest.raises(cuda.runtime.CUDARuntimeError) as e: cuda.Stream.null.is_capturing() - assert 'cudaErrorStreamCaptureImplicit' in str(e.value) + if _runtime.is_hip: + assert 'hipErrorStreamCaptureImplicit' in str(e.value) + else: + assert 'cudaErrorStreamCaptureImplicit' in str(e.value) g = s.end_capture() assert not s.is_capturing() assert not cuda.Stream.null.is_capturing() From 602f5703b1248786a11056466e3c9ebe25f4555b Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 24 Jan 2024 01:34:28 +0000 Subject: [PATCH 46/57] Update pytests to handle hip --- tests/cupy_tests/cuda_tests/test_graph.py | 36 +++++++++++++++-------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/tests/cupy_tests/cuda_tests/test_graph.py b/tests/cupy_tests/cuda_tests/test_graph.py index ad1d610f6e4..4f0a3e8d3ad 100644 --- a/tests/cupy_tests/cuda_tests/test_graph.py +++ b/tests/cupy_tests/cuda_tests/test_graph.py @@ -3,6 +3,7 @@ import cupy from cupy import cuda from cupy import testing +from cupy_backends.cuda.api import runtime import cupyx @@ -143,10 +144,8 @@ def test_null_stream_cannot_capture(self, upload): # cudaStreamLegacy is unhappy when a blocking stream is capturing with pytest.raises(cuda.runtime.CUDARuntimeError) as e: cuda.Stream.null.is_capturing() - if _runtime.is_hip: - assert 'hipErrorStreamCaptureImplicit' in str(e.value) - else: - assert 'cudaErrorStreamCaptureImplicit' in str(e.value) + assert ('hipErrorStreamCaptureImplicit' if runtime.is_hip + else 'cudaErrorStreamCaptureImplicit') in str(e.value) g = s.end_capture() assert not s.is_capturing() assert not cuda.Stream.null.is_capturing() @@ -165,11 +164,14 @@ def test_stream_capture_failure1(self): s.begin_capture() with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.synchronize() - assert 'cudaErrorStreamCaptureUnsupported' in str(e.value) + assert ('hipErrorStreamCaptureUnsupported' if runtime.is_hip + else 'cudaErrorStreamCaptureUnsupported') in str(e.value) # invalid operation causes the capture sequence to be invalidated with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s.end_capture() # noqa - assert 'cudaErrorStreamCaptureInvalidated' in str(e.value) + print(str(e.value)) + assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip + else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check s left the capture mode and permits normal usage assert not s.is_capturing() @@ -185,18 +187,21 @@ def test_stream_capture_failure2(self): s1.begin_capture() with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s2.end_capture() - assert 'cudaErrorIllegalState' in str(e.value) + assert ('hipErrorIllegalState' if runtime.is_hip + else 'cudaErrorIllegalState') in str(e.value) e2.record(s1) s2.wait_event(e2) with s2: b = a**3 # noqa with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s2.end_capture() - assert 'cudaErrorStreamCaptureUnmatched' in str(e.value) + assert ('hipErrorStreamCaptureUnmatched' if runtime.is_hip + else 'cudaErrorStreamCaptureUnmatched') in str(e.value) # invalid operation causes the capture sequence to be invalidated with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s1.end_capture() # noqa - assert 'cudaErrorStreamCaptureInvalidated' in str(e.value) + assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip + else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check both s1 and s2 left the capture mode and permit normal usage assert not s1.is_capturing() @@ -223,7 +228,8 @@ def test_stream_capture_failure3(self): # invalid operation causes the capture sequence to be invalidated with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s1.end_capture() # noqa - assert 'cudaErrorStreamCaptureUnjoined' in str(e.value) + assert ('hipErrorStreamCaptureImplicit' if runtime.is_hip + else 'cudaErrorStreamCaptureUnjoined') in str(e.value) # check both s1 and s2 left the capture mode and permit normal usage assert not s1.is_capturing() @@ -238,9 +244,11 @@ def test_stream_capture_failure4(self): s.begin_capture() # query the stream status is illegal during capturing s.done + with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.end_capture() - assert 'cudaErrorStreamCaptureInvalidated' in str(e.value) + assert ('hipErrorStreamCaptureImplicit' if runtime.is_hip + else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check s left the capture mode and permits normal usage assert not s.is_capturing() @@ -272,10 +280,12 @@ def test_stream_capture_failure6(self): # synchronize the stream is illegal during capturing with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.synchronize() - assert 'cudaErrorStreamCaptureUnsupported' in str(e.value) + assert ('hipErrorStreamCaptureUnsupported' if runtime.is_hip + else 'cudaErrorStreamCaptureUnsupported') in str(e.value) with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.end_capture() - assert 'cudaErrorStreamCaptureInvalidated' in str(e.value) + assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip + else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check s left the capture mode and permits normal usage assert not s.is_capturing() From 20e2e5ece48e425b9141da34e9f63cbe8f40e405 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Wed, 24 Jan 2024 22:58:52 +0000 Subject: [PATCH 47/57] enable stream capture for hip:blas/rand/solver/sparse --- cupy/cuda/memory.pyx | 32 +++++------------------ cupy/cuda/stream.pyx | 7 ++--- cupy_backends/cuda/libs/cublas.pyx | 2 +- cupy_backends/cuda/libs/curand.pyx | 2 +- cupy_backends/cuda/libs/cusolver.pyx | 2 +- cupy_backends/cuda/libs/cusparse.pyx | 2 +- cupy_backends/hip/cupy_hip_runtime.h | 20 ++++++-------- tests/cupy_tests/cuda_tests/test_graph.py | 5 ++-- 8 files changed, 22 insertions(+), 50 deletions(-) diff --git a/cupy/cuda/memory.pyx b/cupy/cuda/memory.pyx index e17cbcdd2e7..7ff9681b9ea 100644 --- a/cupy/cuda/memory.pyx +++ b/cupy/cuda/memory.pyx @@ -107,8 +107,6 @@ cdef class Memory(BaseMemory): cdef inline void check_async_alloc_supported(int device_id) except*: - if runtime._is_hip_environment: - raise RuntimeError('HIP does not support memory_async') if runtime.runtimeGetVersion() < 11020: raise RuntimeError("memory_async is supported since CUDA 11.2") cdef int dev_id @@ -404,10 +402,7 @@ cdef class MemoryPointer: """ stream_ptr = stream_module.get_current_stream_ptr() - if ( - not runtime._is_hip_environment - and runtime.streamIsCapturing(stream_ptr) - ): + if runtime.streamIsCapturing(stream_ptr): raise RuntimeError( 'the current stream is capturing, so synchronous API calls ' 'are disallowed') @@ -451,10 +446,7 @@ cdef class MemoryPointer: """ stream_ptr = stream_module.get_current_stream_ptr() - if ( - not runtime._is_hip_environment - and runtime.streamIsCapturing(stream_ptr) - ): + if runtime.streamIsCapturing(stream_ptr): raise RuntimeError( 'the current stream is capturing, so synchronous API calls ' 'are disallowed') @@ -478,10 +470,7 @@ cdef class MemoryPointer: stream_ptr = stream_module.get_current_stream_ptr() else: stream_ptr = stream.ptr - if ( - not runtime._is_hip_environment - and runtime.streamIsCapturing(stream_ptr) - ): + if runtime.streamIsCapturing(stream_ptr): raise RuntimeError( 'the current stream is capturing, so H2D transfers ' 'are disallowed') @@ -549,10 +538,7 @@ cdef class MemoryPointer: """ stream_ptr = stream_module.get_current_stream_ptr() - if ( - not runtime._is_hip_environment - and runtime.streamIsCapturing(stream_ptr) - ): + if runtime.streamIsCapturing(stream_ptr): raise RuntimeError( 'the current stream is capturing, so synchronous API calls ' 'are disallowed') @@ -576,10 +562,7 @@ cdef class MemoryPointer: stream_ptr = stream_module.get_current_stream_ptr() else: stream_ptr = stream.ptr - if ( - not runtime._is_hip_environment - and runtime.streamIsCapturing(stream_ptr) - ): + if runtime.streamIsCapturing(stream_ptr): raise RuntimeError( 'the current stream is capturing, so D2H transfers ' 'are disallowed') @@ -603,10 +586,7 @@ cdef class MemoryPointer: """ stream_ptr = stream_module.get_current_stream_ptr() - if ( - not runtime._is_hip_environment - and runtime.streamIsCapturing(stream_ptr) - ): + if runtime.streamIsCapturing(stream_ptr): raise RuntimeError( 'the current stream is capturing, so synchronous API calls ' 'are disallowed') diff --git a/cupy/cuda/stream.pyx b/cupy/cuda/stream.pyx index ed56c9f8dd6..e37bf5ac4ee 100644 --- a/cupy/cuda/stream.pyx +++ b/cupy/cuda/stream.pyx @@ -366,8 +366,8 @@ class _BaseStream: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g793d7d4e474388ddfda531603dc34aa3 """ - if self.ptr == 0 or self.ptr == 1: - raise RuntimeError('cannot capture on the default (legacy) stream') + # if self.ptr == 0 or self.ptr == 1: + # raise RuntimeError('cannot capture on the default (legacy) stream') if mode is None: # We default to the relaxed mode for the following reason: During # the capture the memory pool might need to increase size. If it's @@ -455,9 +455,6 @@ class Stream(_BaseStream): ptr = 0 device_id = -1 elif ptds: - #if runtime._is_hip_environment: - # raise ValueError('HIP does not support per-thread ' - # 'default stream (ptds)') ptr = runtime.streamPerThread device_id = -1 elif non_blocking: diff --git a/cupy_backends/cuda/libs/cublas.pyx b/cupy_backends/cuda/libs/cublas.pyx index c31d2544501..8b1a3bea779 100644 --- a/cupy_backends/cuda/libs/cublas.pyx +++ b/cupy_backends/cuda/libs/cublas.pyx @@ -488,7 +488,7 @@ cpdef setStream(intptr_t handle, size_t stream): # https://docs.nvidia.com/cuda/cublas/index.html#CUDA-graphs # Before we come up with a robust strategy to test the support conditions, # we disable this functionality. - if not runtime._is_hip_environment and runtime.streamIsCapturing(stream): + if runtime.streamIsCapturing(stream): raise NotImplementedError( 'calling cuBLAS API during stream capture is currently ' 'unsupported') diff --git a/cupy_backends/cuda/libs/curand.pyx b/cupy_backends/cuda/libs/curand.pyx index d8e8df97036..3812f2e0ec5 100644 --- a/cupy_backends/cuda/libs/curand.pyx +++ b/cupy_backends/cuda/libs/curand.pyx @@ -115,7 +115,7 @@ cpdef int getVersion() except? -1: cpdef setStream(size_t generator, size_t stream): # TODO(leofang): The support of stream capture is not mentioned at all in # the cuRAND docs (as of CUDA 11.5), so we disable this functionality. - if not runtime._is_hip_environment and runtime.streamIsCapturing(stream): + if runtime.streamIsCapturing(stream): raise NotImplementedError( 'calling cuRAND API during stream capture is currently ' 'unsupported') diff --git a/cupy_backends/cuda/libs/cusolver.pyx b/cupy_backends/cuda/libs/cusolver.pyx index c994d4d6646..dcc5d6c25be 100644 --- a/cupy_backends/cuda/libs/cusolver.pyx +++ b/cupy_backends/cuda/libs/cusolver.pyx @@ -1136,7 +1136,7 @@ cpdef spDestroy(intptr_t handle): cpdef setStream(intptr_t handle, size_t stream): # TODO(leofang): The support of stream capture is not mentioned at all in # the cuSOLVER docs (as of CUDA 11.5), so we disable this functionality. - if not runtime._is_hip_environment and runtime.streamIsCapturing(stream): + if runtime.streamIsCapturing(stream): raise NotImplementedError( 'calling cuSOLVER API during stream capture is currently ' 'unsupported') diff --git a/cupy_backends/cuda/libs/cusparse.pyx b/cupy_backends/cuda/libs/cusparse.pyx index 0b7a99c7018..5994a53197f 100644 --- a/cupy_backends/cuda/libs/cusparse.pyx +++ b/cupy_backends/cuda/libs/cusparse.pyx @@ -1630,7 +1630,7 @@ cpdef void setStream(intptr_t handle, size_t stream) except *: # https://docs.nvidia.com/cuda/cusparse/index.html#optimization-notes # Before we come up with a robust strategy to test the support conditions, # we disable this functionality. - if not runtime._is_hip_environment and runtime.streamIsCapturing(stream): + if runtime.streamIsCapturing(stream): raise NotImplementedError( 'calling cuSPARSE API during stream capture is currently ' 'unsupported') diff --git a/cupy_backends/hip/cupy_hip_runtime.h b/cupy_backends/hip/cupy_hip_runtime.h index e6c3eec0f28..1574cd4ce5c 100644 --- a/cupy_backends/hip/cupy_hip_runtime.h +++ b/cupy_backends/hip/cupy_hip_runtime.h @@ -81,7 +81,7 @@ cudaError_t cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) { cudaError_t cudaDeviceSetLimit(cudaLimit limit, size_t value) { // see https://github.com/ROCm-Developer-Tools/HIP/issues/1632 - return hipErrorUnknown; + return hipDeviceSetLimit(limit, value); } // IPC operations @@ -90,11 +90,9 @@ cudaError_t cudaIpcCloseMemHandle(void* devPtr) { } cudaError_t cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) { - return hipErrorUnknown; - // TODO(leofang): this is supported after ROCm-Developer-Tools/HIP#1996 is released; // as of ROCm 3.5.0 it is still not supported - //return hipIpcGetEventHandle(handle, event); + return hipIpcGetEventHandle(handle, event); } cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) { @@ -102,11 +100,9 @@ cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) { } cudaError_t cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) { - return hipErrorUnknown; - // TODO(leofang): this is supported after ROCm-Developer-Tools/HIP#1996 is released; // as of ROCm 3.5.0 it is still not supported - //return hipIpcOpenEventHandle(event, handle); + return hipIpcOpenEventHandle(event, handle); } cudaError_t cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { @@ -377,7 +373,7 @@ cudaError_t cudaStreamAddCallback(cudaStream_t stream, } cudaError_t cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) { - return hipErrorUnknown; + return hipLaunchHostFunc(stream, fn, userData); } cudaError_t cudaStreamQuery(cudaStream_t stream) { @@ -529,11 +525,11 @@ cudaError_t cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) { #endif } -cudaError_t cudaGraphUpload(...) { -#if HIP_VERSION < 60000000 - return hipErrorUnknown; +cudaError_t cudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream) { +#if HIP_VERSION >= 60000000 + return hipGraphUpload(graphExec, stream); #else - return hipSuccess; + return hipErrorUnknown; #endif } diff --git a/tests/cupy_tests/cuda_tests/test_graph.py b/tests/cupy_tests/cuda_tests/test_graph.py index 4f0a3e8d3ad..d94adbc8584 100644 --- a/tests/cupy_tests/cuda_tests/test_graph.py +++ b/tests/cupy_tests/cuda_tests/test_graph.py @@ -169,7 +169,6 @@ def test_stream_capture_failure1(self): # invalid operation causes the capture sequence to be invalidated with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s.end_capture() # noqa - print(str(e.value)) assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip else 'cudaErrorStreamCaptureInvalidated') in str(e.value) @@ -228,7 +227,7 @@ def test_stream_capture_failure3(self): # invalid operation causes the capture sequence to be invalidated with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s1.end_capture() # noqa - assert ('hipErrorStreamCaptureImplicit' if runtime.is_hip + assert ('hipErrorStreamCaptureUnjoined' if runtime.is_hip else 'cudaErrorStreamCaptureUnjoined') in str(e.value) # check both s1 and s2 left the capture mode and permit normal usage @@ -336,7 +335,7 @@ def test_stream_capture_failure_curand(self): # check s left the capture mode and permits normal usage assert not s.is_capturing() s.synchronize() - + def test_stream_capture_failure_cusparse(self): s = cupy.cuda.Stream(non_blocking=True) a = cupy.zeros((3, 4)) From 63f40bf7b8f7ad049ac0b31c95d0b081d159aca7 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Fri, 26 Jan 2024 21:30:46 +0000 Subject: [PATCH 48/57] Clean up comments --- cupy/cuda/stream.pyx | 4 ++-- tests/cupy_tests/cuda_tests/test_graph.py | 13 +++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cupy/cuda/stream.pyx b/cupy/cuda/stream.pyx index e37bf5ac4ee..f848df815a3 100644 --- a/cupy/cuda/stream.pyx +++ b/cupy/cuda/stream.pyx @@ -366,8 +366,8 @@ class _BaseStream: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g793d7d4e474388ddfda531603dc34aa3 """ - # if self.ptr == 0 or self.ptr == 1: - # raise RuntimeError('cannot capture on the default (legacy) stream') + if self.ptr == 0 or self.ptr == 1: + raise RuntimeError('cannot capture on the default (legacy) stream') if mode is None: # We default to the relaxed mode for the following reason: During # the capture the memory pool might need to increase size. If it's diff --git a/tests/cupy_tests/cuda_tests/test_graph.py b/tests/cupy_tests/cuda_tests/test_graph.py index d94adbc8584..f167989ee90 100644 --- a/tests/cupy_tests/cuda_tests/test_graph.py +++ b/tests/cupy_tests/cuda_tests/test_graph.py @@ -157,6 +157,8 @@ def test_null_stream_cannot_capture(self, upload): s.synchronize() testing.assert_array_equal(b, a + 4) + @pytest.mark.skipif(cuda.runtime.is_hip, + reason='HIP does not support this') def test_stream_capture_failure1(self): s = cupy.cuda.Stream(non_blocking=True) @@ -176,6 +178,8 @@ def test_stream_capture_failure1(self): assert not s.is_capturing() s.synchronize() + @pytest.mark.skipif(cuda.runtime.is_hip, + reason='HIP does not support this') def test_stream_capture_failure2(self): s1 = cupy.cuda.Stream(non_blocking=True) s2 = cupy.cuda.Stream(non_blocking=True) @@ -208,6 +212,8 @@ def test_stream_capture_failure2(self): s1.synchronize() s2.synchronize() + @pytest.mark.skipif(cuda.runtime.is_hip, + reason='HIP does not support this') def test_stream_capture_failure3(self): s1 = cupy.cuda.Stream(non_blocking=True) s2 = cupy.cuda.Stream(non_blocking=True) @@ -236,6 +242,8 @@ def test_stream_capture_failure3(self): s1.synchronize() s2.synchronize() + @pytest.mark.skipif(cuda.runtime.is_hip, + reason='HIP does not support this') def test_stream_capture_failure4(self): s = cupy.cuda.Stream(non_blocking=True) @@ -243,7 +251,6 @@ def test_stream_capture_failure4(self): s.begin_capture() # query the stream status is illegal during capturing s.done - with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.end_capture() assert ('hipErrorStreamCaptureImplicit' if runtime.is_hip @@ -270,7 +277,9 @@ def test_stream_capture_failure5(self): # check s left the capture mode and permits normal usage assert not s.is_capturing() s.synchronize() - + + @pytest.mark.skipif(cuda.runtime.is_hip, + reason='HIP does not support this') def test_stream_capture_failure6(self): s = cupy.cuda.Stream(non_blocking=True) From 6d5d69974d7d9d987a59c16953fb1877fa128330 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Fri, 26 Jan 2024 21:48:54 +0000 Subject: [PATCH 49/57] Update autopep8 --- .github/workflows/pretest.yml | 1 + tests/cupy_tests/cuda_tests/test_graph.py | 30 +++++++++++------------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/pretest.yml b/.github/workflows/pretest.yml index d8223b8fe14..2ffd5f5f696 100644 --- a/.github/workflows/pretest.yml +++ b/.github/workflows/pretest.yml @@ -30,6 +30,7 @@ jobs: - name: Setup pre-commit run: | + pip install --upgrade autopep8 pip install pre-commit - name: Check diff --git a/tests/cupy_tests/cuda_tests/test_graph.py b/tests/cupy_tests/cuda_tests/test_graph.py index f167989ee90..b546ced1e55 100644 --- a/tests/cupy_tests/cuda_tests/test_graph.py +++ b/tests/cupy_tests/cuda_tests/test_graph.py @@ -158,7 +158,7 @@ def test_null_stream_cannot_capture(self, upload): testing.assert_array_equal(b, a + 4) @pytest.mark.skipif(cuda.runtime.is_hip, - reason='HIP does not support this') + reason='HIP does not support this') def test_stream_capture_failure1(self): s = cupy.cuda.Stream(non_blocking=True) @@ -172,14 +172,14 @@ def test_stream_capture_failure1(self): with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s.end_capture() # noqa assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip - else 'cudaErrorStreamCaptureInvalidated') in str(e.value) + else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check s left the capture mode and permits normal usage assert not s.is_capturing() s.synchronize() @pytest.mark.skipif(cuda.runtime.is_hip, - reason='HIP does not support this') + reason='HIP does not support this') def test_stream_capture_failure2(self): s1 = cupy.cuda.Stream(non_blocking=True) s2 = cupy.cuda.Stream(non_blocking=True) @@ -190,7 +190,7 @@ def test_stream_capture_failure2(self): s1.begin_capture() with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s2.end_capture() - assert ('hipErrorIllegalState' if runtime.is_hip + assert ('hipErrorIllegalState' if runtime.is_hip else 'cudaErrorIllegalState') in str(e.value) e2.record(s1) s2.wait_event(e2) @@ -198,12 +198,12 @@ def test_stream_capture_failure2(self): b = a**3 # noqa with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s2.end_capture() - assert ('hipErrorStreamCaptureUnmatched' if runtime.is_hip + assert ('hipErrorStreamCaptureUnmatched' if runtime.is_hip else 'cudaErrorStreamCaptureUnmatched') in str(e.value) # invalid operation causes the capture sequence to be invalidated with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s1.end_capture() # noqa - assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip + assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check both s1 and s2 left the capture mode and permit normal usage @@ -213,7 +213,7 @@ def test_stream_capture_failure2(self): s2.synchronize() @pytest.mark.skipif(cuda.runtime.is_hip, - reason='HIP does not support this') + reason='HIP does not support this') def test_stream_capture_failure3(self): s1 = cupy.cuda.Stream(non_blocking=True) s2 = cupy.cuda.Stream(non_blocking=True) @@ -233,7 +233,7 @@ def test_stream_capture_failure3(self): # invalid operation causes the capture sequence to be invalidated with pytest.raises(cuda.runtime.CUDARuntimeError) as e: g = s1.end_capture() # noqa - assert ('hipErrorStreamCaptureUnjoined' if runtime.is_hip + assert ('hipErrorStreamCaptureUnjoined' if runtime.is_hip else 'cudaErrorStreamCaptureUnjoined') in str(e.value) # check both s1 and s2 left the capture mode and permit normal usage @@ -243,7 +243,7 @@ def test_stream_capture_failure3(self): s2.synchronize() @pytest.mark.skipif(cuda.runtime.is_hip, - reason='HIP does not support this') + reason='HIP does not support this') def test_stream_capture_failure4(self): s = cupy.cuda.Stream(non_blocking=True) @@ -253,7 +253,7 @@ def test_stream_capture_failure4(self): s.done with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.end_capture() - assert ('hipErrorStreamCaptureImplicit' if runtime.is_hip + assert ('hipErrorStreamCaptureImplicit' if runtime.is_hip else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check s left the capture mode and permits normal usage @@ -277,9 +277,9 @@ def test_stream_capture_failure5(self): # check s left the capture mode and permits normal usage assert not s.is_capturing() s.synchronize() - + @pytest.mark.skipif(cuda.runtime.is_hip, - reason='HIP does not support this') + reason='HIP does not support this') def test_stream_capture_failure6(self): s = cupy.cuda.Stream(non_blocking=True) @@ -288,11 +288,11 @@ def test_stream_capture_failure6(self): # synchronize the stream is illegal during capturing with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.synchronize() - assert ('hipErrorStreamCaptureUnsupported' if runtime.is_hip + assert ('hipErrorStreamCaptureUnsupported' if runtime.is_hip else 'cudaErrorStreamCaptureUnsupported') in str(e.value) with pytest.raises(cuda.runtime.CUDARuntimeError) as e: s.end_capture() - assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip + assert ('hipErrorStreamCaptureInvalidated' if runtime.is_hip else 'cudaErrorStreamCaptureInvalidated') in str(e.value) # check s left the capture mode and permits normal usage @@ -344,7 +344,7 @@ def test_stream_capture_failure_curand(self): # check s left the capture mode and permits normal usage assert not s.is_capturing() s.synchronize() - + def test_stream_capture_failure_cusparse(self): s = cupy.cuda.Stream(non_blocking=True) a = cupy.zeros((3, 4)) From 6fe644bb730ffd779a8e9ac783be7bfad3b06498 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Fri, 26 Jan 2024 22:25:49 +0000 Subject: [PATCH 50/57] Add backport conditionals for ROCm versions --- cupy_backends/hip/cupy_hip_runtime.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/cupy_backends/hip/cupy_hip_runtime.h b/cupy_backends/hip/cupy_hip_runtime.h index 1574cd4ce5c..f77d0c2fe38 100644 --- a/cupy_backends/hip/cupy_hip_runtime.h +++ b/cupy_backends/hip/cupy_hip_runtime.h @@ -80,8 +80,12 @@ cudaError_t cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) { } cudaError_t cudaDeviceSetLimit(cudaLimit limit, size_t value) { - // see https://github.com/ROCm-Developer-Tools/HIP/issues/1632 +#if HIP_VERSION >= 50300000 return hipDeviceSetLimit(limit, value); +#else + // see https://github.com/ROCm-Developer-Tools/HIP/issues/1632 + return hipErrorUnknown; +#endif } // IPC operations @@ -90,9 +94,13 @@ cudaError_t cudaIpcCloseMemHandle(void* devPtr) { } cudaError_t cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) { +#if HIP_VERSION >= 40300000 + return hipIpcGetEventHandle(handle, event); +#else // TODO(leofang): this is supported after ROCm-Developer-Tools/HIP#1996 is released; // as of ROCm 3.5.0 it is still not supported - return hipIpcGetEventHandle(handle, event); + return hipErrorUnknown; +#endif } cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) { @@ -100,9 +108,13 @@ cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) { } cudaError_t cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) { +#if HIP_VERSION >= 40300000 + return hipIpcOpenEventHandle(event, handle); +#else // TODO(leofang): this is supported after ROCm-Developer-Tools/HIP#1996 is released; // as of ROCm 3.5.0 it is still not supported - return hipIpcOpenEventHandle(event, handle); + return hipErrorUnknown; +#endif } cudaError_t cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { @@ -373,7 +385,11 @@ cudaError_t cudaStreamAddCallback(cudaStream_t stream, } cudaError_t cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) { +#if HIP_VERSION >= 50300000 return hipLaunchHostFunc(stream, fn, userData); +#else + return hipErrorUnknown; +#endif } cudaError_t cudaStreamQuery(cudaStream_t stream) { From 82a2f112a8bf456eb527e648fb8f62a60de7cf01 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 7 May 2024 22:08:36 +0000 Subject: [PATCH 51/57] Enable cuda_array_interface on ROCm --- cupy/_core/core.pyx | 8 ----- tests/cupy_tests/core_tests/test_ndarray.py | 8 ++--- .../test_ndarray_cuda_array_interface.py | 34 ++++++++++--------- .../creation_tests/test_from_data.py | 10 ++---- 4 files changed, 22 insertions(+), 38 deletions(-) diff --git a/cupy/_core/core.pyx b/cupy/_core/core.pyx index 221352b3430..f3207dca253 100644 --- a/cupy/_core/core.pyx +++ b/cupy/_core/core.pyx @@ -239,9 +239,6 @@ cdef class _ndarray_base: @property def __cuda_array_interface__(self): - if runtime._is_hip_environment: - raise AttributeError( - 'HIP/ROCm does not support cuda array interface') cdef dict desc = { 'shape': self.shape, 'typestr': self.dtype.str, @@ -1678,8 +1675,6 @@ cdef class _ndarray_base: # `_kernel._preprocess_args`. check = (hasattr(x, '__cuda_array_interface__') or hasattr(x, '__cupy_get_ndarray__')) - if runtime._is_hip_environment and isinstance(x, ndarray): - check = True if (not check and not type(x) in _scalar.scalar_type_set and not isinstance(x, numpy.ndarray)): @@ -2732,9 +2727,6 @@ cpdef _ndarray_base asfortranarray(_ndarray_base a, dtype=None): cpdef _ndarray_base _convert_object_with_cuda_array_interface(a): - if runtime._is_hip_environment: - raise RuntimeError( - 'HIP/ROCm does not support cuda array interface') cdef Py_ssize_t sh, st cdef dict desc = a.__cuda_array_interface__ diff --git a/tests/cupy_tests/core_tests/test_ndarray.py b/tests/cupy_tests/core_tests/test_ndarray.py index 968812af38a..df17512f005 100644 --- a/tests/cupy_tests/core_tests/test_ndarray.py +++ b/tests/cupy_tests/core_tests/test_ndarray.py @@ -263,8 +263,6 @@ def test_shape_need_copy(self): assert 'incompatible shape' in str(e.value).lower() -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') class TestNdarrayCudaInterface(unittest.TestCase): def test_cuda_array_interface(self): @@ -325,8 +323,8 @@ def test_cuda_array_interface_zero_size(self): 'stream': ('null', 'new', 'ptds'), 'ver': (2, 3), })) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') + + class TestNdarrayCudaInterfaceStream(unittest.TestCase): def setUp(self): if self.stream == 'null': @@ -368,8 +366,6 @@ def test_cuda_array_interface_stream(self): assert iface['stream'] == stream.ptr -@pytest.mark.skipif(not cupy.cuda.runtime.is_hip, - reason='This is supported on CUDA') class TestNdarrayCudaInterfaceNoneCUDA(unittest.TestCase): def setUp(self): diff --git a/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py b/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py index 6412271a866..5ae9ebda097 100644 --- a/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py +++ b/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py @@ -3,6 +3,7 @@ from cupy_backends.cuda import stream as stream_module import cupy +import cupy_backends from cupy import _core from cupy import testing @@ -27,8 +28,11 @@ def __cuda_array_interface__(self): 'version': self.ver, } if self.ver == 3: - stream = cupy.cuda.get_current_stream() - desc['stream'] = 1 if stream.ptr == 0 else stream.ptr + if not cupy_backends.cuda.api.runtime.is_hip: + desc['stream'] = cupy.cuda.runtime.streamLegacy if stream.ptr == 0 else stream.ptr + else: # Only non-default streams use their actual ptr values. (ROCm) + stream = cupy.cuda.get_current_stream() + desc['stream'] = stream.ptr return desc @@ -36,8 +40,8 @@ def __cuda_array_interface__(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') + + class TestArrayUfunc(unittest.TestCase): def setUp(self): @@ -72,8 +76,8 @@ def test_add_scalar_with_strides(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') + + class TestElementwiseKernel(unittest.TestCase): def setUp(self): @@ -109,8 +113,8 @@ def test_add_scalar_with_strides(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') + + class TestSimpleReductionFunction(unittest.TestCase): def setUp(self): @@ -148,8 +152,8 @@ def test_shape_with_strides(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') + + class TestReductionKernel(unittest.TestCase): def setUp(self): @@ -192,8 +196,8 @@ def test_shape_with_strides(self): {'shape': (10, 10), 'slices': (slice(2, None), slice(2, None))}, {'shape': (10, 10), 'slices': (slice(2, None), slice(4, None))}, ) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') + + class TestSlicingMemoryPointer(unittest.TestCase): @testing.for_all_dtypes_combination(names=['dtype']) @@ -237,8 +241,6 @@ def test_shape_with_strides(self, dtype, order): @testing.parameterize(*test_cases_with_stream) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') class TestCUDAArrayInterfaceCompliance(unittest.TestCase): def setUp(self): @@ -284,8 +286,8 @@ def test_value_type(self, dtype, order): @testing.parameterize(*testing.product({ 'stream': ('null', 'new', 'ptds'), })) -@pytest.mark.skipif(cupy.cuda.runtime.is_hip, - reason='HIP does not support this') + + class TestCUDAArrayInterfaceStream(unittest.TestCase): def setUp(self): if self.stream == 'null': diff --git a/tests/cupy_tests/creation_tests/test_from_data.py b/tests/cupy_tests/creation_tests/test_from_data.py index 567f8f97ac4..de22a5c097e 100644 --- a/tests/cupy_tests/creation_tests/test_from_data.py +++ b/tests/cupy_tests/creation_tests/test_from_data.py @@ -570,8 +570,8 @@ def test_fromfile_big_endian(self, xp): 'ver': tuple(range(max_cuda_array_interface_version+1)), 'strides': (False, None, True), })) -@pytest.mark.skipif( - cupy.cuda.runtime.is_hip, reason='HIP does not support this') + + class TestCudaArrayInterface(unittest.TestCase): @testing.for_all_dtypes() def test_base(self, dtype): @@ -640,8 +640,6 @@ def test_big_endian(self): 'ver': tuple(range(1, max_cuda_array_interface_version+1)), 'strides': (False, None, True), })) -@pytest.mark.skipif( - cupy.cuda.runtime.is_hip, reason='HIP does not support this') class TestCudaArrayInterfaceMaskedArray(unittest.TestCase): # TODO(leofang): update this test when masked array is supported @testing.for_all_dtypes() @@ -656,8 +654,6 @@ def test_masked_array(self, dtype): # marked slow as either numpy or cupy could go OOM in this test @testing.slow -@pytest.mark.skipif( - cupy.cuda.runtime.is_hip, reason='HIP does not support this') class TestCudaArrayInterfaceBigArray(unittest.TestCase): def test_with_over_size_array(self): # real example from #3009 @@ -667,8 +663,6 @@ def test_with_over_size_array(self): testing.assert_array_equal(a, b) -@pytest.mark.skipif( - cupy.cuda.runtime.is_hip, reason='HIP does not support this') class DummyObjectWithCudaArrayInterface(object): def __init__(self, a, ver, include_strides=False, mask=None, stream=None): assert ver in tuple(range(max_cuda_array_interface_version+1)) From e0e23983f923beca14ffce8c543c1a6e1e154400 Mon Sep 17 00:00:00 2001 From: AdrianAbeyta Date: Tue, 7 May 2024 22:22:47 +0000 Subject: [PATCH 52/57] Modify UT due to ROCm support with cuda_array_interface --- tests/cupy_tests/core_tests/test_ndarray.py | 16 ++++++++------ .../test_ndarray_cuda_array_interface.py | 22 +++++-------------- .../creation_tests/test_from_data.py | 2 -- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/tests/cupy_tests/core_tests/test_ndarray.py b/tests/cupy_tests/core_tests/test_ndarray.py index df17512f005..61138b3fd83 100644 --- a/tests/cupy_tests/core_tests/test_ndarray.py +++ b/tests/cupy_tests/core_tests/test_ndarray.py @@ -323,8 +323,6 @@ def test_cuda_array_interface_zero_size(self): 'stream': ('null', 'new', 'ptds'), 'ver': (2, 3), })) - - class TestNdarrayCudaInterfaceStream(unittest.TestCase): def setUp(self): if self.stream == 'null': @@ -366,18 +364,22 @@ def test_cuda_array_interface_stream(self): assert iface['stream'] == stream.ptr -class TestNdarrayCudaInterfaceNoneCUDA(unittest.TestCase): +class TestNdarrayCudaInterfaceAttr(unittest.TestCase): def setUp(self): self.arr = cupy.zeros(shape=(2, 3), dtype=cupy.float64) def test_cuda_array_interface_hasattr(self): - assert not hasattr(self.arr, '__cuda_array_interface__') + assert hasattr(self.arr, '__cuda_array_interface__') def test_cuda_array_interface_getattr(self): - with pytest.raises(AttributeError) as e: - getattr(self.arr, '__cuda_array_interface__') - assert 'HIP' in str(e.value) + try: + interface = getattr(self.arr, '__cuda_array_interface__') + self.assertIsNotNone(interface) + except AttributeError as e: + self.fail( + "__cuda_array_interface__ should be present, " + f"but got an AttributeError: {str(e)}") @testing.parameterize( diff --git a/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py b/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py index 5ae9ebda097..64705ec366b 100644 --- a/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py +++ b/tests/cupy_tests/core_tests/test_ndarray_cuda_array_interface.py @@ -1,5 +1,4 @@ import unittest -import pytest from cupy_backends.cuda import stream as stream_module import cupy @@ -28,11 +27,12 @@ def __cuda_array_interface__(self): 'version': self.ver, } if self.ver == 3: - if not cupy_backends.cuda.api.runtime.is_hip: - desc['stream'] = cupy.cuda.runtime.streamLegacy if stream.ptr == 0 else stream.ptr - else: # Only non-default streams use their actual ptr values. (ROCm) - stream = cupy.cuda.get_current_stream() + stream = cupy.cuda.get_current_stream() + # Only non-default streams use their actual ptr values. (ROCm) + if cupy_backends.cuda.api.runtime.is_hip: desc['stream'] = stream.ptr + else: + desc['stream'] = 1 if stream.ptr == 0 else stream.ptr # noqa: F821, E501 return desc @@ -40,8 +40,6 @@ def __cuda_array_interface__(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) - - class TestArrayUfunc(unittest.TestCase): def setUp(self): @@ -76,8 +74,6 @@ def test_add_scalar_with_strides(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) - - class TestElementwiseKernel(unittest.TestCase): def setUp(self): @@ -113,8 +109,6 @@ def test_add_scalar_with_strides(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) - - class TestSimpleReductionFunction(unittest.TestCase): def setUp(self): @@ -152,8 +146,6 @@ def test_shape_with_strides(self): 'stream': ('null', 'new'), 'ver': (2, 3), })) - - class TestReductionKernel(unittest.TestCase): def setUp(self): @@ -196,8 +188,6 @@ def test_shape_with_strides(self): {'shape': (10, 10), 'slices': (slice(2, None), slice(2, None))}, {'shape': (10, 10), 'slices': (slice(2, None), slice(4, None))}, ) - - class TestSlicingMemoryPointer(unittest.TestCase): @testing.for_all_dtypes_combination(names=['dtype']) @@ -286,8 +276,6 @@ def test_value_type(self, dtype, order): @testing.parameterize(*testing.product({ 'stream': ('null', 'new', 'ptds'), })) - - class TestCUDAArrayInterfaceStream(unittest.TestCase): def setUp(self): if self.stream == 'null': diff --git a/tests/cupy_tests/creation_tests/test_from_data.py b/tests/cupy_tests/creation_tests/test_from_data.py index de22a5c097e..6ee0cfa526a 100644 --- a/tests/cupy_tests/creation_tests/test_from_data.py +++ b/tests/cupy_tests/creation_tests/test_from_data.py @@ -570,8 +570,6 @@ def test_fromfile_big_endian(self, xp): 'ver': tuple(range(max_cuda_array_interface_version+1)), 'strides': (False, None, True), })) - - class TestCudaArrayInterface(unittest.TestCase): @testing.for_all_dtypes() def test_base(self, dtype): From d418a99041ccfa794f587f86c86093bd41ee0b05 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 7 Nov 2024 16:04:40 -0600 Subject: [PATCH 53/57] Fix static check errors --- install/cupy_builder/_command.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install/cupy_builder/_command.py b/install/cupy_builder/_command.py index 66af00bb40f..4d8025631d7 100644 --- a/install/cupy_builder/_command.py +++ b/install/cupy_builder/_command.py @@ -40,10 +40,10 @@ def compile_device_code( - list of compiled object files for device code ("*.o") """ sources_cu, sources_cpp = filter_files_by_extension( - ext.sources, '.cu') + list(map(str, ext.sources)), '.cu') if len(sources_cu) == 0: # No device code used in this extension. - return ext.sources, [] + return list(map(str, ext.sources)), [] if sys.platform == 'win32': compiler = DeviceCompilerWin32(ctx) From 244841bcfee6cde902295dbf23592892e0de9d28 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Thu, 9 Jan 2025 15:59:45 -0600 Subject: [PATCH 54/57] Add workaround for amdgpu --- docker/rocm/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index 0bc2769a583..9457af1908d 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -2,6 +2,8 @@ FROM rocm/dev-ubuntu-20.04:5.0.1 LABEL maintainer="CuPy Team" RUN curl -qL https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - +# Workaround for missing amdgpu drivers +RUN echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/23.20/amdgpu/ubuntu focal main" | tee /etc/apt/sources.list.d/amdgpu.list RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ hipblas hipsparse rocsparse rocrand rocthrust rocsolver rocfft hipfft hipcub rocprim rccl && \ From 9cdff1737eaa44aba657cb17f7e0cc421d7cca34 Mon Sep 17 00:00:00 2001 From: pnunna93 <104791500+pnunna93@users.noreply.github.com> Date: Mon, 20 Jan 2025 23:20:17 -0600 Subject: [PATCH 55/57] Cherrypick thrust fix (#70) * add thrust macros for rocm6.3 * fix code formating for macro --------- Co-authored-by: lcskrishna --- install/cupy_builder/install_build.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/install/cupy_builder/install_build.py b/install/cupy_builder/install_build.py index 3e674844d54..ef374bb65a4 100644 --- a/install/cupy_builder/install_build.py +++ b/install/cupy_builder/install_build.py @@ -169,6 +169,8 @@ def get_compiler_setting(ctx: Context, use_hip): if use_hip: extra_compile_args.append('-std=c++11') + define_macros.append( + ('THRUST_DEVICE_SYSTEM', 'THRUST_DEVICE_SYSTEM_HIP')) if PLATFORM_WIN32: nvtx_path = _environment.get_nvtx_path() From 40fb29c7447f8117dccf71f92a3cb166afbace20 Mon Sep 17 00:00:00 2001 From: Kenichi Maehashi Date: Mon, 14 Apr 2025 05:27:09 +0000 Subject: [PATCH 56/57] build on ROCm 6.4 --- cupy/cuda/cupy_thrust.h | 7 +++++++ install/cupy_builder/_features.py | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cupy/cuda/cupy_thrust.h b/cupy/cuda/cupy_thrust.h index 76cfe9ff7f8..838142b981e 100644 --- a/cupy/cuda/cupy_thrust.h +++ b/cupy/cuda/cupy_thrust.h @@ -4,7 +4,14 @@ #ifndef CUPY_NO_CUDA #include #include + +#ifndef CUPY_USE_HIP #include // for THRUST_VERSION +#else +// WAR #9098: +// rocThrust 3.3.0 (ROCm 6.4.0) cannot be compiled by host compiler +#define THRUST_VERSION 0 +#endif void thrust_sort(int, void *, size_t *, const std::vector&, intptr_t, void *); void thrust_lexsort(int, size_t *, void *, size_t, size_t, intptr_t, void *); diff --git a/install/cupy_builder/_features.py b/install/cupy_builder/_features.py index 908e699580d..4a7b0a1906b 100644 --- a/install/cupy_builder/_features.py +++ b/install/cupy_builder/_features.py @@ -382,7 +382,9 @@ def get_features(ctx: Context) -> Dict[str, Feature]: ('cupy.cuda.thrust', ['cupy/cuda/cupy_thrust.cu']), ], 'include': [ - 'thrust/version.h', + # WAR #9098: + # rocThrust 3.3.0 (ROCm 6.4.0) cannot be compiled by host compiler + # 'thrust/version.h', ], 'libraries': [ 'amdhip64', # was hiprtc and hip_hcc before ROCm 3.8.0 From 1cea0ec2261595ef91e44314c4b576689c6e6ba6 Mon Sep 17 00:00:00 2001 From: Kenichi Maehashi Date: Mon, 14 Apr 2025 05:27:47 +0000 Subject: [PATCH 57/57] update CI to use ROCm 6.4 --- .github/workflows/pretest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pretest.yml b/.github/workflows/pretest.yml index f149f9c4fba..86043e6610f 100644 --- a/.github/workflows/pretest.yml +++ b/.github/workflows/pretest.yml @@ -107,4 +107,4 @@ jobs: - name: Build & Test run: | - docker run --rm -v "${PWD}:/src" -w /src "rocm/dev-ubuntu-22.04:6.0" bash .github/workflows/scripts/pretest-rocm-test.sh + docker run --rm -v "${PWD}:/src" -w /src "rocm/dev-ubuntu-22.04:6.4" bash .github/workflows/scripts/pretest-rocm-test.sh