Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
8952ad2
cudnn , miopen changes on 6.1 branch
Oct 27, 2023
cef800e
update miopen.pyx
bmedishe Nov 27, 2023
842331a
do not skip tests
bmedishe Nov 27, 2023
685bc5a
cudnn , miopen changes on 6.1 branch
Oct 27, 2023
0c0f0be
update miopen.pyx
bmedishe Nov 27, 2023
fd2b322
do not skip tests
bmedishe Nov 27, 2023
c4ec1bb
Merge branch 'rocm5.1_it_miopen_integration' of https://github.com/RO…
bmedishe Nov 27, 2023
334557c
update _feature.py with miopen lib, include
bmedishe Nov 28, 2023
d396e57
add cudnn in _features.py
bmedishe Nov 28, 2023
02ab1ff
_is_hip_env replaced with hip_env
bmedishe Nov 28, 2023
9d2148f
tabs error
bmedishe Nov 29, 2023
8e0c0a1
runtime.hip_env replaced with runtime._is_hip_env
bmedishe Nov 29, 2023
d81e48c
update cudnn.pyx debug errors
bmedishe Nov 29, 2023
ee228f3
add miopen.pxd
bmedishe Nov 29, 2023
67b677d
update for cupy_miopen build
bmedishe Nov 29, 2023
e79cce9
update cudnn.pyx
bmedishe Nov 30, 2023
c1b5396
almost working build
bmedishe Dec 3, 2023
5f451a3
update got miopenGetVersion
bmedishe Dec 4, 2023
c712949
size_t to size_t*
bmedishe Dec 4, 2023
8541e90
update cupy_backends/cuda/libs/miopen.pyx
bmedishe Dec 4, 2023
028a475
update cudnn.pyx
bmedishe Dec 5, 2023
e09ddbd
comment out miopen unsupported apis
bmedishe Dec 5, 2023
93d5906
add dropout apis
bmedishe Dec 5, 2023
5ab3ece
update enum constants miopen cudnn
bmedishe Dec 7, 2023
30c13ae
from cupyx import cudnn does not throw error
bmedishe Dec 8, 2023
1b0b892
adding miopen apis
bmedishe Dec 13, 2023
d76814e
activation tests working
bmedishe Dec 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,460 changes: 748 additions & 712 deletions cupy_backends/cuda/libs/cudnn.pxd

Large diffs are not rendered by default.

5,030 changes: 2,516 additions & 2,514 deletions cupy_backends/cuda/libs/cudnn.pyx

Large diffs are not rendered by default.

624 changes: 624 additions & 0 deletions cupy_backends/cuda/libs/miopen.pxd

Large diffs are not rendered by default.

516 changes: 516 additions & 0 deletions cupy_backends/cuda/libs/miopen.pyx

Large diffs are not rendered by default.

14 changes: 6 additions & 8 deletions cupy_backends/cupy_cudnn.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// This file is a stub header file of cudnn for Read the Docs.


#ifndef INCLUDE_GUARD_CUPY_CUDNN_H
#define INCLUDE_GUARD_CUPY_CUDNN_H
#if CUPY_USE_HIP

#include "miopen/miopen.h"

#ifndef CUPY_NO_CUDA
#elif !defined(CUPY_NO_CUDA)

#include <cudnn.h>

Expand All @@ -12,21 +16,15 @@
#include "stub/cupy_cuda_common.h"
#include "stub/cupy_cudnn.h"

#else

#include "hip/cupy_hip_common.h"
#include "stub/cupy_cudnn.h"

#endif // #ifdef CUPY_NO_CUDA


///////////////////////////////////////////////////////////////////////////////
// Definitions are for compatibility with cuDNN v5 and v6.
///////////////////////////////////////////////////////////////////////////////

extern "C" {

#if defined(CUPY_NO_CUDA) || (CUDNN_VERSION < 6000)
#if !defined(CUPY_NO_CUDA) && (CUDNN_VERSION < 6000)

typedef enum {} cudnnRNNAlgo_t;
typedef enum {} cudnnReduceTensorOp_t;
Expand Down
21 changes: 21 additions & 0 deletions cupy_backends/cupy_miopen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// This file is a stub header file of cudnn for Read the Docs.


#ifndef INCLUDE_GUARD_CUPY_CUDNN_H
#define INCLUDE_GUARD_CUPY_CUDNN_H
#if CUPY_USE_HIP

#include <miopen/miopen.h>

#elif !defined(CUPY_NO_CUDA)

#include <cudnn.h>

#elif defined(CUPY_NO_CUDA)

#include "stub/cupy_cuda_common.h"
#include "stub/cupy_cudnn.h"


#endif // #ifdef CUPY_NO_CUDA
#endif // #ifndef INCLUDE_GUARD_CUPY_CUDNN_H
54 changes: 28 additions & 26 deletions cupyx/cudnn.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@ from cupy._core.core cimport _ndarray_base
from cupy._core cimport internal
from cupy.cuda cimport device
from cupy.cuda cimport memory as _memory
from cupy_backends.cuda.libs cimport cudnn
IF CUPY_HIP_VERSION != 0:
from cupy_backends.cuda.libs import miopen as cudnn
from cupy_backends.cuda.libs.cudnn import *
ELSE:
from cupy_backends.cuda.libs cimport cudnn

from cupy._core._ufuncs import elementwise_copy as _elementwise_copy
from cupy import _util

from cupy.cuda import cudnn as _py_cudnn
from cupy_backends.cuda.libs import cudnn as _cudnn


cdef int _cudnn_version = -1
Expand Down Expand Up @@ -109,11 +114,11 @@ cdef class Descriptor:
cpdef int get_data_type(dtype) except? -1:
cdef char t = ord(dtype.char)
if t == b'f':
return cudnn.CUDNN_DATA_FLOAT
return _cudnn.CUDNN_DATA_FLOAT
elif t == b'd':
return cudnn.CUDNN_DATA_DOUBLE
return _cudnn.CUDNN_DATA_DOUBLE
elif t == b'e':
return cudnn.CUDNN_DATA_HALF
return _cudnn.CUDNN_DATA_HALF
else:
raise TypeError('Dtype {} is not supported in cuDNN'.format(dtype))

Expand Down Expand Up @@ -153,9 +158,7 @@ cpdef _create_tensor_nd_descriptor(
desc, data_type, arr._shape.size(), <size_t>c_shape.data(),
<size_t>c_strides.data())


cpdef _create_tensor_descriptor(size_t desc, _ndarray_base arr,
int format=cudnn.CUDNN_TENSOR_NCHW):
cpdef _create_tensor_descriptor(size_t desc, _ndarray_base arr,int format=cudnn.miopenTensorNCHW):
if not arr._c_contiguous:
raise ValueError('cupyx.cudnn supports c-contiguous arrays only')
if arr._shape.size() == 4:
Expand All @@ -180,12 +183,12 @@ cpdef _create_tensor_descriptor_as4darray(size_t desc,
if arr._shape.size() > 0:
dim1 = arr._shape[0]
dim2 = arr.size // dim1
cudnn.setTensor4dDescriptor(desc, cudnn.CUDNN_TENSOR_NCHW, data_type,
cudnn.setTensor4dDescriptor(desc, _cudnn.CUDNN_TENSOR_NCHW, data_type,
dim1, dim2, 1, 1)


cpdef _create_filter_descriptor(
size_t desc, _ndarray_base arr, int format=cudnn.CUDNN_TENSOR_NCHW):
size_t desc, _ndarray_base arr, int format=_cudnn.CUDNN_TENSOR_NCHW):
cdef vector.vector[int] c_shape
cdef Py_ssize_t s, ndim = arr._shape.size()
data_type = get_data_type(arr.dtype)
Expand Down Expand Up @@ -268,7 +271,7 @@ cpdef _ndarray_base _ascontiguousarray_normalized_strides(_ndarray_base a):
return newarray


def create_tensor_descriptor(arr, format=cudnn.CUDNN_TENSOR_NCHW):
def create_tensor_descriptor(arr, format=_cudnn.CUDNN_TENSOR_NCHW):
desc = Descriptor(cudnn.createTensorDescriptor(),
_py_cudnn.destroyTensorDescriptor)
_create_tensor_descriptor(desc.value, arr, format)
Expand Down Expand Up @@ -306,15 +309,15 @@ def create_tensor_nd_descriptor(_ndarray_base arr):
return desc


def create_filter_descriptor(arr, format=cudnn.CUDNN_TENSOR_NCHW):
def create_filter_descriptor(arr, format=_cudnn.CUDNN_TENSOR_NCHW):
desc = Descriptor(cudnn.createFilterDescriptor(),
_py_cudnn.destroyFilterDescriptor)
_create_filter_descriptor(desc.value, arr, format)
return desc


def create_convolution_descriptor(pad, stride, dtype,
mode=cudnn.CUDNN_CROSS_CORRELATION,
mode=_cudnn.CUDNN_CROSS_CORRELATION,
dilation=None,
use_tensor_core=False,
groups=1):
Expand Down Expand Up @@ -616,7 +619,7 @@ def rnn_backward_weights_ex(
return dw


def create_activation_descriptor(mode, nan_prop_mode=cudnn.CUDNN_PROPAGATE_NAN,
def create_activation_descriptor(mode, nan_prop_mode=_cudnn.CUDNN_PROPAGATE_NAN,
coef=0.0):
desc = Descriptor(cudnn.createActivationDescriptor(),
_py_cudnn.destroyActivationDescriptor)
Expand Down Expand Up @@ -645,7 +648,7 @@ def activation_forward(_ndarray_base x, int mode, double coef=0.0):
try:
_create_tensor_descriptor_as4darray(desc, x)
cudnn.setActivationDescriptor(
act_desc, mode, cudnn.CUDNN_NOT_PROPAGATE_NAN, coef)
act_desc, mode, _cudnn.CUDNN_NOT_PROPAGATE_NAN, coef)
cudnn.activationForward_v4(
handle, act_desc, one, desc, x.data.ptr,
zero, desc, y.data.ptr)
Expand Down Expand Up @@ -773,13 +776,13 @@ def create_dropout_descriptor(
desc = Descriptor(cudnn.createDropoutDescriptor(),
_py_cudnn.destroyDropoutDescriptor)
cudnn.setDropoutDescriptor(desc.value, handle, dropout,
states, state_size_in_bytes, seed)
states, state_size_in_bytes, seed, False, False, 0)
return desc


def set_dropout_descriptor(desc, handle, dropout):
# When the fourth argument is NULL, random state is not updated.
cudnn.setDropoutDescriptor(desc.value, handle, dropout, 0, 0, 0)
cudnn.setDropoutDescriptor(desc.value, handle, dropout, 0, 0, 0, False, False, 0)


def _create_ctc_loss_descriptor(data_type):
Expand Down Expand Up @@ -1356,7 +1359,7 @@ cpdef _warn_algorithm_fwd(
.format(x.shape, W.shape, y.shape, conv_param[0], conv_param[1]),
_util.PerformanceWarning)


"""
cpdef _Algorithm _find_algorithm_fwd(
_ndarray_base x, _ndarray_base W, _ndarray_base y, tuple conv_param,
size_t handle, size_t x_desc, size_t filter_desc, size_t conv_desc,
Expand Down Expand Up @@ -1639,7 +1642,6 @@ cpdef _Algorithm _get_algorithm_bwd_data(
_get_algorithm_bwd_data_cache[key] = algo
return algo


cpdef bint _should_use_tensor_core(
tensor_core_mode, object dtype) except *:
if tensor_core_mode == 'auto':
Expand Down Expand Up @@ -1970,7 +1972,7 @@ def convolution_backward_data(
cudnn.destroyFilterDescriptor(filter_desc)
cudnn.destroyConvolutionDescriptor(conv_desc)


"""
def pooling_forward(
_ndarray_base x, _ndarray_base y,
tuple ksize, tuple stride, tuple pad, int mode):
Expand Down Expand Up @@ -2044,7 +2046,7 @@ def pooling_backward(

cdef _create_tensor_descriptor_for_bn(
size_t desc, _ndarray_base arr, bint is_for_conv2d,
int format=cudnn.CUDNN_TENSOR_NCHW):
int format=_cudnn.CUDNN_TENSOR_NCHW):
assert arr._c_contiguous
if is_for_conv2d:
_create_tensor_descriptor(desc, arr, format)
Expand Down Expand Up @@ -2077,7 +2079,7 @@ def batch_normalization_forward_training(
_ndarray_base running_mean, _ndarray_base running_var,
mean, inv_std, double eps, double decay,
bint is_for_conv2d, int cudnn_mode, bint debug,
int d_layout=cudnn.CUDNN_TENSOR_NCHW):
int d_layout=_cudnn.CUDNN_TENSOR_NCHW):

reserve_space, y, save_mean, save_inv_std = (
_batch_normalization_forward_training(
Expand Down Expand Up @@ -2106,7 +2108,7 @@ def batch_normalization_forward_training_ex(
_ndarray_base running_mean, _ndarray_base running_var,
mean, inv_std, double eps, double decay,
bint is_for_conv2d, int cudnn_mode, bint debug,
int d_layout=cudnn.CUDNN_TENSOR_NCHW):
int d_layout=_cudnn.CUDNN_TENSOR_NCHW):

reserve_space, y, save_mean, save_inv_std = (
_batch_normalization_forward_training(
Expand All @@ -2129,7 +2131,7 @@ cdef _batch_normalization_forward_training(
_ndarray_base running_mean, _ndarray_base running_var,
mean, inv_std, double eps, double decay,
bint is_for_conv2d, int cudnn_mode, bint debug,
int d_layout=cudnn.CUDNN_TENSOR_NCHW):
int d_layout=_cudnn.CUDNN_TENSOR_NCHW):

cdef _memory.MemoryPointer workspace = None
cdef _memory.MemoryPointer reserve_space = None
Expand Down Expand Up @@ -2282,7 +2284,7 @@ def batch_normalization_forward_inference(
_ndarray_base x, _ndarray_base gamma, _ndarray_base beta,
_ndarray_base mean, _ndarray_base var,
double eps, bint is_for_conv2d, int cudnn_mode,
int d_layout=cudnn.CUDNN_TENSOR_NCHW):
int d_layout=_cudnn.CUDNN_TENSOR_NCHW):
x = core._internal_ascontiguousarray(x)
dtype = x.dtype
y = _core.ndarray(x._shape, dtype)
Expand Down Expand Up @@ -2327,7 +2329,7 @@ def batch_normalization_backward(
_ndarray_base x, _ndarray_base gamma, _ndarray_base gy,
_ndarray_base mean, _ndarray_base inv_std,
double eps, bint is_for_conv2d, int cudnn_mode, bint debug,
int d_layout=cudnn.CUDNN_TENSOR_NCHW,
int d_layout=_cudnn.CUDNN_TENSOR_NCHW,
*,
_memory.MemoryPointer reserve_space=None,
):
Expand Down Expand Up @@ -2440,7 +2442,7 @@ def batch_normalization_backward(
return gx, ggamma, gbeta


def create_activation_descriptor(mode, relu_nan_opt=cudnn.CUDNN_PROPAGATE_NAN,
def create_activation_descriptor(mode, relu_nan_opt=_cudnn.CUDNN_PROPAGATE_NAN,
coef=0.0):
desc = Descriptor(cudnn.createActivationDescriptor(),
_py_cudnn.destroyActivationDescriptor)
Expand Down
5 changes: 5 additions & 0 deletions install/cupy_builder/_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ def get_features(ctx: Context) -> Dict[str, Feature]:
'cupyx.cusolver',
'cupy_backends.cuda.libs.curand_hip',
'cupy_backends.cuda.libs.nvrtc_hip',
'cupy_backends.cuda.libs.miopen',
'cupy_backends.cuda.libs.cudnn',
'cupyx.cudnn',
],
'include': [
'hip/hip_runtime_api.h',
Expand All @@ -175,6 +178,7 @@ def get_features(ctx: Context) -> Dict[str, Feature]:
'roctx.h',
'rocsolver/rocsolver.h' if rocm_version >= 560 else 'rocsolver.h',
'hipsolver/hipsolver.h' if rocm_version >= 560 else 'hipsolver.h',
'miopen/miopen.h',
],
'libraries': [
'amdhip64', # was hiprtc and hip_hcc before ROCm 3.8.0
Expand All @@ -188,6 +192,7 @@ def get_features(ctx: Context) -> Dict[str, Feature]:
'rocsolver',
'rocsparse',
'hipsolver',
'MIOpen',
],
'check_method': build.check_hip_version,
'version_method': build.get_hip_version,
Expand Down
7 changes: 0 additions & 7 deletions tests/cupyx_tests/test_cudnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

if cudnn_enabled:
modes = [
libcudnn.CUDNN_ACTIVATION_SIGMOID,
libcudnn.CUDNN_ACTIVATION_RELU,
libcudnn.CUDNN_ACTIVATION_TANH,
]
Expand All @@ -40,7 +39,6 @@
'dtype': [numpy.float32, numpy.float64],
'mode': modes,
}))
@pytest.mark.skipif(not cudnn_enabled, reason='cuDNN is not available')
class TestCudnnActivation:

@pytest.fixture(autouse=True)
Expand All @@ -60,7 +58,6 @@ def test_activation_backward(self):
'dtype': [numpy.float32, numpy.float64],
'mode': coef_modes,
}))
@pytest.mark.skipif(not cudnn_enabled, reason='cuDNN is not available')
class TestCudnnActivationCoef:

@pytest.fixture(autouse=True)
Expand All @@ -83,7 +80,6 @@ def test_activation_backward(self):
'ratio': [0.0, 0.1, 0.2, 0.5],
'seed': [0, 100]
}))
@pytest.mark.skipif(not cudnn_enabled, reason='cuDNN is not available')
class TestCudnnDropout:

@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -136,7 +132,6 @@ def test_dropout_seed(self):
'bias': [True, False],
'layout': layouts,
})))
@pytest.mark.skipif(not cudnn_enabled, reason='cuDNN is not available')
class TestConvolutionForward:

@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -224,7 +219,6 @@ def test_call(self):
'auto_tune': [True, False],
'deterministic': [True, False],
})))
@pytest.mark.skipif(not cudnn_enabled, reason='cuDNN is not available')
class TestConvolutionBackwardFilter:

@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -303,7 +297,6 @@ def test_call(self):
'deterministic': [True, False],
'bias': [True, False],
})))
@pytest.mark.skipif(not cudnn_enabled, reason='cuDNN is not available')
class TestConvolutionBackwardData:

@pytest.fixture(autouse=True)
Expand Down