Skip to content

Commit 087fe8c

Browse files
committed
fix: enforce naming conventions in cuda_bindings examples
Keep pep8-naming checks active in cuda_bindings examples so local identifiers follow snake_case, while retaining targeted naming-rule exceptions for non-example cuda_bindings paths that mirror CUDA/setuptools APIs.

Made-with: Cursor
1 parent 2321c17 commit 087fe8c

17 files changed

+1140
-1124
lines changed

cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77
from common import common
8-
from common.helper_cuda import checkCudaErrors, findCudaDevice
8+
from common.helper_cuda import check_cuda_errors, find_cuda_device
99

1010
from cuda.bindings import driver as cuda
1111

@@ -50,8 +50,8 @@
5050
}
5151
"""
5252

53-
NUM_BLOCKS = 64
54-
NUM_THREADS = 256
53+
num_blocks = 64
54+
num_threads = 256
5555

5656

5757
def elems_to_bytes(nelems, dt):
@@ -64,28 +64,28 @@ def main():
6464
if platform.machine() == "armv7l":
6565
pytest.skip("clock_nvrtc is not supported on ARMv7")
6666

67-
timer = np.empty(NUM_BLOCKS * 2, dtype="int64")
68-
hinput = np.empty(NUM_THREADS * 2, dtype="float32")
67+
timer = np.empty(num_blocks * 2, dtype="int64")
68+
hinput = np.empty(num_threads * 2, dtype="float32")
6969

70-
for i in range(NUM_THREADS * 2):
70+
for i in range(num_threads * 2):
7171
hinput[i] = i
7272

73-
devID = findCudaDevice()
74-
kernelHelper = common.KernelHelper(clock_nvrtc, devID)
75-
kernel_addr = kernelHelper.getFunction(b"timedReduction")
73+
dev_id = find_cuda_device()
74+
kernel_helper = common.KernelHelper(clock_nvrtc, dev_id)
75+
kernel_addr = kernel_helper.get_function(b"timedReduction")
7676

77-
dinput = checkCudaErrors(cuda.cuMemAlloc(hinput.nbytes))
78-
doutput = checkCudaErrors(cuda.cuMemAlloc(elems_to_bytes(NUM_BLOCKS, np.float32)))
79-
dtimer = checkCudaErrors(cuda.cuMemAlloc(timer.nbytes))
80-
checkCudaErrors(cuda.cuMemcpyHtoD(dinput, hinput, hinput.nbytes))
77+
dinput = check_cuda_errors(cuda.cuMemAlloc(hinput.nbytes))
78+
doutput = check_cuda_errors(cuda.cuMemAlloc(elems_to_bytes(num_blocks, np.float32)))
79+
dtimer = check_cuda_errors(cuda.cuMemAlloc(timer.nbytes))
80+
check_cuda_errors(cuda.cuMemcpyHtoD(dinput, hinput, hinput.nbytes))
8181

8282
args = ((dinput, doutput, dtimer), (None, None, None))
83-
shared_memory_nbytes = elems_to_bytes(2 * NUM_THREADS, np.float32)
83+
shared_memory_nbytes = elems_to_bytes(2 * num_threads, np.float32)
8484

85-
grid_dims = (NUM_BLOCKS, 1, 1)
86-
block_dims = (NUM_THREADS, 1, 1)
85+
grid_dims = (num_blocks, 1, 1)
86+
block_dims = (num_threads, 1, 1)
8787

88-
checkCudaErrors(
88+
check_cuda_errors(
8989
cuda.cuLaunchKernel(
9090
kernel_addr,
9191
*grid_dims, # grid dim
@@ -97,19 +97,19 @@ def main():
9797
)
9898
) # arguments
9999

100-
checkCudaErrors(cuda.cuCtxSynchronize())
101-
checkCudaErrors(cuda.cuMemcpyDtoH(timer, dtimer, timer.nbytes))
102-
checkCudaErrors(cuda.cuMemFree(dinput))
103-
checkCudaErrors(cuda.cuMemFree(doutput))
104-
checkCudaErrors(cuda.cuMemFree(dtimer))
100+
check_cuda_errors(cuda.cuCtxSynchronize())
101+
check_cuda_errors(cuda.cuMemcpyDtoH(timer, dtimer, timer.nbytes))
102+
check_cuda_errors(cuda.cuMemFree(dinput))
103+
check_cuda_errors(cuda.cuMemFree(doutput))
104+
check_cuda_errors(cuda.cuMemFree(dtimer))
105105

106-
avgElapsedClocks = 0.0
106+
avg_elapsed_clocks = 0.0
107107

108-
for i in range(NUM_BLOCKS):
109-
avgElapsedClocks += timer[i + NUM_BLOCKS] - timer[i]
108+
for i in range(num_blocks):
109+
avg_elapsed_clocks += timer[i + num_blocks] - timer[i]
110110

111-
avgElapsedClocks = avgElapsedClocks / NUM_BLOCKS
112-
print(f"Average clocks/block = {avgElapsedClocks}")
111+
avg_elapsed_clocks = avg_elapsed_clocks / num_blocks
112+
print(f"Average clocks/block = {avg_elapsed_clocks}")
113113

114114

115115
if __name__ == "__main__":

cuda_bindings/examples/0_Introduction/simpleCubemapTexture_test.py

Lines changed: 63 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77

88
import numpy as np
99
from common import common
10-
from common.helper_cuda import checkCudaErrors, findCudaDevice
10+
from common.helper_cuda import check_cuda_errors, find_cuda_device
1111

1212
from cuda.bindings import driver as cuda
1313
from cuda.bindings import runtime as cudart
1414

15-
simpleCubemapTexture = """\
15+
simple_cubemap_texture = """\
1616
extern "C"
1717
__global__ void transformKernel(float *g_odata, int width, cudaTextureObject_t tex)
1818
{
@@ -83,14 +83,14 @@
8383

8484
def main():
8585
# Use command-line specified CUDA device, otherwise use device with highest Gflops/s
86-
devID = findCudaDevice()
86+
dev_id = find_cuda_device()
8787

8888
# Get number of SMs on this GPU
89-
deviceProps = checkCudaErrors(cudart.cudaGetDeviceProperties(devID))
89+
device_props = check_cuda_errors(cudart.cudaGetDeviceProperties(dev_id))
9090
print(
91-
f"CUDA device [{deviceProps.name}] has {deviceProps.multiProcessorCount} Multi-Processors SM {deviceProps.major}.{deviceProps.minor}"
91+
f"CUDA device [{device_props.name}] has {device_props.multiProcessorCount} Multi-Processors SM {device_props.major}.{device_props.minor}"
9292
)
93-
if deviceProps.major < 2:
93+
if device_props.major < 2:
9494
import pytest
9595

9696
pytest.skip("Test requires SM 2.0 or higher for support of Texture Arrays.")
@@ -107,15 +107,15 @@ def main():
107107
h_data_ref = np.repeat(np.arange(num_layers, dtype=h_data.dtype), cubemap_size) - h_data
108108

109109
# Allocate device memory for result
110-
d_data = checkCudaErrors(cudart.cudaMalloc(size))
110+
d_data = check_cuda_errors(cudart.cudaMalloc(size))
111111

112112
# Allocate array and copy image data
113-
channelDesc = checkCudaErrors(
113+
channel_desc = check_cuda_errors(
114114
cudart.cudaCreateChannelDesc(32, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat)
115115
)
116-
cu_3darray = checkCudaErrors(
116+
cu_3darray = check_cuda_errors(
117117
cudart.cudaMalloc3DArray(
118-
channelDesc,
118+
channel_desc,
119119
cudart.make_cudaExtent(width, width, num_faces),
120120
cudart.cudaArrayCubemap,
121121
)
@@ -128,90 +128,90 @@ def main():
128128
myparms.dstArray = cu_3darray
129129
myparms.extent = cudart.make_cudaExtent(width, width, num_faces)
130130
myparms.kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
131-
checkCudaErrors(cudart.cudaMemcpy3D(myparms))
132-
133-
texRes = cudart.cudaResourceDesc()
134-
texRes.resType = cudart.cudaResourceType.cudaResourceTypeArray
135-
texRes.res.array.array = cu_3darray
136-
137-
texDescr = cudart.cudaTextureDesc()
138-
texDescr.normalizedCoords = True
139-
texDescr.filterMode = cudart.cudaTextureFilterMode.cudaFilterModeLinear
140-
texDescr.addressMode[0] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
141-
texDescr.addressMode[1] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
142-
texDescr.addressMode[2] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
143-
texDescr.readMode = cudart.cudaTextureReadMode.cudaReadModeElementType
144-
145-
tex = checkCudaErrors(cudart.cudaCreateTextureObject(texRes, texDescr, None))
146-
dimBlock = cudart.dim3()
147-
dimBlock.x = 8
148-
dimBlock.y = 8
149-
dimBlock.z = 1
150-
dimGrid = cudart.dim3()
151-
dimGrid.x = width / dimBlock.x
152-
dimGrid.y = width / dimBlock.y
153-
dimGrid.z = 1
131+
check_cuda_errors(cudart.cudaMemcpy3D(myparms))
132+
133+
tex_res = cudart.cudaResourceDesc()
134+
tex_res.resType = cudart.cudaResourceType.cudaResourceTypeArray
135+
tex_res.res.array.array = cu_3darray
136+
137+
tex_descr = cudart.cudaTextureDesc()
138+
tex_descr.normalizedCoords = True
139+
tex_descr.filterMode = cudart.cudaTextureFilterMode.cudaFilterModeLinear
140+
tex_descr.addressMode[0] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
141+
tex_descr.addressMode[1] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
142+
tex_descr.addressMode[2] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
143+
tex_descr.readMode = cudart.cudaTextureReadMode.cudaReadModeElementType
144+
145+
tex = check_cuda_errors(cudart.cudaCreateTextureObject(tex_res, tex_descr, None))
146+
dim_block = cudart.dim3()
147+
dim_block.x = 8
148+
dim_block.y = 8
149+
dim_block.z = 1
150+
dim_grid = cudart.dim3()
151+
dim_grid.x = width / dim_block.x
152+
dim_grid.y = width / dim_block.y
153+
dim_grid.z = 1
154154

155155
print(
156-
f"Covering Cubemap data array of {width}~3 x {num_layers}: Grid size is {dimGrid.x} x {dimGrid.y}, each block has 8 x 8 threads"
156+
f"Covering Cubemap data array of {width}~3 x {num_layers}: Grid size is {dim_grid.x} x {dim_grid.y}, each block has 8 x 8 threads"
157157
)
158158

159-
kernelHelper = common.KernelHelper(simpleCubemapTexture, devID)
160-
_transformKernel = kernelHelper.getFunction(b"transformKernel")
161-
kernelArgs = ((d_data, width, tex), (ctypes.c_void_p, ctypes.c_int, None))
162-
checkCudaErrors(
159+
kernel_helper = common.KernelHelper(simple_cubemap_texture, dev_id)
160+
_transform_kernel = kernel_helper.get_function(b"transformKernel")
161+
kernel_args = ((d_data, width, tex), (ctypes.c_void_p, ctypes.c_int, None))
162+
check_cuda_errors(
163163
cuda.cuLaunchKernel(
164-
_transformKernel,
165-
dimGrid.x,
166-
dimGrid.y,
167-
dimGrid.z, # grid dim
168-
dimBlock.x,
169-
dimBlock.y,
170-
dimBlock.z, # block dim
164+
_transform_kernel,
165+
dim_grid.x,
166+
dim_grid.y,
167+
dim_grid.z, # grid dim
168+
dim_block.x,
169+
dim_block.y,
170+
dim_block.z, # block dim
171171
0,
172172
0, # shared mem and stream
173-
kernelArgs,
173+
kernel_args,
174174
0,
175175
)
176176
) # arguments
177177

178-
checkCudaErrors(cudart.cudaDeviceSynchronize())
178+
check_cuda_errors(cudart.cudaDeviceSynchronize())
179179

180180
start = time.time()
181181

182182
# Execute the kernel
183-
checkCudaErrors(
183+
check_cuda_errors(
184184
cuda.cuLaunchKernel(
185-
_transformKernel,
186-
dimGrid.x,
187-
dimGrid.y,
188-
dimGrid.z, # grid dim
189-
dimBlock.x,
190-
dimBlock.y,
191-
dimBlock.z, # block dim
185+
_transform_kernel,
186+
dim_grid.x,
187+
dim_grid.y,
188+
dim_grid.z, # grid dim
189+
dim_block.x,
190+
dim_block.y,
191+
dim_block.z, # block dim
192192
0,
193193
0, # shared mem and stream
194-
kernelArgs,
194+
kernel_args,
195195
0,
196196
)
197197
) # arguments
198198

199-
checkCudaErrors(cudart.cudaDeviceSynchronize())
199+
check_cuda_errors(cudart.cudaDeviceSynchronize())
200200
stop = time.time()
201201
print(f"Processing time: {stop - start:.3f} msec")
202202
print(f"{cubemap_size / ((stop - start + 1) / 1000.0) / 1e6:.2f} Mtexlookups/sec")
203203

204204
# Allocate mem for the result on host side
205205
h_odata = np.empty_like(h_data)
206206
# Copy result from device to host
207-
checkCudaErrors(cudart.cudaMemcpy(h_odata, d_data, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost))
207+
check_cuda_errors(cudart.cudaMemcpy(h_odata, d_data, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost))
208208

209-
checkCudaErrors(cudart.cudaDestroyTextureObject(tex))
210-
checkCudaErrors(cudart.cudaFree(d_data))
211-
checkCudaErrors(cudart.cudaFreeArray(cu_3darray))
209+
check_cuda_errors(cudart.cudaDestroyTextureObject(tex))
210+
check_cuda_errors(cudart.cudaFree(d_data))
211+
check_cuda_errors(cudart.cudaFreeArray(cu_3darray))
212212

213-
MIN_EPSILON_ERROR = 5.0e-3
214-
if np.max(np.abs(h_odata - h_data_ref)) > MIN_EPSILON_ERROR:
213+
min_epsilon_error = 5.0e-3
214+
if np.max(np.abs(h_odata - h_data_ref)) > min_epsilon_error:
215215
print("Failed", file=sys.stderr)
216216
sys.exit(1)
217217

0 commit comments

Comments (0)