77
88import numpy as np
99from common import common
10- from common .helper_cuda import checkCudaErrors , findCudaDevice
10+ from common .helper_cuda import check_cuda_errors , find_cuda_device
1111
1212from cuda .bindings import driver as cuda
1313from cuda .bindings import runtime as cudart
1414
15- simpleCubemapTexture = """\
15+ simple_cubemap_texture = """\
1616 extern "C"
1717__global__ void transformKernel(float *g_odata, int width, cudaTextureObject_t tex)
1818{
8383
8484def main ():
8585 # Use command-line specified CUDA device, otherwise use device with highest Gflops/s
86- devID = findCudaDevice ()
86+ dev_id = find_cuda_device ()
8787
8888 # Get number of SMs on this GPU
89- deviceProps = checkCudaErrors (cudart .cudaGetDeviceProperties (devID ))
89+ device_props = check_cuda_errors (cudart .cudaGetDeviceProperties (dev_id ))
9090 print (
91- f"CUDA device [{ deviceProps .name } ] has { deviceProps .multiProcessorCount } Multi-Processors SM { deviceProps .major } .{ deviceProps .minor } "
91+ f"CUDA device [{ device_props .name } ] has { device_props .multiProcessorCount } Multi-Processors SM { device_props .major } .{ device_props .minor } "
9292 )
93- if deviceProps .major < 2 :
93+ if device_props .major < 2 :
9494 import pytest
9595
9696 pytest .skip ("Test requires SM 2.0 or higher for support of Texture Arrays." )
@@ -107,15 +107,15 @@ def main():
107107 h_data_ref = np .repeat (np .arange (num_layers , dtype = h_data .dtype ), cubemap_size ) - h_data
108108
109109 # Allocate device memory for result
110- d_data = checkCudaErrors (cudart .cudaMalloc (size ))
110+ d_data = check_cuda_errors (cudart .cudaMalloc (size ))
111111
112112 # Allocate array and copy image data
113- channelDesc = checkCudaErrors (
113+ channel_desc = check_cuda_errors (
114114 cudart .cudaCreateChannelDesc (32 , 0 , 0 , 0 , cudart .cudaChannelFormatKind .cudaChannelFormatKindFloat )
115115 )
116- cu_3darray = checkCudaErrors (
116+ cu_3darray = check_cuda_errors (
117117 cudart .cudaMalloc3DArray (
118- channelDesc ,
118+ channel_desc ,
119119 cudart .make_cudaExtent (width , width , num_faces ),
120120 cudart .cudaArrayCubemap ,
121121 )
@@ -128,90 +128,90 @@ def main():
128128 myparms .dstArray = cu_3darray
129129 myparms .extent = cudart .make_cudaExtent (width , width , num_faces )
130130 myparms .kind = cudart .cudaMemcpyKind .cudaMemcpyHostToDevice
131- checkCudaErrors (cudart .cudaMemcpy3D (myparms ))
132-
133- texRes = cudart .cudaResourceDesc ()
134- texRes .resType = cudart .cudaResourceType .cudaResourceTypeArray
135- texRes .res .array .array = cu_3darray
136-
137- texDescr = cudart .cudaTextureDesc ()
138- texDescr .normalizedCoords = True
139- texDescr .filterMode = cudart .cudaTextureFilterMode .cudaFilterModeLinear
140- texDescr .addressMode [0 ] = cudart .cudaTextureAddressMode .cudaAddressModeWrap
141- texDescr .addressMode [1 ] = cudart .cudaTextureAddressMode .cudaAddressModeWrap
142- texDescr .addressMode [2 ] = cudart .cudaTextureAddressMode .cudaAddressModeWrap
143- texDescr .readMode = cudart .cudaTextureReadMode .cudaReadModeElementType
144-
145- tex = checkCudaErrors (cudart .cudaCreateTextureObject (texRes , texDescr , None ))
146- dimBlock = cudart .dim3 ()
147- dimBlock .x = 8
148- dimBlock .y = 8
149- dimBlock .z = 1
150- dimGrid = cudart .dim3 ()
151- dimGrid .x = width / dimBlock .x
152- dimGrid .y = width / dimBlock .y
153- dimGrid .z = 1
131+ check_cuda_errors (cudart .cudaMemcpy3D (myparms ))
132+
133+ tex_res = cudart .cudaResourceDesc ()
134+ tex_res .resType = cudart .cudaResourceType .cudaResourceTypeArray
135+ tex_res .res .array .array = cu_3darray
136+
137+ tex_descr = cudart .cudaTextureDesc ()
138+ tex_descr .normalizedCoords = True
139+ tex_descr .filterMode = cudart .cudaTextureFilterMode .cudaFilterModeLinear
140+ tex_descr .addressMode [0 ] = cudart .cudaTextureAddressMode .cudaAddressModeWrap
141+ tex_descr .addressMode [1 ] = cudart .cudaTextureAddressMode .cudaAddressModeWrap
142+ tex_descr .addressMode [2 ] = cudart .cudaTextureAddressMode .cudaAddressModeWrap
143+ tex_descr .readMode = cudart .cudaTextureReadMode .cudaReadModeElementType
144+
145+ tex = check_cuda_errors (cudart .cudaCreateTextureObject (tex_res , tex_descr , None ))
146+ dim_block = cudart .dim3 ()
147+ dim_block .x = 8
148+ dim_block .y = 8
149+ dim_block .z = 1
150+ dim_grid = cudart .dim3 ()
151+ dim_grid .x = width / dim_block .x
152+ dim_grid .y = width / dim_block .y
153+ dim_grid .z = 1
154154
155155 print (
156- f"Covering Cubemap data array of { width } ~3 x { num_layers } : Grid size is { dimGrid .x } x { dimGrid .y } , each block has 8 x 8 threads"
156+ f"Covering Cubemap data array of { width } ~3 x { num_layers } : Grid size is { dim_grid .x } x { dim_grid .y } , each block has 8 x 8 threads"
157157 )
158158
159- kernelHelper = common .KernelHelper (simpleCubemapTexture , devID )
160- _transformKernel = kernelHelper . getFunction (b"transformKernel" )
161- kernelArgs = ((d_data , width , tex ), (ctypes .c_void_p , ctypes .c_int , None ))
162- checkCudaErrors (
159+ kernel_helper = common .KernelHelper (simple_cubemap_texture , dev_id )
160+ _transform_kernel = kernel_helper . get_function (b"transformKernel" )
161+ kernel_args = ((d_data , width , tex ), (ctypes .c_void_p , ctypes .c_int , None ))
162+ check_cuda_errors (
163163 cuda .cuLaunchKernel (
164- _transformKernel ,
165- dimGrid .x ,
166- dimGrid .y ,
167- dimGrid .z , # grid dim
168- dimBlock .x ,
169- dimBlock .y ,
170- dimBlock .z , # block dim
164+ _transform_kernel ,
165+ dim_grid .x ,
166+ dim_grid .y ,
167+ dim_grid .z , # grid dim
168+ dim_block .x ,
169+ dim_block .y ,
170+ dim_block .z , # block dim
171171 0 ,
172172 0 , # shared mem and stream
173- kernelArgs ,
173+ kernel_args ,
174174 0 ,
175175 )
176176 ) # arguments
177177
178- checkCudaErrors (cudart .cudaDeviceSynchronize ())
178+ check_cuda_errors (cudart .cudaDeviceSynchronize ())
179179
180180 start = time .time ()
181181
182182 # Execute the kernel
183- checkCudaErrors (
183+ check_cuda_errors (
184184 cuda .cuLaunchKernel (
185- _transformKernel ,
186- dimGrid .x ,
187- dimGrid .y ,
188- dimGrid .z , # grid dim
189- dimBlock .x ,
190- dimBlock .y ,
191- dimBlock .z , # block dim
185+ _transform_kernel ,
186+ dim_grid .x ,
187+ dim_grid .y ,
188+ dim_grid .z , # grid dim
189+ dim_block .x ,
190+ dim_block .y ,
191+ dim_block .z , # block dim
192192 0 ,
193193 0 , # shared mem and stream
194- kernelArgs ,
194+ kernel_args ,
195195 0 ,
196196 )
197197 ) # arguments
198198
199- checkCudaErrors (cudart .cudaDeviceSynchronize ())
199+ check_cuda_errors (cudart .cudaDeviceSynchronize ())
200200 stop = time .time ()
201201 print (f"Processing time: { stop - start :.3f} msec" )
202202 print (f"{ cubemap_size / ((stop - start + 1 ) / 1000.0 ) / 1e6 :.2f} Mtexlookups/sec" )
203203
204204 # Allocate mem for the result on host side
205205 h_odata = np .empty_like (h_data )
206206 # Copy result from device to host
207- checkCudaErrors (cudart .cudaMemcpy (h_odata , d_data , size , cudart .cudaMemcpyKind .cudaMemcpyDeviceToHost ))
207+ check_cuda_errors (cudart .cudaMemcpy (h_odata , d_data , size , cudart .cudaMemcpyKind .cudaMemcpyDeviceToHost ))
208208
209- checkCudaErrors (cudart .cudaDestroyTextureObject (tex ))
210- checkCudaErrors (cudart .cudaFree (d_data ))
211- checkCudaErrors (cudart .cudaFreeArray (cu_3darray ))
209+ check_cuda_errors (cudart .cudaDestroyTextureObject (tex ))
210+ check_cuda_errors (cudart .cudaFree (d_data ))
211+ check_cuda_errors (cudart .cudaFreeArray (cu_3darray ))
212212
213- MIN_EPSILON_ERROR = 5.0e-3
214- if np .max (np .abs (h_odata - h_data_ref )) > MIN_EPSILON_ERROR :
213+ min_epsilon_error = 5.0e-3
214+ if np .max (np .abs (h_odata - h_data_ref )) > min_epsilon_error :
215215 print ("Failed" , file = sys .stderr )
216216 sys .exit (1 )
217217
0 commit comments