diff --git a/021_array_sum.py b/021_array_sum.py index e9b35e1..c70f7b0 100644 --- a/021_array_sum.py +++ b/021_array_sum.py @@ -36,10 +36,10 @@ cl_arrays = [cl_a, cl_b, cl_c] for x in range(3): - cl.enqueue_copy(queue, cl_arrays[x], np_arrays[x]) + cl.enqueue_copy(queue, np_arrays[x], cl_arrays[x]) queue.finish() # Copy the data for array c back to the host for x in np_arrays: print(x) -# Print all three host arrays, to show sum() worked \ No newline at end of file +# Print all three host arrays, to show sum() worked diff --git a/030_timing.py b/030_timing.py index 19c361d..f628854 100644 --- a/030_timing.py +++ b/030_timing.py @@ -39,10 +39,10 @@ def gpu_array_sum(a, b): elapsed = 1e-9*(event.profile.end - event.profile.start) # Calculate the time it took to execute the kernel print("GPU Kernel Time: {0} s".format(elapsed)) # Print the time it took to execute the kernel c_gpu = np.empty_like(a) # Create an empty array the same size as array a - cl.enqueue_read_buffer(queue, c_buffer, c_gpu).wait() # Read back the data from GPU memory into array c_gpu + cl.enqueue_copy(queue, c_gpu, c_buffer).wait() # Read back the data from GPU memory into array c_gpu gpu_end_time = time() # Get the GPU end time print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time)) # Print the time the GPU program took, including both memory copies return c_gpu # Return the sum of the two arrays cpu_array_sum(a, b) # Call the function that sums two arrays on the CPU -gpu_array_sum(a, b) # Call the function that sums two arrays on the GPU \ No newline at end of file +gpu_array_sum(a, b) # Call the function that sums two arrays on the GPU diff --git a/040_elementwise.py b/040_elementwise.py index 358051c..5f6124d 100644 --- a/040_elementwise.py +++ b/040_elementwise.py @@ -7,14 +7,14 @@ context = cl.create_some_context() # Initialize the Context queue = cl.CommandQueue(context) # Instantiate a Queue -a = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32)) # Create a 
random pyopencl array -b = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32)) # Create a random pyopencl array +a = cl_array.to_device(queue, numpy.random.randn(50000).astype(numpy.float32)) # Create a random pyopencl array +b = cl_array.to_device(queue, numpy.random.randn(50000).astype(numpy.float32)) # Create a random pyopencl array c = cl_array.empty_like(a) # Create an empty pyopencl destination array sum = cl.elementwise.ElementwiseKernel(context, "float *a, float *b, float *c", "c[i] = a[i] + b[i]", "sum") # Create an elementwise kernel object # - Arguments: a string formatted as a C argument list -# - Operation: a snippet of C that carries out the desired map operatino +# - Operation: a snippet of C that carries out the desired map operation # - Name: the fuction name as which the kernel is compiled sum(a, b, c) # Call the elementwise kernel @@ -22,4 +22,4 @@ print("a: {}".format(a)) print("b: {}".format(b)) print("c: {}".format(c)) -# Print all three arrays, to show sum() worked \ No newline at end of file +# Print all three arrays, to show sum() worked diff --git a/README.md b/README.md index 5345fb7..6c985e8 100644 --- a/README.md +++ b/README.md @@ -11,3 +11,5 @@ PyOpenCL is a tool that is worth learning. Python allows exceptional clarity-of - 010 Introspection - Find out about your computer's OpenCL situation - 020 Array Sum - Use OpenCL To Add Two Large Random Arrays - Hiding Details - 021 Array Sum - Use OpenCL To Add Two Large Random Arrays - Showing Details +- 030 Timing - Compare performance of a loop in pure Python versus OpenCL +- 040 Elementwise - Use PyOpenCL arrays and elementwise to add two large random arrays