-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmathtransform_cuda_kernel.cpp
More file actions
64 lines (46 loc) · 1.87 KB
/
mathtransform_cuda_kernel.cpp
File metadata and controls
64 lines (46 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include "mathtransform_cuda_kernel.h"
//
/*!
 * Builds a kernel wrapper by forwarding the given CUDA program to the base
 * kernel class together with the kernel name "mathTransform".
 *
 * @param program Pointer to the CUDA program handed to the base kernel
 *                constructor.
 */
MathTransform::CUDA::Kernel::Kernel(::CUDA::Program* program)
   : ::CUDA::Kernel(program, "mathTransform")
{
}
/*!
 * Binds the kernel arguments, configures the launch sizes, and starts this
 * object's CUDA kernel on the given stream.
 *
 * @param stream The CUDA stream passed on to the base class execute call.
 *
 * @param buffer The CUDA buffer bound as the kernel's global buffer argument.
 *               Its size() is also passed to the kernel and used to derive
 *               the grid size. It is dereferenced unconditionally, so it must
 *               not be null.
 *
 * @param type The operation type; it is passed to the kernel as an int.
 *
 * @param amount The amount passed to the kernel unchanged.
 *
 * @return The CUDA event returned by the base class execute call.
 */
::CUDA::Event MathTransform::CUDA::Kernel::execute(const ::CUDA::Stream& stream, ::CUDA::Buffer<float>* buffer, Operation type, int amount)
{
   // Bind the four kernel arguments in order: the buffer itself, the size it
   // reports, the operation type converted to an int, and the amount.
   const int operationCode = static_cast<int>(type);
   setBuffer(GlobalBuffer, buffer);
   setArgument(GlobalSize, buffer->size());
   setArgument(Type, operationCode);
   setArgument(Amount, amount);

   // Derive the launch sizes. The grid size is the buffer size divided by the
   // block size, rounded up. With a block size of 1 the rounding has no effect
   // and the grid size equals the buffer size.
   const int threadsPerBlock = 1;
   const auto elementCount = buffer->size();
   const int blockCount = (elementCount + threadsPerBlock - 1) / threadsPerBlock;
   setSizes(blockCount, threadsPerBlock);

   // Hand off to the base class to launch on the stream and return the event
   // it produces.
   return ::CUDA::Kernel::execute(stream);
}