-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsquare_numbers.cu
More file actions
64 lines (51 loc) · 1.36 KB
/
square_numbers.cu
File metadata and controls
64 lines (51 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include <stdio.h>
// Kernel: writes the square of each element of d_in to the same position in d_out.
//
// Launch layout: a 1D grid of 1D blocks with one thread per element. Each
// thread handles the element at its global index, so launches with more than
// one block cover distinct elements instead of every block redoing the first
// blockDim.x entries.
//
// Preconditions: there is no length parameter and therefore no bounds check;
// d_in and d_out must both be device pointers to at least
// gridDim.x * blockDim.x floats.
__global__ void square_1d_vector(float * d_out , float * d_in)
{
    // Global thread index; equals threadIdx.x when the grid has a single block.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Load once, then multiply.
    float value = d_in[idx];
    d_out[idx] = value * value;
}
// Returns true when `status` is cudaSuccess; otherwise prints which step
// failed, with the runtime's error string, to stderr and returns false.
static bool cuda_call_succeeded(cudaError_t status, const char * what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Fills a 32-element host array with 0..31, squares it on the device with
// square_1d_vector (one block, one thread per element) and prints both the
// input and the squared values to stdout.
//
// Every CUDA runtime call is checked. On the first failure a diagnostic is
// written to stderr, the remaining device steps and the result printout are
// skipped (the host output buffer would be uninitialized), and any device
// memory that was allocated is still released.
void call_1d_parallel_computing(void)
{
    const int ARRAY_SIZE = 32;
    const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    //Host mem arrays
    float h_1d_in[ARRAY_SIZE];
    float h_1d_out[ARRAY_SIZE];

    printf("Original Array: \n");
    for(int i = 0; i < ARRAY_SIZE;i++)
    {
        h_1d_in[i] = float(i);
        printf("%d " , i);
    }
    printf("\n");

    //Device mem arrays; start as NULL so the cudaFree calls below are safe
    //even when an allocation failed or was never attempted.
    float * d_1d_in = NULL;
    float * d_1d_out = NULL;

    // Short-circuit chain: a later step only runs if the earlier ones succeeded.
    bool ok = cuda_call_succeeded(cudaMalloc((void **) &d_1d_in, ARRAY_BYTES),
                                  "cudaMalloc(d_1d_in)")
           && cuda_call_succeeded(cudaMalloc((void **) &d_1d_out, ARRAY_BYTES),
                                  "cudaMalloc(d_1d_out)")
           && cuda_call_succeeded(cudaMemcpy(d_1d_in, h_1d_in, ARRAY_BYTES,
                                             cudaMemcpyHostToDevice),
                                  "cudaMemcpy(host -> device)");

    if (ok)
    {
        square_1d_vector<<< 1, ARRAY_SIZE >>>(d_1d_out , d_1d_in);

        // A kernel launch returns nothing; launch-configuration errors are
        // reported through cudaGetLastError(). The cudaMemcpy that follows is
        // blocking, so errors raised while the kernel runs surface there.
        ok = cuda_call_succeeded(cudaGetLastError(), "square_1d_vector launch")
          && cuda_call_succeeded(cudaMemcpy(h_1d_out, d_1d_out, ARRAY_BYTES,
                                            cudaMemcpyDeviceToHost),
                                 "cudaMemcpy(device -> host)");
    }

    if (ok)
    {
        printf("Square Array : \n");
        for(int i = 0;i < ARRAY_SIZE; i++)
        {
            printf("%d ",int(h_1d_out[i]));
        }
        printf("\n");
    }

    // Always release device memory; cudaFree on a NULL pointer does nothing.
    cudaFree(d_1d_in);
    cudaFree(d_1d_out);
}
// Writes the program banner (three banner lines followed by one empty line)
// to stdout.
void print_header(void)
{
    // Banner text, one entry per output line, each carrying its own newline.
    static const char * const banner_lines[] = {
        " ============================================== \n",
        " ===== PARALLEL PROGRAMMING (1D - VECTOR) ===== \n",
        " ============================================== \n",
        "\n",
    };
    const int line_count = (int)(sizeof(banner_lines) / sizeof(banner_lines[0]));

    for (int row = 0; row < line_count; ++row)
    {
        fputs(banner_lines[row], stdout);
    }
}
// Program entry point: prints the banner and a status line, then runs the
// 1D squaring demo. Always returns 0.
int main(int argc,char** argv)
{
    // Command-line arguments are accepted but not used.
    (void)argc;
    (void)argv;

    print_header();
    fputs("Computing square of numbers in some array ... \n", stdout);
    call_1d_parallel_computing();

    return 0;
}