-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathse_2.cu
More file actions
95 lines (84 loc) · 2.81 KB
/
se_2.cu
File metadata and controls
95 lines (84 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size of the input text in bytes. Written by main() from ftell() on the
// input file and read by searchKeyword() to size its device buffers.
long datasize;
// Forward declaration of the host-side helper; its definition follows main().
cudaError_t searchKeyword(int *result, char *data, char *keyword);
/*
 * Flags every position of `data` at which `keyword` occurs.
 *
 * Parameters (all pointers must be device pointers):
 *   result   - array of `datasize` ints; result[i] is set to 1 when the whole
 *              keyword matches starting at data[i], and to 0 otherwise. Every
 *              element in [0, datasize) is written, so the buffer does not
 *              need to be pre-initialised.
 *   data     - text to scan, `datasize` bytes (no terminator required).
 *   keyword  - NUL-terminated pattern. An empty keyword matches nowhere.
 *   datasize - number of bytes in `data`.
 *
 * Launch layout: 1D grid of 1D blocks. The loop below strides by the total
 * number of launched threads, so any grid/block size covers the whole input.
 * No shared memory is used.
 */
__global__ void searchKeywordKernel(int *result, char *data, char *keyword,int datasize)
{
    // 64-bit arithmetic so that index + stride cannot overflow for large inputs.
    const long long stride = (long long)gridDim.x * blockDim.x;
    long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (; i < datasize; i += stride) {
        int matched = 1;
        int j = 0;

        // Compare the keyword character by character against data[i..].
        for (; keyword[j] != '\0'; j++) {
            // A keyword that would run past the end of the text cannot match.
            if (i + j >= datasize || data[i + j] != keyword[j]) {
                matched = 0;
                break;
            }
        }

        // j == 0 means the keyword was empty; report no match in that case.
        result[i] = (matched && j > 0) ? 1 : 0;
    }
}
/*
 * Reads "input.txt" into memory, searches it on the GPU for the keyword and
 * prints every matching position followed by the total number of matches.
 *
 * Returns 0 on success and 1 if the file cannot be read, memory cannot be
 * allocated, or a CUDA call reports an error.
 */
int main()
{
    int exitCode = 1;
    char *data = NULL;
    int *result = NULL;
    int count = 0;
    int total_matches = 0;
    size_t bytesRead = 0;
    cudaError_t cudaStatus = cudaSuccess;
    // char keyword[8]={'S','H','E','R','L','O','C','K'};// char pattern
    // Remaining elements are zero-initialised, so the pattern is NUL-terminated.
    char keyword[8]={'s'};

    FILE *f = fopen("input.txt", "rb");
    if (f == NULL) {
        perror("input.txt");
        return 1;
    }

    // Determine the file size by seeking to the end.
    if (fseek(f, 0, SEEK_END) != 0) {
        perror("fseek");
        goto Done;
    }
    datasize = ftell(f);
    if (datasize < 0) {
        perror("ftell");
        goto Done;
    }
    if (fseek(f, 0, SEEK_SET) != 0) { //same as rewind(f);
        perror("fseek");
        goto Done;
    }

    // Positions are handled as int further down; refuse files that do not fit.
    count = (int)datasize;
    if ((long)count != datasize) {
        fprintf(stderr, "input.txt is too large to process\n");
        goto Done;
    }

    // Heap allocation: the file may be far larger than the stack.
    // Allocate at least one element so an empty file still yields valid pointers.
    data = (char *)malloc(count > 0 ? (size_t)count : 1);
    // calloc zero-fills the whole int array (the "no match" value).
    result = (int *)calloc(count > 0 ? (size_t)count : 1, sizeof(int));
    if (data == NULL || result == NULL) {
        fprintf(stderr, "out of memory\n");
        goto Done;
    }

    // Read the file contents while the stream is still open.
    bytesRead = fread(data, 1, (size_t)count, f);
    if (bytesRead != (size_t)count) {
        fprintf(stderr, "failed to read input.txt\n");
        goto Done;
    }
    fclose(f);
    f = NULL;

    // Search keyword in parallel. An empty file has nothing to search.
    if (count > 0) {
        cudaStatus = searchKeyword(result, data, keyword);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "searchKeyword: %s\n", cudaGetErrorString(cudaStatus));
            goto Done;
        }
    }

    // Print out the string match result position
    for (int i=0; i<count; i++) {
        if (result[i] == 1) {
            printf("Character found at position % i\n", i);
            total_matches++;
        }
    }
    printf("Total matches = %d\n", total_matches);

    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset: %s\n", cudaGetErrorString(cudaStatus));
        goto Done;
    }
    exitCode = 0;

Done:
    if (f != NULL) {
        fclose(f);
    }
    free(result);
    free(data);
    system("pause");
    return exitCode;
}
// Helper function for using CUDA to search a list of characters in parallel.
//
// Copies the text and the keyword to device 0, launches searchKeywordKernel
// over the text and copies the per-position flags back to the host.
//
// Parameters:
//   result  - host array of `datasize` ints that receives the kernel output.
//   data    - host text buffer of `datasize` bytes (the file-scope `datasize`).
//   keyword - host pattern; must be a NUL-terminated string, because its
//             length is taken with strlen().
//
// Returns cudaSuccess on success, cudaErrorInvalidValue for a null pointer or
// a `datasize` that is negative or does not fit in int, or the first CUDA
// error encountered. Device allocations are released on every path.
cudaError_t searchKeyword(int *result, char *data, char *keyword)
{
    char *dev_data = 0;
    char *dev_keyword = 0;
    int *dev_result = 0;
    cudaError_t cudaStatus = cudaSuccess;
    const int gputhreads = 512;
    int bk = 0;
    size_t keywordBytes = 0;
    // The kernel takes the length as int; detect values that would be truncated.
    const int n = (int)datasize;

    if (result == NULL || data == NULL || keyword == NULL ||
        datasize < 0 || (long)n != datasize) {
        return cudaErrorInvalidValue;
    }
    // Nothing to scan; a zero-sized launch would be an invalid configuration.
    if (n == 0) {
        return cudaSuccess;
    }

    // Copy only the keyword itself (plus terminator), not `datasize` bytes.
    keywordBytes = strlen(keyword) + 1;
    // Round up so the final partial block of the text is covered as well.
    bk = (n - 1) / gputhreads + 1;

    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) goto Cleanup;

    cudaStatus = cudaMalloc((void**)&dev_result, (size_t)n * sizeof(int));
    if (cudaStatus != cudaSuccess) goto Cleanup;
    cudaStatus = cudaMalloc((void**)&dev_data, (size_t)n * sizeof(char));
    if (cudaStatus != cudaSuccess) goto Cleanup;
    cudaStatus = cudaMalloc((void**)&dev_keyword, keywordBytes);
    if (cudaStatus != cudaSuccess) goto Cleanup;

    // Start from "no match" everywhere so unwritten entries are well defined.
    cudaStatus = cudaMemset(dev_result, 0, (size_t)n * sizeof(int));
    if (cudaStatus != cudaSuccess) goto Cleanup;

    cudaStatus = cudaMemcpy(dev_data, data, (size_t)n * sizeof(char), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) goto Cleanup;
    cudaStatus = cudaMemcpy(dev_keyword, keyword, keywordBytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) goto Cleanup;

    searchKeywordKernel<<<bk,gputhreads >>>(dev_result, dev_data, dev_keyword, n);
    // Launch-configuration errors are only visible through cudaGetLastError().
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) goto Cleanup;
    // Faults raised while the kernel runs surface at the next synchronisation.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) goto Cleanup;

    cudaStatus = cudaMemcpy(result, dev_result, (size_t)n * sizeof(int), cudaMemcpyDeviceToHost);

Cleanup:
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "searchKeyword failed: %s\n", cudaGetErrorString(cudaStatus));
    }
    // cudaFree accepts a null pointer, so this is safe after a partial setup.
    cudaFree(dev_result);
    cudaFree(dev_data);
    cudaFree(dev_keyword);
    return cudaStatus;
}