-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplit_allocation.cu
More file actions
54 lines (46 loc) · 1.9 KB
/
split_allocation.cu
File metadata and controls
54 lines (46 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#define CUB_STDERR
#include "memmove.cuh"
#include <cub/cub.cuh>
#include <iostream>
#include <thrust/count.h>
#include <thrust/device_vector.h>
#include <thrust/fill.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>
// Test driver for memmove_sirius (declared in memmove.cuh).
//
// Allocates one device memory pool, writes a byte sequence into a source
// window, moves num_bytes from that window to a destination window inside the
// same pool, and verifies every destination byte on the host.
//
// Returns EXIT_SUCCESS when all bytes match and EXIT_FAILURE on the first
// mismatch. CUDA errors abort the process through CubDebugExit.
int main()
{
  constexpr std::int32_t block_threads    = 256;
  constexpr std::int32_t items_per_thread = 6;

  // 10 GB (non-overlapping memory regions):
  // constexpr std::size_t num_pool_bytes = (10UL * (1ULL << 30ULL));
  // 1500 MB (use to test overlapping memory regions):
  constexpr std::size_t num_pool_bytes = (1500UL * (1ULL << 20ULL));
  constexpr std::size_t num_bytes      = (500UL * (1ULL << 20ULL)); // 500 MB

  // Byte offsets of the two windows inside the pool. With the 1500 MB pool the
  // source window [750 MB, 1250 MB) and the destination window
  // [500 MB, 1000 MB) overlap.
  constexpr std::size_t src_offset = num_pool_bytes / 2;
  constexpr std::size_t dst_offset = num_bytes;

  // Both windows must lie entirely inside the pool; otherwise the fill, the
  // move and the copy-back below would touch memory past the allocation.
  static_assert(num_bytes <= num_pool_bytes - src_offset, "source window exceeds the memory pool");
  static_assert(dst_offset <= num_pool_bytes && num_bytes <= num_pool_bytes - dst_offset,
                "destination window exceeds the memory pool");

  // Allocate and initialize memory pool
  thrust::device_vector<std::uint8_t> memory_pool(num_pool_bytes);
  thrust::fill(memory_pool.begin(), memory_pool.end(), 0);

  // Fill the source window with 0, 1, 2, ...; each value is stored as a
  // std::uint8_t, so the verification below compares against the index
  // truncated to 8 bits.
  thrust::device_ptr<std::uint8_t> src = memory_pool.data() + src_offset;
  thrust::sequence(src, src + num_bytes, 0);
  thrust::device_ptr<std::uint8_t> dst = memory_pool.data() + dst_offset;

  // Invoke the kernel
  memmove_sirius<block_threads, items_per_thread>(thrust::raw_pointer_cast(dst),
                                                  thrust::raw_pointer_cast(src),
                                                  num_bytes);
  CubDebugExit(cudaGetLastError());
  CubDebugExit(cudaDeviceSynchronize());
  std::cout << "Kernel done. Checking results...\n";

  // Copy the destination window back to the host and verify every byte.
  thrust::host_vector<std::uint8_t> dst_h(dst, dst + num_bytes);

  // std::size_t index: a 32-bit index would wrap (and never terminate) once
  // num_bytes reaches 4 GiB.
  for (std::size_t i = 0; i < num_bytes; ++i)
  {
    const std::uint8_t expected = static_cast<std::uint8_t>(i);
    const std::uint8_t actual   = dst_h[i];
    if (expected != actual)
    {
      // Widen to unsigned before streaming: std::uint8_t is a character type
      // and would otherwise be printed as a raw (often unprintable) character.
      std::cerr << "Mismatch at byte " << i << "\nExpected: " << static_cast<unsigned>(expected)
                << "\nActual: " << static_cast<unsigned>(actual) << "\n";
      return EXIT_FAILURE;
    }
  }

  std::cout << "SUCCESS\n";
  return EXIT_SUCCESS;
}