forked from HiPerCoRe/cuFFTAdvisor
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsizeOptimizer.h
More file actions
99 lines (86 loc) · 3.3 KB
/
sizeOptimizer.h
File metadata and controls
99 lines (86 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#ifndef CUFFTADVISOR_SIZEOPTIMIZER_H_
#define CUFFTADVISOR_SIZEOPTIMIZER_H_
#include <algorithm>
#include <cmath>
#include <set>
#include <vector>
#include "benchmarker.h"
#include "generalTransform.h"
#include "transformGenerator.h"
#include "utils.h"
namespace cuFFTAdvisor {
class SizeOptimizer {
private:
struct Polynom {
size_t value;
int invocations;
int noOfPrimes;
size_t exponent2;
size_t exponent3;
size_t exponent5;
size_t exponent7;
};
struct valueComparator {
bool asc;
valueComparator(bool asc) : asc(asc) {};
bool operator()(const Polynom &l, const Polynom &r) {
if (asc) return l.value < r.value;
return l.value > r.value;
}
};
struct kernelCallComparator {
bool operator()(Triplet<Polynom *> *l, Triplet<Polynom *> *r) {
int lval = l->fst->invocations + l->snd->invocations + l->rd->invocations;
int rval = r->fst->invocations + r->snd->invocations + r->rd->invocations;
return lval < rval;
}
};
public:
SizeOptimizer(CudaVersion::CudaVersion version, GeneralTransform &tr,
bool allowTrans);
std::vector<const Transform *> *optimize(size_t nBest, int maxPercIncrease,
int maxMemMB, bool squareOnly,
bool crop);
private:
int getNoOfPrimes(Polynom &poly);
int getInvocations(int maxPower, size_t num);
std::vector<Triplet<int> *> optimize(GeneralTransform &tr, size_t nBest,
int maxPercIncrease);
int getInvocations(Polynom &poly, bool isFloat);
int getInvocationsV8(Polynom &poly, bool isFloat);
std::set<Polynom, valueComparator> *filterOptimal(
std::vector<Polynom> *input, bool crop);
std::vector<Polynom> *generatePolys(size_t num, bool isFloat, bool crop);
std::vector<GeneralTransform> *optimizeXYZ(GeneralTransform &tr, size_t nBest,
int maxPercIncrease, bool squareOnly,
bool crop);
std::vector<const Transform *> *optimizeN(
std::vector<GeneralTransform> *transforms, size_t maxMem, size_t nBest);
void collapseBatched(GeneralTransform >, size_t maxMem,
std::vector<const Transform *> *result);
static bool perfSort(const Transform *l, const Transform *r);
bool collapse(GeneralTransform >, bool isBatched, size_t N, size_t maxMemMB,
std::vector<const Transform *> *result);
size_t getMaxSize(GeneralTransform &tr, int maxPercIncrease, bool squareOnly,
bool crop);
size_t getMinSize(GeneralTransform &tr, int maxPercDecrease, bool crop);
static bool sizeSort(const Transform *l, const Transform *r);
private:
std::vector<GeneralTransform> input;
const CudaVersion::CudaVersion version;
const double log_2;
const double log_3;
const double log_5;
const double log_7;
static const int V8_RADIX_2_MAX_SP = 10;
static const int V8_RADIX_3_MAX_SP = 6;
static const int V8_RADIX_5_MAX_SP = 3;
static const int V8_RADIX_7_MAX_SP = 3;
static const int V8_RADIX_2_MAX_DP = 9;
static const int V8_RADIX_3_MAX_DP = 5;
static const int V8_RADIX_5_MAX_DP = 3;
static const int V8_RADIX_7_MAX_DP = 3;
static const Polynom UNIT;
};
} // namespace cuFFTAdvisor
#endif // CUFFTADVISOR_SIZEOPTIMIZER_H_