-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptimizer.hpp
More file actions
122 lines (104 loc) · 4.53 KB
/
optimizer.hpp
File metadata and controls
122 lines (104 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#ifndef __OPTIMIZER_HPP
#define __OPTIMIZER_HPP
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>
#include "stacktrace.hpp"
namespace parallel
{
/// Default number of threads; declared here and defined in another translation unit.
/// optimizer::get_concurrency() returns this value when runtime_optimization is false,
/// and every optimizer starts out with it as its chosen thread count.
extern size_t concurrency;

/// Compile-time switch: when false, optimizer::get_concurrency() skips all measuring
/// and always returns parallel::concurrency.
inline constexpr bool runtime_optimization = true;

/// Compile-time switch: when true, the optimizer additionally measures a single thread
/// as a candidate and prints the chosen thread count (followed by a stack trace) once a
/// measurement cycle is finished.
inline constexpr bool test_runtime_optimization = true;
/**
 * @brief Helper object to measure the runtime of a parallel function for different numbers of threads.
 *        The number of threads with the smallest accumulated runtime is chosen as the optimum.
 *        The distinction of functions is accomplished via the C++ template instantiation
 *        (the instantiating code is not part of this header).
 *
 * Intended protocol, as far as this header shows: get_concurrency() hands out the number of threads
 * to use for the next execution, and the callable returned by register_duration() is run at scope
 * exit of that execution to record how long it took.
 *
 * While `counter` is a multiple of `re_optimize`, the object is in measuring mode: every candidate in
 * `concurrencies` is timed in three rounds, then the fastest one is stored in `concurrency`.
 * Afterwards `counter` is advanced on every call, so measuring starts again after roughly
 * `re_optimize` (1 million) calls.
 *
 * NOTE(review): no synchronization is visible in this struct - confirm that one instance is never
 * used from several threads at once.
 */
struct optimizer
{
    std::vector< long long > runtimes = {};      ///< accumulated runtime per candidate (nanoseconds), parallel to `concurrencies`
    std::vector< size_t > concurrencies = {};    ///< candidate numbers of threads, in ascending order
    size_t counter = 0,                          ///< number of non-measured calls; a multiple of `re_optimize` means "measuring"
           measuring = 0,                        ///< index of the candidate that is currently measured
           samples = 0,                          ///< number of completed measuring rounds (0..2)
           concurrency = parallel::concurrency;  ///< currently chosen number of threads
    size_t static constexpr re_optimize = 1'000'000;

    /**
     * @brief Build the list of candidate thread counts: (1 in test mode,) half, all and twice the hardware threads.
     */
    optimizer()
    {
        // hardware_concurrency() is allowed to return 0 if the value cannot be determined;
        // fall back to 1 so that no candidate ever asks for 0 threads.
        size_t const hardware = std::max< size_t >( 1, std::thread::hardware_concurrency() );

        // Candidates are appended in ascending order, so comparing with the last entry is
        // enough to avoid measuring the same thread count twice (e.g. 1 on a single-core machine).
        auto const add_candidate = [ this ]( size_t threads )
        {
            if ( concurrencies.empty() or concurrencies.back() != threads )
                concurrencies.push_back( threads );
        };

        if ( test_runtime_optimization )
            add_candidate( 1 );
        if ( hardware / 2 > 1 )
            add_candidate( hardware / 2 );
        add_candidate( hardware );
        add_candidate( hardware * 2 );

        runtimes.assign( concurrencies.size(), 0 );
    }

    /**
     * @brief Number of threads to use for the next execution.
     * @return parallel::concurrency if runtime optimization is disabled, the candidate that is
     *         currently measured while in measuring mode, otherwise the measured optimum.
     */
    size_t get_concurrency()
    {
        if ( not parallel::runtime_optimization )
            return parallel::concurrency;

        if ( ( counter % re_optimize ) != 0 )
        {
            counter += 1;
            return concurrency; // measured optimum
        }

        // Measuring mode. `measuring` is advanced by the callable from register_duration().
        // `>=` instead of `==` so that a stray index can never be used for the lookup below.
        if ( measuring >= concurrencies.size() ) // a full round over all candidates is done
        {
            if ( samples < 2 ) // fewer than three rounds so far: start another round
            {
                samples += 1;
                measuring = 0;
            }
            else // measurement done for all candidates: choose the optimum
            {
                auto const fastest = std::min_element( runtimes.begin(), runtimes.end() );
                concurrency = concurrencies[ static_cast< size_t >( fastest - runtimes.begin() ) ];

                runtimes.assign( concurrencies.size(), 0 ); // reset measurement for the next cycle
                measuring = 0;
                samples = 0;
                counter += 1; // leaves measuring mode

                if constexpr ( test_runtime_optimization ) // print optimization output
                {
                    if ( concurrency == 1 )
                        std::cout << "function gains no multithreading speedup, " << std::flush;
                    else
                        std::cout << "using " << concurrency << " threads for " << std::flush;
                    print_stacktrace();
                }
                return concurrency;
            }
        }
        return concurrencies[ measuring ]; // candidate that is measured next
    }

    /**
     * @brief Reset the optimization status; this retriggers the optimization on the next call.
     *        The currently chosen `concurrency` is left untouched.
     */
    void reset()
    {
        runtimes.assign( concurrencies.size(), 0 ); // reset measurement
        measuring = 0;
        samples = 0;
        counter = 0;
    }

    /**
     * @brief Construct a helper function for a scope guard that logs the execution time at scope exit of a function.
     * @return In measuring mode a callable that adds the time elapsed since this call to the runtime
     *         of the currently measured candidate and advances to the next candidate; otherwise a no-op.
     *         The callable refers to this object and must not be invoked after the object is destroyed.
     */
    std::function< void() > register_duration()
    {
        // With runtime optimization disabled get_concurrency() never advances `counter`,
        // so without this check every call would record a duration and overrun `runtimes`.
        if ( not parallel::runtime_optimization or ( counter % re_optimize ) != 0 )
            return []{};

        // steady_clock is monotonic; system_clock may jump when the wall clock is adjusted.
        return [ this, start = std::chrono::steady_clock::now() ]
        {
            auto const elapsed = std::chrono::steady_clock::now() - start;
            // Ignore the sample instead of writing out of bounds if more callables fire than
            // candidates are left in the current round (e.g. nested executions).
            if ( measuring < runtimes.size() )
            {
                runtimes[ measuring ] += std::chrono::duration_cast< std::chrono::nanoseconds >( elapsed ).count();
                measuring += 1;
            }
        };
    }
};
}
#endif // __OPTIMIZER_HPP