-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMicroSafeRL.h
More file actions
143 lines (114 loc) · 4.11 KB
/
MicroSafeRL.h
File metadata and controls
143 lines (114 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#ifndef MICRO_SAFE_RL_H
#define MICRO_SAFE_RL_H
class MicroSafeRL {
private:
float ema_mean;
float ema_mad;
float prev_value;
float current_penalty;
float kappa, alpha, beta, lambda_, max_penalty;
float min_limit, max_limit, gravity_factor;
bool initialized;
inline float fast_abs(float x) const { return x < 0.0f ? -x : x; }
public:
// ==============================
// DEBUG STRUCT
// ==============================
struct DebugState {
float instability; // ema_mad
float deviation; // |sensor - mean|
float dynamics; // velocity
float input; // ai_action
float output; // final safe output
};
DebugState debug;
// ==============================
// CONSTRUCTOR
// ==============================
MicroSafeRL(float k = 1.15f, float a = 0.55f, float b = 2.2f,
float lm = 0.12f, float max_p = 1.0f,
float l_min = -1.5f, float l_max = 1.5f, float g = 0.05f)
: kappa(k), alpha(a), beta(b), lambda_(lm), max_penalty(max_p),
min_limit(l_min), max_limit(l_max), gravity_factor(g),
ema_mean(0.0f), ema_mad(0.0f), prev_value(0.0f),
current_penalty(0.0f), initialized(false) {}
// ==============================
// INIT
// ==============================
void init(float initial_sensor = 0.0f) {
ema_mean = initial_sensor;
ema_mad = 0.0f;
prev_value = initial_sensor;
current_penalty = 0.0f;
initialized = true;
// debug init
debug = {0, 0, 0, initial_sensor, initial_sensor};
}
// ==============================
// CORE FUNCTION
// ==============================
float apply_safe_control(float ai_action, float sensor_val) {
if (!initialized) {
init(sensor_val);
float out = ai_action;
if (out > max_limit) out = max_limit;
if (out < min_limit) out = min_limit;
debug.instability = 0.0f;
debug.deviation = 0.0f;
debug.dynamics = 0.0f;
debug.input = ai_action;
debug.output = out;
return out;
}
// --- EMA mean ---
ema_mean = lambda_ * ema_mean + (1.0f - lambda_) * sensor_val;
// --- deviation ---
float abs_dev = fast_abs(sensor_val - ema_mean);
// --- MAD ---
ema_mad = lambda_ * ema_mad + (1.0f - lambda_) * abs_dev;
// --- dynamics (velocity) ---
float velocity = fast_abs(sensor_val - prev_value);
prev_value = sensor_val;
// --- coherence ---
float coherence = 1.0f / (1.0f + abs_dev * beta);
// --- instability model ---
float raw = ema_mad + alpha * (1.0f - coherence) + 0.3f * velocity;
current_penalty = kappa * raw;
if (current_penalty > max_penalty) current_penalty = max_penalty;
// --- gravity modulation ---
float gravity = 1.0f / (1.0f + current_penalty * gravity_factor);
if (gravity < 0.0f) gravity = 0.0f;
float modulated = ai_action * gravity;
// --- clamp ---
float out = modulated;
if (out > max_limit) out = max_limit;
if (out < min_limit) out = min_limit;
// ==============================
// DEBUG FILL
// ==============================
debug.instability = ema_mad;
debug.deviation = abs_dev;
debug.dynamics = velocity;
debug.input = ai_action;
debug.output = out;
return out;
}
// ==============================
// ACCESSORS
// ==============================
float get_penalty() const { return current_penalty; }
float get_current_reward() const { return 1.0f - current_penalty; }
DebugState get_debug() const { return debug; }
// ==============================
// RESET
// ==============================
void reset() {
ema_mean = 0.0f;
ema_mad = 0.0f;
prev_value = 0.0f;
current_penalty = 0.0f;
initialized = false;
debug = {0, 0, 0, 0, 0};
}
};
#endif