MicroSafe-RL/MicroSafeRL.h at main · Kretski/MicroSafe-RL · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#ifndef MICRO_SAFE_RL_H
#define MICRO_SAFE_RL_H

class MicroSafeRL {
private:
    float ema_mean;
    float ema_mad;
    float prev_value;
    float current_penalty;

    float kappa, alpha, beta, lambda_, max_penalty;
    float min_limit, max_limit, gravity_factor;
    bool initialized;

    inline float fast_abs(float x) const { return x < 0.0f ? -x : x; }

public:
    // ==============================
    // DEBUG STRUCT
    // ==============================
    struct DebugState {
        float instability;  // ema_mad
        float deviation;    // |sensor - mean|
        float dynamics;     // velocity
        float input;        // ai_action
        float output;       // final safe output
    };

    DebugState debug;

    // ==============================
    // CONSTRUCTOR
    // ==============================
    MicroSafeRL(float k = 1.15f, float a = 0.55f, float b = 2.2f,
                float lm = 0.12f, float max_p = 1.0f,
                float l_min = -1.5f, float l_max = 1.5f, float g = 0.05f)
        : kappa(k), alpha(a), beta(b), lambda_(lm), max_penalty(max_p),
          min_limit(l_min), max_limit(l_max), gravity_factor(g),
          ema_mean(0.0f), ema_mad(0.0f), prev_value(0.0f),
          current_penalty(0.0f), initialized(false) {}

    // ==============================
    // INIT
    // ==============================
    void init(float initial_sensor = 0.0f) {
        ema_mean = initial_sensor;
        ema_mad = 0.0f;
        prev_value = initial_sensor;
        current_penalty = 0.0f;
        initialized = true;

        // debug init
        debug = {0, 0, 0, initial_sensor, initial_sensor};
    }

    // ==============================
    // CORE FUNCTION
    // ==============================
    float apply_safe_control(float ai_action, float sensor_val) {
        if (!initialized) {
            init(sensor_val);

            float out = ai_action;
            if (out > max_limit) out = max_limit;
            if (out < min_limit) out = min_limit;

            debug.instability = 0.0f;
            debug.deviation   = 0.0f;
            debug.dynamics    = 0.0f;
            debug.input       = ai_action;
            debug.output      = out;

            return out;
        }

        // --- EMA mean ---
        ema_mean = lambda_ * ema_mean + (1.0f - lambda_) * sensor_val;

        // --- deviation ---
        float abs_dev = fast_abs(sensor_val - ema_mean);

        // --- MAD ---
        ema_mad = lambda_ * ema_mad + (1.0f - lambda_) * abs_dev;

        // --- dynamics (velocity) ---
        float velocity = fast_abs(sensor_val - prev_value);
        prev_value = sensor_val;

        // --- coherence ---
        float coherence = 1.0f / (1.0f + abs_dev * beta);

        // --- instability model ---
        float raw = ema_mad + alpha * (1.0f - coherence) + 0.3f * velocity;

        current_penalty = kappa * raw;
        if (current_penalty > max_penalty) current_penalty = max_penalty;

        // --- gravity modulation ---
        float gravity = 1.0f / (1.0f + current_penalty * gravity_factor);
        if (gravity < 0.0f) gravity = 0.0f;

        float modulated = ai_action * gravity;

        // --- clamp ---
        float out = modulated;
        if (out > max_limit) out = max_limit;
        if (out < min_limit) out = min_limit;

        // ==============================
        // DEBUG FILL
        // ==============================
        debug.instability = ema_mad;
        debug.deviation   = abs_dev;
        debug.dynamics    = velocity;
        debug.input       = ai_action;
        debug.output      = out;

        return out;
    }

    // ==============================
    // ACCESSORS
    // ==============================
    float get_penalty() const { return current_penalty; }
    float get_current_reward() const { return 1.0f - current_penalty; }

    DebugState get_debug() const { return debug; }

    // ==============================
    // RESET
    // ==============================
    void reset() {
        ema_mean = 0.0f;
        ema_mad = 0.0f;
        prev_value = 0.0f;
        current_penalty = 0.0f;
        initialized = false;

        debug = {0, 0, 0, 0, 0};
    }
};

#endif