/* Source: MicroSafe-RL/MicroSafeRL_misra.h at main · Kretski/MicroSafe-RL · GitHub */
/**
 * MicroSafeRL.h — MISRA-C:2012 Compliant Edition v2
 * ===================================================
 * Verified: Cppcheck 2.13.0 + MISRA addon
 * Remaining advisories: 2.5 (unused macros — devtool only),
 *                       2.7 (ctor param names — C++ limitation)
 * Critical/Required violations: ZERO
 *
 * Author : Kretski, Dimitar
 * DOI    : 10.5281/zenodo.19019599
 */

#ifndef MICRO_SAFE_RL_H
#define MICRO_SAFE_RL_H

#include <float.h>
#include <stdint.h>

/* Floating-point type used for every parameter, state variable and result
 * of MicroSafeRL.
 * NOTE(review): the name implies a 32-bit float; nothing in this header
 * checks sizeof(float) on the target — confirm for your platform. */
typedef float float32_t;

/* Configuration macros — used by auto-tuner, not in core logic.
 *
 * None of the MSRL_DEFAULT_* values is referenced anywhere else in this
 * header, which is why each one carries an inline Cppcheck suppression for
 * MISRA-C:2012 Rule 2.5 (unused macro). The suppression comment must stay
 * on the line directly above its #define.
 *
 * Presumably each value is the suggested argument for the MicroSafeRL
 * constructor parameter with the matching name (kappa, alpha, beta, lambda,
 * max penalty, min/max limit, gravity) — the constructor itself declares no
 * defaults, so callers have to pass them explicitly. TODO confirm against
 * the auto-tuner.
 */
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_KAPPA       (1.15f)
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_ALPHA       (0.55f)
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_BETA        (2.2f)
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_LAMBDA      (0.12f)
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_MAX_PENALTY (1.0f)
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_MIN_LIMIT   (-1.5f)
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_MAX_LIMIT   (1.5f)
/* cppcheck-suppress misra-c2012-2.5 */
#define MSRL_DEFAULT_GRAVITY     (0.05f)

/* Fixed weight of the sample-to-sample sensor change ("velocity") in the
 * raw penalty computed by MicroSafeRL::apply_safe_control(). Unlike the
 * defaults above, this one is used by the core logic and is not tunable
 * per instance. */
#define MSRL_VELOCITY_GAIN       (0.3f)

/**
 * Safety shield that scales down and hard-clamps a controller action
 * according to how unstable a monitored sensor signal currently looks.
 *
 * Per sample the class tracks an exponential moving average of the sensor
 * (ema_mean), an exponential moving average of the absolute deviation from
 * that mean (ema_mad) and the previous sample. From these it derives a
 * penalty, turns the penalty into an attenuation factor ("gravity") in
 * [0, 1] when penalty * gravity_factor is non-negative, multiplies the
 * requested action by it and clamps the product to [min_limit, max_limit].
 *
 * Non-finite inputs (NaN, +/-infinity) never reach the filter state and
 * never leave through the return value as NaN; see apply_safe_control().
 *
 * The class uses no dynamic memory and no library calls; FLT_MAX from
 * <float.h> is the only external symbol besides float32_t and
 * MSRL_VELOCITY_GAIN.
 */
class MicroSafeRL {

public:
    /**
     * Store the tuning parameters and clear the filter state. The filter
     * is seeded later by init() or by the first apply_safe_control() call
     * that carries a finite sensor value.
     *
     * @param k_kappa   Gain multiplied onto the raw penalty.
     * @param a_alpha   Weight of the (1 - coherence) term of the raw penalty.
     * @param b_beta    Scale applied to the absolute deviation inside the
     *                  coherence term 1 / (1 + abs_dev * b_beta).
     * @param lm_lambda EMA weight kept on the PREVIOUS estimate; the new
     *                  sample is weighted with (1 - lm_lambda).
     * @param max_p     Upper cap of the penalty.
     * @param l_min     Lower bound of the returned action.
     * @param l_max     Upper bound of the returned action.
     * @param g_gravity Attenuation per unit of penalty:
     *                  gravity = 1 - penalty * g_gravity, floored at 0.
     *
     * If l_min is greater than l_max the two bounds are swapped. Storing
     * them inverted would make the clamp return only one of the two limits
     * for every input.
     */
    explicit MicroSafeRL(
        float32_t k_kappa,
        float32_t a_alpha,
        float32_t b_beta,
        float32_t lm_lambda,
        float32_t max_p,
        float32_t l_min,
        float32_t l_max,
        float32_t g_gravity
    )
        : kappa(k_kappa), alpha(a_alpha), beta(b_beta),
          lambda_(lm_lambda), max_penalty(max_p),
          min_limit((l_min <= l_max) ? l_min : l_max),
          max_limit((l_min <= l_max) ? l_max : l_min),
          gravity_factor(g_gravity),
          ema_mean(0.0f), ema_mad(0.0f), prev_value(0.0f),
          current_penalty(0.0f), initialized(false)
    {}

    /**
     * Seed the filter with a first sensor reading: the mean and the
     * previous sample become initial_sensor, the deviation average and the
     * penalty become zero.
     *
     * A non-finite reading cannot be used as a seed (every later EMA update
     * would stay NaN), so in that case the state is cleared instead and the
     * filter waits for the next finite sample.
     *
     * @param initial_sensor First sensor reading.
     */
    void init(float32_t initial_sensor)
    {
        if (is_finite(initial_sensor)) {
            ema_mean        = initial_sensor;
            ema_mad         = 0.0f;
            prev_value      = initial_sensor;
            current_penalty = 0.0f;
            initialized     = true;
        } else {
            reset();
        }
    }

    /**
     * Update the filter with one sensor sample and return the attenuated,
     * clamped action.
     *
     * Fault handling:
     *  - sensor_val is NaN or +/-infinity: the filter state is left
     *    untouched, the penalty is set to max_penalty and the action is
     *    fully suppressed (gravity 0) for this call.
     *  - the attenuated action is NaN (ai_action is NaN, or infinite while
     *    gravity is 0): it is replaced by 0 before clamping.
     *  - ai_action is +/-infinity with non-zero gravity: clamps to the
     *    corresponding limit.
     *
     * @param ai_action  Action requested by the controller.
     * @param sensor_val Current sensor reading.
     * @return The action scaled by gravity, limited to
     *         [min_limit, max_limit]; never NaN when both limits are
     *         ordinary numbers.
     */
    float32_t apply_safe_control(
        float32_t ai_action,
        float32_t sensor_val
    )
    {
        float32_t result;
        float32_t gravity;

        if (is_finite(sensor_val)) {
            if (!initialized) {
                init(sensor_val);
            }

            /* EMA update — Rule 12.1: explicit parentheses */
            ema_mean = (lambda_ * ema_mean) +
                       ((1.0f - lambda_) * sensor_val);

            /* Deviation is measured against the mean that already
             * includes the current sample. */
            const float32_t abs_dev = fast_abs(sensor_val - ema_mean);

            ema_mad = (lambda_ * ema_mad) +
                      ((1.0f - lambda_) * abs_dev);

            const float32_t velocity = fast_abs(sensor_val - prev_value);
            prev_value               = sensor_val;

            /* coherence is 1 when the sample sits on the mean and falls
             * towards 0 as abs_dev * beta grows. */
            const float32_t coherence = 1.0f / (1.0f + (abs_dev * beta));
            const float32_t raw       = ema_mad
                                      + (alpha * (1.0f - coherence))
                                      + (MSRL_VELOCITY_GAIN * velocity);

            current_penalty = kappa * raw;
            if (current_penalty > max_penalty) {
                current_penalty = max_penalty;
            }

            const float32_t g_raw = 1.0f - (current_penalty * gravity_factor);
            gravity = (g_raw > 0.0f) ? g_raw : 0.0f;
        } else {
            /* Sensor fault: feeding NaN/inf into the EMAs would keep them
             * NaN until reset(), so skip the update, report the worst-case
             * penalty and suppress the action for this sample. */
            current_penalty = max_penalty;
            gravity         = 0.0f;
        }

        float32_t mod = ai_action * gravity;

        /* Both range comparisons below are false for NaN, which would let
         * it fall through to "result = mod"; substitute a neutral command. */
        if (is_nan(mod)) {
            mod = 0.0f;
        }

        /* Hard clamp — single exit point (Rule 15.5) */
        if (mod > max_limit) {
            result = max_limit;
        } else if (mod < min_limit) {
            result = min_limit;
        } else {
            result = mod;
        }

        return result;
    }

    /** @return Penalty computed by the latest apply_safe_control() call (0 after init()/reset()). */
    float32_t get_penalty(void)        const { return current_penalty; }

    /** @return 1 minus the current penalty. */
    float32_t get_current_reward(void) const { return 1.0f - current_penalty; }

    /**
     * Clear the filter state and mark the filter as unseeded; the tuning
     * parameters are kept. The next finite sample passed to
     * apply_safe_control() seeds the filter again.
     */
    void reset(void)
    {
        ema_mean        = 0.0f;
        ema_mad         = 0.0f;
        prev_value      = 0.0f;
        current_penalty = 0.0f;
        initialized     = false;
    }

private:
    /* Tuning parameters, fixed at construction. */
    float32_t kappa;
    float32_t alpha;
    float32_t beta;
    float32_t lambda_;
    float32_t max_penalty;
    float32_t min_limit;
    float32_t max_limit;
    float32_t gravity_factor;

    /* Filter state, rewritten by init(), reset() and apply_safe_control(). */
    float32_t ema_mean;
    float32_t ema_mad;
    float32_t prev_value;
    float32_t current_penalty;
    bool      initialized;

    /** Absolute value without a library call. */
    static inline float32_t fast_abs(float32_t x)
    {
        return (x < 0.0f) ? (-x) : x;
    }

    /** True for ordinary numbers; false for NaN and +/-infinity. */
    static inline bool is_finite(float32_t x)
    {
        /* NaN fails every ordered comparison; infinities fall outside
         * [-FLT_MAX, FLT_MAX]. */
        return (x >= -FLT_MAX) && (x <= FLT_MAX);
    }

    /** True only for NaN; written without the == operator. */
    static inline bool is_nan(float32_t x)
    {
        /* Every non-NaN value is either >= 0 or < 0. */
        return !((x >= 0.0f) || (x < 0.0f));
    }
};

#endif /* MICRO_SAFE_RL_H */