#include "TensorSANN/activations/Softmax.hpp"
#include "TensorSANN/utils/Tensor.hpp"
#include <algorithm>
#include <numeric>
#include <cmath>
#include <cassert>

namespace TensorSANN {

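// Forward pass: s_i = exp(z_i - max(z)) / sum_j exp(z_j - max(z)).
// Subtracting the maximum before exponentiating avoids overflow in
// std::exp without changing the result, since the shift cancels in the ratio.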
Tensor Softmax::forward(const Tensor &input) {
    cachedInput_ = input;
    Tensor output = input;
    std::vector<float> &input_data = output.data();

    // Largest logit, used to shift the inputs for numerical stability.
    float max_val = *std::max_element(input_data.begin(), input_data.end());

    // Exponentiate the shifted inputs.
    std::vector<float> exponents;
    exponents.reserve(input_data.size());
    for (float val : input_data) {
        exponents.push_back(std::exp(val - max_val));
    }

    // Sum of exponents for normalization.
    float sum = std::accumulate(exponents.begin(), exponents.end(), 0.0f);

    // Normalize to get probabilities.
    for (size_t i = 0; i < input_data.size(); ++i) {
        input_data[i] = exponents[i] / sum;
    }

    this->output_ = output;
    return output;
}

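// Backward pass: with s = softmax(z), the Jacobian is
// ds_i/dz_j = s_i * (delta_ij - s_j), so the vector-Jacobian product
// collapses to dL/dz_i = s_i * (dL/ds_i - sum_j dL/ds_j * s_j).
// This runs in O(n) instead of materializing the O(n^2) Jacobian.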
Tensor Softmax::backward(const Tensor &grad_output) {
    assert(this->output_.size() == grad_output.size() && "Gradient size mismatch");

    const std::vector<float> &S = this->output_.data();
    const std::vector<float> &dL_dS = grad_output.data();
    std::vector<float> dL_dz(S.size());

    // Dot product of the upstream gradient and the softmax output,
    // i.e. sum_j dL/ds_j * s_j.
    float sum_grad = 0.0f;
    for (size_t i = 0; i < S.size(); ++i) {
        sum_grad += dL_dS[i] * S[i];
    }

    // Final gradients: dL/dz_i = s_i * (dL/ds_i - sum_grad).
    for (size_t i = 0; i < S.size(); ++i) {
        dL_dz[i] = S[i] * (dL_dS[i] - sum_grad);
    }

    return Tensor(output_.shape(), dL_dz);
}
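
// A minimal usage sketch (assumptions: a Tensor constructor taking a shape
// and a flat float vector, as used in the backward return above; the
// `upstream` values are illustrative, standing in for dL/ds from a loss):
//
//   Softmax softmax;
//   Tensor logits({3}, {1.0f, 2.0f, 3.0f});
//   Tensor probs = softmax.forward(logits);     // entries sum to 1
//   Tensor upstream({3}, {0.0f, 0.0f, 1.0f});   // hypothetical dL/ds
//   Tensor grad = softmax.backward(upstream);   // dL/dz for the logits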
} // namespace TensorSANN