#include "TensorSANN/activations/Softmax.hpp"
#include "TensorSANN/utils/Tensor.hpp"
#include <algorithm>
#include <numeric>
#include <cmath>
#include <cassert>

namespace TensorSANN {

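// Forward pass: s_i = exp(z_i - max(z)) / sum_j exp(z_j - max(z)).
// Subtracting the maximum before exponentiating avoids overflow in
// std::exp without changing the result, since the shift cancels in the ratio.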
Tensor Softmax::forward(const Tensor &input) {
    cachedInput_ = input;
    Tensor output = input;
    std::vector<float> &input_data = output.data();

    // Largest logit, used to shift the inputs for numerical stability.
    float max_val = *std::max_element(input_data.begin(), input_data.end());

    // Exponentiate the shifted inputs.
    std::vector<float> exponents;
    exponents.reserve(input_data.size());
    for (float val : input_data) {
        exponents.push_back(std::exp(val - max_val));
    }

    // Sum of exponents for normalization.
    float sum = std::accumulate(exponents.begin(), exponents.end(), 0.0f);

    // Normalize to get probabilities.
    for (size_t i = 0; i < input_data.size(); ++i) {
        input_data[i] = exponents[i] / sum;
    }

    this->output_ = output;
    return output;
}

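// Backward pass: with s = softmax(z), the Jacobian is
// ds_i/dz_j = s_i * (delta_ij - s_j), so the vector-Jacobian product
// collapses to dL/dz_i = s_i * (dL/ds_i - sum_j dL/ds_j * s_j).
// This runs in O(n) instead of materializing the O(n^2) Jacobian.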
Tensor Softmax::backward(const Tensor &grad_output) {
    assert(this->output_.size() == grad_output.size() && "Gradient size mismatch");

    const std::vector<float> &S = this->output_.data();
    const std::vector<float> &dL_dS = grad_output.data();
    std::vector<float> dL_dz(S.size());

    // Dot product of the upstream gradient and the softmax output,
    // i.e. sum_j dL/ds_j * s_j.
    float sum_grad = 0.0f;
    for (size_t i = 0; i < S.size(); ++i) {
        sum_grad += dL_dS[i] * S[i];
    }

    // Final gradients: dL/dz_i = s_i * (dL/ds_i - sum_grad).
    for (size_t i = 0; i < S.size(); ++i) {
        dL_dz[i] = S[i] * (dL_dS[i] - sum_grad);
    }

    return Tensor(output_.shape(), dL_dz);
}
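
// A minimal usage sketch (assumptions: a Tensor constructor taking a shape
// and a flat float vector, as used in the backward return above; the
// `upstream` values are illustrative, standing in for dL/ds from a loss):
//
//   Softmax softmax;
//   Tensor logits({3}, {1.0f, 2.0f, 3.0f});
//   Tensor probs = softmax.forward(logits);     // entries sum to 1
//   Tensor upstream({3}, {0.0f, 0.0f, 1.0f});   // hypothetical dL/ds
//   Tensor grad = softmax.backward(upstream);   // dL/dz for the logits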
} // namespace TensorSANN