From 1b3b8463b9dfba3fcf3a93bd9d06db21366f06a7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 15 Dec 2023 09:13:12 -0600 Subject: [PATCH] Clarify that the Softmax derivative is good-enough More context: https://github.com/MadLittleMods/zig-neural-networks/blob/main/dev-notes.md#activation-functions --- Assets/Scripts/Neural Network/Activation/Activation.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Assets/Scripts/Neural Network/Activation/Activation.cs b/Assets/Scripts/Neural Network/Activation/Activation.cs index 08939db..56984f2 100644 --- a/Assets/Scripts/Neural Network/Activation/Activation.cs +++ b/Assets/Scripts/Neural Network/Activation/Activation.cs @@ -126,6 +126,13 @@ public double Activate(double[] inputs, int index) return res; } + // Much like stochastic gradient descent's quick, not-so-perfect steps downhill + // using mini-batches, this derivative serves as a good-enough approximation. It's + // simpler to calculate and we can keep our interface consistent with the other + // single-input activation functions above (though it won't pass gradient checks). A + // complete derivative for the Softmax function involves computing a Jacobian matrix + // which finds the partial derivatives of the activation function with respect to all + // of the inputs of each node in the layer. public double Derivative(double[] inputs, int index) { double expSum = 0;