From 1b3b8463b9dfba3fcf3a93bd9d06db21366f06a7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 15 Dec 2023 09:13:12 -0600 Subject: [PATCH] Clarify that the Softmax derivative is good-enough More context: https://github.com/MadLittleMods/zig-neural-networks/blob/main/dev-notes.md#activation-functions --- Assets/Scripts/Neural Network/Activation/Activation.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Assets/Scripts/Neural Network/Activation/Activation.cs b/Assets/Scripts/Neural Network/Activation/Activation.cs index 08939db..56984f2 100644 --- a/Assets/Scripts/Neural Network/Activation/Activation.cs +++ b/Assets/Scripts/Neural Network/Activation/Activation.cs @@ -126,6 +126,13 @@ public double Activate(double[] inputs, int index) return res; } + // Much like stochastic gradient descent's quick, not-so-perfect steps downhill + // using mini-batches, this derivative serves as a good-enough approximation. It's + // simpler to calculate and we can keep our interface consistent with the other + // single-input activation functions above (though it won't pass gradient checks). A + // complete derivative for the Softmax function involves computing a Jacobian matrix + // which finds the partial derivatives of the activation function with respect to all + // of the inputs of each node in the layer. public double Derivative(double[] inputs, int index) { double expSum = 0;