diff --git a/Assets/Scripts/Neural Network/Activation/Activation.cs b/Assets/Scripts/Neural Network/Activation/Activation.cs
index 08939db..56984f2 100644
--- a/Assets/Scripts/Neural Network/Activation/Activation.cs
+++ b/Assets/Scripts/Neural Network/Activation/Activation.cs
@@ -126,6 +126,13 @@ public double Activate(double[] inputs, int index)
             return res;
         }
 
+        // Much like stochastic gradient descent's quick, not-so-perfect steps downhill
+        // using mini-batches, this derivative serves as a good-enough approximation. It's
+        // simpler to calculate and it keeps our interface consistent with the other
+        // single-input activation functions above (though it won't pass gradient checks). A
+        // complete derivative for the Softmax function involves computing a Jacobian matrix,
+        // which holds the partial derivatives of each node's activation with respect to all
+        // of the inputs to the layer.
        public double Derivative(double[] inputs, int index)
        {
            double expSum = 0;
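
For reference, a minimal sketch of what the complete, Jacobian-based Softmax derivative described in the comment could look like. It is not part of this change: the class and method names and the upstreamGradient parameter (dLoss/dOutput for every node in the layer) are hypothetical, and the backpropagation plumbing that would supply them is assumed to exist elsewhere.

    using System;

    // Sketch only: full Softmax derivative via the Jacobian, dS_i/dx_j = s_i * (delta_ij - s_j).
    public static class SoftmaxJacobianSketch
    {
        public static double[] Derivative(double[] inputs, double[] upstreamGradient)
        {
            int n = inputs.Length;

            // Softmax outputs: s[i] = exp(inputs[i]) / sum_j exp(inputs[j])
            double expSum = 0;
            for (int i = 0; i < n; i++)
                expSum += Math.Exp(inputs[i]);

            double[] s = new double[n];
            for (int i = 0; i < n; i++)
                s[i] = Math.Exp(inputs[i]) / expSum;

            // Chain rule: dLoss/dx_j = sum_i upstreamGradient[i] * dS_i/dx_j
            double[] grad = new double[n];
            for (int j = 0; j < n; j++)
            {
                double sum = 0;
                for (int i = 0; i < n; i++)
                {
                    double jacobian = s[i] * ((i == j ? 1.0 : 0.0) - s[j]);
                    sum += upstreamGradient[i] * jacobian;
                }
                grad[j] = sum;
            }
            return grad;
        }
    }

Unlike the single-input Derivative(double[] inputs, int index) signature above, this version has to return a gradient for every input at once, which is why the approximation was chosen to keep the interface uniform.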