-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptimisation_functions.py
More file actions
117 lines (99 loc) · 7.37 KB
/
Copy pathoptimisation_functions.py
File metadata and controls
117 lines (99 loc) · 7.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import numpy as np
import random # used for shuffling the list containing the training data
# for the stochastic gradient descent
# compute the number of correct guesses on a test set
def evaluate(testSet, neuralNet):
testSize = len(testSet)
results = [(np.argmax(neuralNet.feedForward(img)), np.argmax(label)) for (img,label) in testSet] if testSize>10000 else [(np.argmax(neuralNet.feedForward(img)), label) for (img,label) in testSet]
successes = sum(int(computed==expected) for (computed,expected) in results)
ratio = successes*100/testSize
print("{} / {} ~= {}%".format(successes, testSize, ratio));
return ratio
class StochasticGradientDescent(object):
def __init__(self, nets):
self.neuroNets = nets
def SGD(self, trainingSet, validSet, testSet, numEpochs, batchSize, stepGen, earlyStopParam, earlyStopThrshld, _lambda=0.0, l2Regul=True, trainEval=False):
trainSize=len(trainingSet)
successEvalValid = np.zeros((len(self.neuroNets),numEpochs)) # contains the validation accuracy values
successEvalTrain = np.zeros((len(self.neuroNets),numEpochs)) # contains the training accuracy values
successEvalTest = np.zeros(len(self.neuroNets)) # contains the test error values
normLambda = _lambda/trainSize
i = 0 # nets cycle index
for net in self.neuroNets:
print("net {} with {} hidden neurons".format(i, net.net[1:-1]))
epoch=0
earlyStop=np.zeros(earlyStopParam) # store the validation accuracy for the last earlyStopParam epochs
while True: # until reach maxEpochs or the net stops learning
step = next(stepGen)
random.shuffle(trainingSet)
# train with every batch
for k in range(0, trainSize, batchSize):
self.updateNetwork(i, trainingSet[k:k+batchSize], step, normLambda, l2Regul)
earlyStop[epoch%earlyStopParam] = evaluate(validSet, net) # validation accuracy
successEvalValid[i,epoch] = earlyStop[epoch%earlyStopParam]
if trainEval:
successEvalTrain[i,epoch] = evaluate(trainingSet, net) # training accuracy
lastPredictionsErrors = np.linalg.norm(earlyStop-successEvalValid[i,epoch]) # norm for the last earlyStopParam epochs
if (lastPredictionsErrors <= earlyStopThrshld) or (epoch == numEpochs-1):
print("stopped at epoch {}".format(epoch))
break
epoch += 1
#endwhile - epoch cycle
print("Error on test set for net {} with {} hidden neurons:".format(i,net.net[1:-1]))
successEvalTest[i] = evaluate(testSet, net) # test error
i += 1
#endfor - nets cycle
selected = np.argmax(successEvalTest) # find the net index with the lowest test error and possibly do some stuff...
return (successEvalValid, successEvalTrain, successEvalTest)
# update network biases and weights training on a selected set
def updateNetwork(self, i, trainingSet, step, normLambda, l2Regul):
normStep = step/len(trainingSet) # normalized since it is requested to compute the average between all the derivatives for a single img
# initialize the derivation lists for the biases and weights
sumBiasesDeriv = [np.zeros(biasLayer.shape) for biasLayer in self.neuroNets[i].biasesLayers]
sumWeightsDeriv = [np.zeros(weightLayer.shape) for weightLayer in self.neuroNets[i].weightsLayers]
# train with the given set: compute the derivative for weights and biases with backpropagation algo and sum them up
for img, label in trainingSet:
biasesDeriv, weightsDeriv = self.neuroNets[i].backpropagation(img, label)
sumBiasesDeriv = [newBD+sumBD for newBD, sumBD in zip(biasesDeriv, sumBiasesDeriv)]
sumWeightsDeriv = [newWD+sumWD for newWD, sumWD in zip(weightsDeriv, sumWeightsDeriv)]
# update the network biases and weights following the equation: x'=x-delta(x) where delta(x)=step*gradient(costFun(x)) and x=(w,b)
self.neuroNets[i].biasesLayers = [oldB-(normStep*newB) for oldB, newB in zip(self.neuroNets[i].biasesLayers, sumBiasesDeriv)]
# choice between l2 or l1 regul based on the flag l2Regul
self.neuroNets[i].weightsLayers = [(1-(step*normLambda))*oldW-(normStep*newW) for oldW, newW in zip(self.neuroNets[i].weightsLayers, sumWeightsDeriv)] if l2Regul else [oldW-(step*normLambda*np.sign(oldW))-(normStep*newW) for oldW, newW in zip(self.neuroNets[i].weightsLayers, sumWeightsDeriv)]
class AdaGrad(object):
def __init__(self, net):
self.neuralNet = net
def AG(self, trainingSet, numEpochs, batchSize, step, _lambda=0.0, l2Regul=True, testSet=None):
trainSize=len(trainingSet)
successEvalTest = []
successEvalTrain = []
normLambda = _lambda/trainSize
for epoch in range(numEpochs):
random.shuffle(trainingSet)
# train with every batch
for k in range(0, trainSize, batchSize):
self.updateNetwork(trainingSet[k:k+batchSize], step, normLambda, l2Regul)
if testSet:
successEvalTest.append(evaluate(testSet, self.neuralNet))
#uncomment below if computing accuracy ratio between test-train
#successEvalTrain.append(evaluate(trainingSet, self.neuralNet))
else:
print("Epoch {} trained".format(epoch))
return None if testSet==None else (successEvalTest,successEvalTrain)
# update network biases and weights training on a selected set
def updateNetwork(self, trainingSet, step, normLambda, l2Regul):
normStep = step/len(trainingSet) # normalized since it is requested to compute the average between all the derivatives for a single img
# initialize the derivation lists for the biases and weights
sumBiasesDeriv = [np.zeros(biasLayer.shape) for biasLayer in self.neuralNet.biasesLayers]
sumWeightsDeriv = [np.zeros(weightLayer.shape) for weightLayer in self.neuralNet.weightsLayers]
# train with the given set: compute the derivative for weights and biases with backpropagation algo and sum them up
for img, label in trainingSet:
biasesDeriv, weightsDeriv = self.neuralNet.backpropagation(img, label)
sumBiasesDeriv = [np.multiply(newBD,newBD)+sumBD for newBD, sumBD in zip(biasesDeriv, sumBiasesDeriv)]
sumWeightsDeriv = [np.multiply(newWD,newWD)+sumWD for newWD, sumWD in zip(weightsDeriv, sumWeightsDeriv)]
deltab=[-1*(np.divide(normStep,(10**-7+np.sqrt(r))))*newBD for newBD, r in zip(biasesDeriv, sumBiasesDeriv)]
deltaw=[-1*(np.divide(normStep,(10**-7+np.sqrt(r))))*newWD for newWD, r in zip(weightsDeriv, sumWeightsDeriv)]
# update the network biases and weights following the equation: x'=x-delta(x) where delta(x)=step*gradient(costFun(x)) and x=(w,b)
self.neuralNet.biasesLayers = [oldB+newB for oldB, newB in zip(self.neuralNet.biasesLayers, sumBiasesDeriv)]
# choice between l2 or l1 regul based on the flag l2Regul
self.neuralNet.weightsLayers = [oldW+newW for oldW, newW in zip(self.neuralNet.weightsLayers, sumWeightsDeriv)]