import math
import numpy as np
import tensorflow as tf
import dm_utils
FLAGS = tf.app.flags.FLAGS
# Global switch to enable/disable training of variables
_glbl_is_training = tf.Variable(initial_value=True, trainable=False, name='glbl_is_training')
# Global variable dictionary. This is how we can share variables across models
_glbl_variables = {_glbl_is_training.name : _glbl_is_training}

def initialize_variables(sess):
    """Run this function only once, before the model begins to train."""
    # First initialize all variables
    sess.run(tf.global_variables_initializer())
    # Optionally freeze the graph to prevent new operations from being added
    #tf.get_default_graph().finalize()

def enable_training(onoff):
    """Switches training on or off globally (all models are affected).

    Dropout is expected to be enabled during training and disabled afterwards;
    batch normalization is also affected.

    Returns the assign op, which the caller must run in a session for the
    change to take effect.
    """
    # tf.assign only builds an op in the graph; it does nothing until it is run
    return tf.assign(_glbl_is_training, bool(onoff))
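
# Usage sketch (assumes an active tf.Session `sess`; not part of the original file):
#   sess.run(enable_training(False))  # switch all models to inference mode
#   sess.run(enable_training(True))   # switch back to training mode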
# TBD: Add "All you need is a good init"

class Model:
    """A neural network model.

    Currently only supports a feedforward architecture."""

    def __init__(self, name, features, enable_batch_norm=True):
        self.name = name
        self.locals = set()
        self.outputs = [features]
        self.enable_batch_norm = enable_batch_norm

    def _get_variable(self, name, initializer=None):
        """Creates a new variable, or reuses a shared one with the same name."""
        # Variables are uniquely identified by a triplet: model name, layer number, and variable name
        layer = 'L%03d' % (self.get_num_layers()+1,)
        full_name = '/'.join([self.name, layer, name])
        if full_name in _glbl_variables:
            # Reuse existing variable
            #print("Reusing variable %s" % full_name)
            var = _glbl_variables[full_name]
            if initializer is not None:
                assert var.get_shape() == initializer.get_shape()
        elif initializer is not None:
            # Create new variable
            var = tf.Variable(initializer, name=full_name)
            _glbl_variables[full_name] = var
        else:
            raise ValueError("An initializer must be provided when creating a new variable")
        self.locals.add(var)
        return var

    def _get_num_inputs(self):
        return int(self.get_output().get_shape()[-1])

    def _variable_initializer(self, prev_units, num_units, stddev_factor=1.0):
        """Initialization in the style of Glorot 2010.

        stddev_factor should be 1.0 for linear activations, and 2.0 for ReLUs."""
        assert prev_units > 0 and num_units > 0
        stddev = np.sqrt(float(stddev_factor) / np.sqrt(prev_units*num_units))
        return tf.truncated_normal([prev_units, num_units],
                                   mean=0.0, stddev=stddev)

    def _variable_initializer_conv2d(self, prev_units, num_units, mapsize, is_residual):
        """Near-identity initialization for conv2d weights.

        All weights start near zero; for non-residual layers, the center tap
        connecting each matching input/output channel pair is set to 1.0."""
        assert prev_units > 0 and num_units > 0
        size = [mapsize, mapsize, prev_units, num_units]
        stddev_factor = 1e-1 / (mapsize * mapsize * prev_units * num_units)
        result = stddev_factor * np.random.uniform(low=-1, high=1, size=size)
        if not is_residual:
            # Focus nearly all the weight on the center of the kernel
            for i in range(min(prev_units, num_units)):
                result[mapsize//2, mapsize//2, i, i] += 1.0
        # else: for residual layers, leaving all parameters near zero is the right thing to do
        result = tf.constant(result.astype(np.float32))
        return result
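
    # For example: with mapsize=3 and prev_units == num_units, the initializer
    # above yields an approximate identity map, since each output channel
    # copies its matching input channel through the center tap plus small noise.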

    def get_num_layers(self):
        return len(self.outputs)

    def add_batch_norm(self, scale=False):
        """Adds a batch normalization layer to this model.

        See ArXiv 1502.03167v3 for details."""
        if not self.enable_batch_norm:
            return self
        out = tf.contrib.layers.batch_norm(self.get_output(), scale=scale,
                                           is_training=_glbl_is_training)
        self.outputs.append(out)
        return self

    def add_dropout(self, keep_prob=.5):
        """Applies dropout to the output of this model"""
        # Blend toward keep_prob=1.0 (i.e. no dropout) when not training
        is_training = tf.to_float(_glbl_is_training)
        keep_prob = is_training * keep_prob + (1.0 - is_training)
        out = tf.nn.dropout(self.get_output(), keep_prob=keep_prob)
        self.outputs.append(out)
        return self

    def add_flatten(self):
        """Transforms the output of this network to a 1D tensor"""
        batch_size = int(self.get_output().get_shape()[0])
        out = tf.reshape(self.get_output(), [batch_size, -1])
        self.outputs.append(out)
        return self

    def add_reshape(self, shape):
        """Reshapes the output of this network"""
        out = tf.reshape(self.get_output(), shape)
        self.outputs.append(out)
        return self

    def add_dense(self, num_units, stddev_factor=1.0):
        """Adds a dense linear layer to this model.

        Uses Glorot 2010 initialization assuming linear activation."""
        assert len(self.get_output().get_shape()) == 2, "Previous layer must be 2-dimensional (batch, channels)"
        prev_units = self._get_num_inputs()
        # Weight term
        initw = self._variable_initializer(prev_units, num_units,
                                           stddev_factor=stddev_factor)
        weight = self._get_variable('weight', initw)
        # Bias term
        initb = tf.constant(0.0, shape=[num_units])
        bias = self._get_variable('bias', initb)
        # Output of this layer
        out = tf.matmul(self.get_output(), weight) + bias
        self.outputs.append(out)
        return self

    def add_sigmoid(self, rnge=1.0):
        """Adds a sigmoid activation function layer to this model.

        The output is rescaled to span (0.5 - rnge/2, 0.5 + rnge/2), which is
        (0,1) for the default rnge=1.0."""
        out = 0.5 + rnge * (tf.nn.sigmoid(self.get_output()) - 0.5)
        self.outputs.append(out)
        return self

    def add_tanh(self):
        """Adds a tanh (-1,+1) activation function layer to this model."""
        out = tf.nn.tanh(self.get_output())
        self.outputs.append(out)
        return self

    def add_softmax(self):
        """Adds a softmax-like operation to this model.

        Note this is not the standard exponential softmax: the inputs are
        squared and then normalized to sum to one."""
        this_input = tf.square(self.get_output())
        reduction_indices = list(range(1, len(this_input.get_shape())))
        acc = tf.reduce_sum(this_input, reduction_indices=reduction_indices, keep_dims=True)
        out = this_input / (acc + FLAGS.epsilon)
        #out = tf.verify_tensor_all_finite(out, "add_softmax failed; is the sum equal to zero?")
        self.outputs.append(out)
        return self

    def add_relu(self):
        """Adds a ReLU activation function to this model"""
        out = tf.nn.relu(self.get_output())
        self.outputs.append(out)
        return self

    def add_elu(self):
        """Adds an ELU activation function to this model"""
        out = tf.nn.elu(self.get_output())
        self.outputs.append(out)
        return self

    def add_lrelu(self, leak=.2):
        """Adds a leaky ReLU (LReLU) activation function to this model"""
        # lrelu(x) = max(x, leak*x), rewritten here as t1*x + t2*|x|
        t1 = .5 * (1 + leak)
        t2 = .5 * (1 - leak)
        out = t1 * self.get_output() + \
              t2 * tf.abs(self.get_output())
        self.outputs.append(out)
        return self

    def add_conv2d(self, num_units, mapsize=1, stride=1, is_residual=False):
        """Adds a 2D convolutional layer."""
        assert len(self.get_output().get_shape()) == 4, "Previous layer must be 4-dimensional (batch, width, height, channels)"
        prev_units = self._get_num_inputs()
        # Weight term and convolution
        initw = self._variable_initializer_conv2d(prev_units, num_units, mapsize,
                                                  is_residual=is_residual)
        weight = self._get_variable('weight', initw)
        out = tf.nn.conv2d(self.get_output(), weight,
                           strides=[1, stride, stride, 1],
                           padding='SAME')
        # Bias term
        initb = tf.constant(0.0, shape=[num_units])
        bias = self._get_variable('bias', initb)
        out = tf.nn.bias_add(out, bias)
        self.outputs.append(out)
        return self

    def add_conv2d_transpose(self, num_units, mapsize=1, stride=1, is_residual=False):
        """Adds a transposed 2D convolutional layer"""
        raise NotImplementedError("This function is broken right now due to how "
                                  "_variable_initializer_conv2d is built. "
                                  "Use a regular convolution instead")
        assert len(self.get_output().get_shape()) == 4, "Previous layer must be 4-dimensional (batch, width, height, channels)"
        prev_units = self._get_num_inputs()
        # Weight term and convolution
        initw = self._variable_initializer_conv2d(prev_units, num_units, mapsize,
                                                  is_residual=is_residual)
        weight = self._get_variable('weight', initw)
        weight = tf.transpose(weight, perm=[0, 1, 3, 2])
        prev_output = self.get_output()
        output_shape = [FLAGS.batch_size,
                        int(prev_output.get_shape()[1]) * stride,
                        int(prev_output.get_shape()[2]) * stride,
                        num_units]
        out = tf.nn.conv2d_transpose(self.get_output(), weight,
                                     output_shape=output_shape,
                                     strides=[1, stride, stride, 1],
                                     padding='SAME')
        # Bias term
        initb = tf.constant(0.0, shape=[num_units])
        bias = self._get_variable('bias', initb)
        out = tf.nn.bias_add(out, bias)
        self.outputs.append(out)
        return self

    def add_concat(self, terms):
        """Adds a layer that concatenates the given terms with the current output"""
        if len(terms) > 0:
            axis = len(self.get_output().get_shape()) - 1
            terms = terms + [self.get_output()]
            # Note: tf.concat(axis, values) is the TF <= 0.12 argument order;
            # TF >= 1.0 reverses it to tf.concat(values, axis)
            out = tf.concat(axis, terms)
            self.outputs.append(out)
        return self

    def add_sum(self, term):
        """Adds a layer that sums the top layer with the given term"""
        prev_shape = self.get_output().get_shape()
        term_shape = term.get_shape()
        #print("%s %s" % (prev_shape, term_shape))
        assert prev_shape[1:] == term_shape[1:], "Can't sum terms with a different size"
        out = tf.add(self.get_output(), term)
        self.outputs.append(out)
        return self

    def add_mean(self):
        """Adds a layer that averages the inputs from the previous layer"""
        prev_shape = self.get_output().get_shape()
        reduction_indices = list(range(len(prev_shape)))
        assert len(reduction_indices) > 2, "Can't average a (batch, activation) tensor"
        # Average over the spatial dimensions, keeping batch and channels
        reduction_indices = reduction_indices[1:-1]
        out = tf.reduce_mean(self.get_output(), reduction_indices=reduction_indices)
        self.outputs.append(out)
        return self

    def add_avg_pool(self, height=2, width=2):
        """Adds a layer that performs average pooling of the given size"""
        ksize = [1, height, width, 1]
        strides = [1, height, width, 1]
        out = tf.nn.avg_pool(self.get_output(), ksize, strides, 'VALID')
        self.outputs.append(out)
        return self

    def add_upscale(self, factor=2):
        """Adds a layer that upscales the output by the given factor (default 2x)
        through nearest-neighbor interpolation.

        See http://distill.pub/2016/deconv-checkerboard/"""
        out = dm_utils.upscale(self.get_output(), factor)
        self.outputs.append(out)
        return self

    def get_output(self):
        """Returns the output from the topmost layer of the network"""
        return self.outputs[-1]

    def get_num_parameters(self):
        """Return the number of parameters in this model"""
        num_params = 0
        for var in self.locals:
            size = 1
            for dim in var.get_shape():
                size *= int(dim)
            num_params += size
        return num_params

    def get_all_variables(self):
        """Returns all variables used in this model"""
        return list(self.locals)
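

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original API; illustrative only).
# Builds a tiny convolutional model with the fluent interface above and runs
# one forward pass. Assumes a TF 1.x-era runtime matching this file and that
# the companion dm_utils module is importable.
if __name__ == '__main__':
    features = tf.placeholder(tf.float32, shape=[16, 32, 32, 3])
    model = Model('demo', features)
    model.add_conv2d(64, mapsize=3) \
         .add_relu() \
         .add_avg_pool() \
         .add_flatten() \
         .add_dense(10)
    print("Parameters: %d" % model.get_num_parameters())
    with tf.Session() as sess:
        initialize_variables(sess)
        out = sess.run(model.get_output(),
                       feed_dict={features: np.zeros([16, 32, 32, 3], np.float32)})
        print("Output shape: %s" % (out.shape,))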