diff --git a/README.md b/README.md
index eed8d31..278924d 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,7 @@ a VGG16 convolutional neural network and all kind of networks based on the layer
 
 Currently the following layers are supported:
 * Convolution2D with padding valid/same and strides
+* SeparableConvolution2D with padding valid/same and strides
 * MaxPooling2D with strides
 * Dense
 * Softmax
diff --git a/applications/daimler/compiler_test.py b/applications/daimler/compiler_test.py
index 9adc22c..197c109 100644
--- a/applications/daimler/compiler_test.py
+++ b/applications/daimler/compiler_test.py
@@ -8,6 +8,11 @@ def test_daimler(self):
         generate("db", {"x": 18, "y": 36}, "img.db")
         train("img.db", "model.h5")
         compile("model.h5", ".", "img.db")
+
+    def test_daimler_separable(self):
+        generate("db", {"x": 18, "y": 36}, "img.db")
+        train("img.db", "model_separable.h5", use_separable=True)
+        compile("model_separable.h5", ".", "img.db")
 
 if __name__ == "__main__":
     t = Tests()
diff --git a/applications/daimler/train.py b/applications/daimler/train.py
index 32fdcda..389fdba 100755
--- a/applications/daimler/train.py
+++ b/applications/daimler/train.py
@@ -12,45 +12,56 @@ print("PlaidML not found, using Tensorflow")
     USE_PLAIDML = False
 
 import argparse
-from keras.layers import Flatten, MaxPooling2D, Convolution2D, Dropout, Dense
+from keras.layers import Flatten, MaxPooling2D, Convolution2D, SeparableConvolution2D, Dropout, Dense
 from keras.models import Sequential
 from applications.daimler.loader import load_imdb
 
-def train(imgdb_path, model_path):
+def create_conv_model(shape, ConvolutionType=Convolution2D):
+    """
+    Build the CNN with three convolution/pooling stages and a two-class softmax output.
+    :param shape: Input shape, passed as input_shape to the first convolution.
+    :param ConvolutionType: Keras layer class used for all three convolutions, e.g. Convolution2D
+                            or SeparableConvolution2D.
+    :return: The Sequential model, not compiled yet.
+    """
+    conv_model = Sequential()
+    conv_model.add(ConvolutionType(8, (3, 3), input_shape=shape,
+                                   activation='relu', padding='same'))
+    conv_model.add(MaxPooling2D(pool_size=(2, 2)))
+    conv_model.add(ConvolutionType(24, (3, 3), padding='same', activation='relu'))
+    conv_model.add(MaxPooling2D(pool_size=(2, 2)))
+    conv_model.add(ConvolutionType(48, (4, 3), padding='same', activation='relu'))
+    conv_model.add(MaxPooling2D(pool_size=(2, 2)))
+    conv_model.add(Dropout(0.4))
+    conv_model.add(Flatten())
+    conv_model.add(Dense(2, activation='softmax'))
+    return conv_model
+
+def train(imgdb_path, model_path, use_separable=False):
+    """
+    Train the model on the image database and save it.
+    :param imgdb_path: Path of the image database, read with load_imdb().
+    :param model_path: Path the trained model is saved to.
+    :param use_separable: If True, SeparableConvolution2D layers are used and 30 instead of 10 epochs
+                          are trained. Otherwise regular Convolution2D layers are used.
+    :return: None.
+    """
     imdb = load_imdb(imgdb_path)
     x = imdb['images']
     y = imdb['y']
-    usual_model = Sequential()
-    usual_model.add(Convolution2D(4, (3, 3), input_shape=(x.shape[1], x.shape[2], 1),
-                                  activation='relu', padding='same'))
-    usual_model.add(MaxPooling2D(pool_size=(2, 2)))
-    usual_model.add(Convolution2D(16, (3, 3), padding='same', activation='relu'))
-    usual_model.add(MaxPooling2D(pool_size=(2, 2)))
-    usual_model.add(Convolution2D(32, (3, 3), padding='same', activation='relu',))
-    usual_model.add(MaxPooling2D(pool_size=(4, 2)))
-    usual_model.add(Dropout(0.4))
-    usual_model.add(Convolution2D(2, (2, 2), activation='softmax'))
-    usual_model.add(Flatten())
-
-    dense_model = Sequential()
-    dense_model.add(Convolution2D(4, (3, 3), input_shape=(x.shape[1], x.shape[2], 1),
-                                  activation='relu', padding='same'))
-    dense_model.add(MaxPooling2D(pool_size=(2, 2)))
-    dense_model.add(Convolution2D(16, (3, 3), padding='same', activation='relu'))
-    dense_model.add(MaxPooling2D(pool_size=(2, 2)))
-    dense_model.add(Convolution2D(32, (3, 3), padding='same', activation='relu'))
-    dense_model.add(MaxPooling2D(pool_size=(2, 2)))
-    dense_model.add(Dropout(0.4))
-    dense_model.add(Flatten())
-    dense_model.add(Dense(2, activation='softmax'))
+    input_shape = (x.shape[1], x.shape[2], 1)
+    epochs = 30 if use_separable else 10
 
     # Select the current model here
-    model = dense_model
+    if use_separable:
+        model = create_conv_model(input_shape, SeparableConvolution2D)
+    else:
+        model = create_conv_model(input_shape, Convolution2D)
     model.compile(loss='categorical_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
     print(model.summary())
-    model.fit(x, y, batch_size=1000, epochs=10, verbose=1, validation_split=0.05)
+    model.fit(x, y, batch_size=1000, epochs=epochs, verbose=1, validation_split=0.05)
     model.save(model_path)
 
 if __name__ == "__main__":
@@ -61,11 +72,13 @@ def train(imgdb_path, model_path):
                         'Default is img.db in current folder.')
     parser.add_argument('-m', '--model-path', dest='model_path',
                         help='Store the trained model using this path. Default is model.h5.')
+    parser.add_argument('-s', '--separable', dest='use_separable', action='store_true',
+                        help='Whether to use separable convolution instead of regular ones. Default is regular (Conv2D).')
 
     args = parser.parse_args()
 
     imgdb_path = "img.db"
     model_path = "model.h5"
 
     if args.imgdb_path is not None:
         imgdb_path = args.imgdb_path
@@ -73,5 +86,6 @@ def train(imgdb_path, model_path):
     if args.model_path is not None:
         model_path = args.model_path
 
-    train(imgdb_path, model_path)
+    # A store_true flag is always a bool (never None), so it is passed on directly.
+    train(imgdb_path, model_path, args.use_separable)
 
diff --git a/nncg/nncg.py b/nncg/nncg.py
index 0b49050..197a534 100644
--- a/nncg/nncg.py
+++ b/nncg/nncg.py
@@ -1,5 +1,5 @@
 from tensorflow.keras import backend as K
-from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Flatten, \
+from tensorflow.keras.layers import SeparableConvolution2D, Convolution2D, MaxPooling2D, Flatten, \
     Dropout, BatchNormalization, LeakyReLU, InputLayer, Dense
 import keras.layers as kl
 
@@ -82,7 +82,9 @@ def keras_compile(self, imdb, model, code_path, identifier=None, image_mean=0, a
 
         # Read the Keras model layer by layer and add it to the graph
         for i, layer in enumerate(model.layers):
-            if type(layer) == Convolution2D or type(layer) == kl.convolutional.Conv2D:
+            if type(layer) == SeparableConvolution2D or type(layer) == kl.convolutional.SeparableConv2D:
+                cur_node = self.add_separable_conv2d(layer, cur_node)
+            elif type(layer) == Convolution2D or type(layer) == kl.convolutional.Conv2D:
                 cur_node = self.add_conv2d(layer, cur_node)
             elif type(layer) == MaxPooling2D or type(layer) == kl.pooling.MaxPooling2D:
                 cur_node = self.add_maxpool2d(layer, cur_node)
@@ -339,6 +341,33 @@ def add_conv2d(self, layer: Convolution2D, prev_node) -> Node:
             cur_node = self.add_test_node(cur_node, layer)
         return cur_node
 
+    def add_separable_conv2d(self, layer: SeparableConvolution2D, prev_node) -> Node:
+        """
+        Add a Keras SeparableConv2D layer to the graph as a depthwise node followed by a pointwise node.
+        :param layer: The Keras layer to add.
+        :param prev_node: The node the new nodes are connected to.
+        :return: The last node added for this layer.
+        """
+        if layer.depth_multiplier != 1 or tuple(layer.dilation_rate) != (1, 1):
+            # The generated loops implement neither, so fail instead of writing wrong C code.
+            raise NotImplementedError("SeparableConv2D needs depth_multiplier=1 and dilation_rate=1.")
+        w1 = K.eval(layer.weights[0])
+        w2 = K.eval(layer.weights[1])
+        # layer.bias is None for use_bias=False, the output is then initialized with zeros.
+        b = K.eval(layer.bias) if layer.use_bias else np.zeros(w2.shape[3], dtype='float32')
+        strides = layer.strides
+        padding = layer.padding
+        activation = layer.activation
+        # The depthwise step has one output channel per input channel, so its (zero) bias must have
+        # that length. The real bias is added once by the pointwise step.
+        depthwise_b = np.zeros(w1.shape[2], dtype='float32')
+        cur_node = SeparableConv2D_Depthwise_Node(w1, depthwise_b, strides, padding, prev_node)
+        cur_node = SeparableConv2D_Pointwise_Node(w2, b, (1, 1), 'valid', cur_node)
+        cur_node = self.add_activation(activation, cur_node)
+        if self.testing != 0:
+            cur_node = self.add_test_node(cur_node, layer)
+        return cur_node
+
     def write_c(self, path):
         """
         Write the global graph as C code.
diff --git a/nncg/nodes/cnn.py b/nncg/nodes/cnn.py
index 926738b..ee5c524 100644
--- a/nncg/nodes/cnn.py
+++ b/nncg/nodes/cnn.py
@@ -156,6 +156,286 @@ def quantize(self, x_scale):
         self.b = (self.b / self.scale / x_scale).astype('int16')
         #self.out_var.type = 'int'
 
+class SeparableConv2D_Depthwise_Node(Node):
+    """
+    A Node representing the depthwise step of a separable convolution. This is an abstract node to represent the
+    meta information given by the Keras SeparableConv2D. It is thus in HWC format. It must be lowered to be
+    writeable as C code but should not be removed from graph to provide the meta information.
+    Only a depth multiplier of 1 is supported: every input channel gives exactly one output channel.
+    """
+    quantized = False
+    in_var: Variable
+    out_var: Variable
+    access_pattern: List[int]
+
+    def __init__(self, w: np.ndarray, b: np.ndarray, stride: tuple, padding: str, prev_node):
+        """
+        Initialize the SeparableConv2D_Depthwise_Node.
+        :param w: Weights. Shape must be: kernel height, kernel width, channels in, depth multiplier (must be 1)
+                  as NumPy ndarray. Thus the depthwise weight from a Keras SeparableConv2D can be passed without
+                  prior conversion.
+        :param b: Bias. NumPy ndarray with length "channels in" as this is also the number of output channels.
+        :param stride: Tuple of 2.
+        :param padding: Like in TensorFlow 'same' or 'valid'
+        :param prev_node: The previous node.
+        """
+        if w.shape[3] != 1:
+            # lowering() only reads filter 0 of each channel, other multipliers would silently give wrong code.
+            raise NotImplementedError("Depth multiplier other than 1 is not supported.")
+        self.in_var = prev_node.out_var
+        x = self.in_var
+        super().__init__(prev_node)
+        self.in_dim = prev_node.out_dim
+        self.w = w
+        self.b = b
+        self.stride = stride
+        self.padding = padding
+        self.H, self.W, self.C_IN = x.dim
+        self.KH, self.KW, _, self.C_OUT = w.shape
+        self.SH, self.SW = stride
+
+        if padding == 'valid':
+            H_OUT = int(np.ceil((self.H - self.KH + 1) / self.SH))
+            W_OUT = int(np.ceil((self.W - self.KW + 1) / self.SW))
+            self.pad_top = self.pad_bottom = self.pad_left = self.pad_right = 0
+        elif padding == 'same':
+            H_OUT = int(np.ceil(float(self.H) / float(self.SH)))
+            W_OUT = int(np.ceil(float(self.W) / float(self.SW)))
+            self.pad_along_height = max((H_OUT - 1) * self.SH + self.KH - self.H, 0)
+            self.pad_along_width = max((W_OUT - 1) * self.SW + self.KW - self.W, 0)
+            self.pad_top = int(self.pad_along_height // 2)
+            self.pad_bottom = int(self.pad_along_height - self.pad_top)
+            self.pad_left = int(self.pad_along_width // 2)
+            self.pad_right = int(self.pad_along_width - self.pad_left)
+        else:
+            raise Exception("Unknown padding.")
+        self.in_var.change_padding([[self.pad_top, self.pad_bottom],
+                                    [self.pad_left, self.pad_right],
+                                    [0, 0]])
+        self.out_dim = (H_OUT, W_OUT, self.C_IN)
+        self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
+
+    def lowering(self):
+        """
+        Create the loops required to express this node in ANSI C code without SIMD and connect this node with
+        the new nodes via 'content' edge. This loop will stay in graph to provide meta information.
+        :return: None.
+        """
+
+        # Create loops for setting the bias.
+        b_var = Allocation.allocate_var(self.b.dtype, 'b', self.b.shape, init_data=self.b)
+        out_var_idx = IndexedVariable(self.out_var)
+        b_var_idx = IndexedVariable(b_var)
+
+        # Create the loops using a descriptor.
+        bias_loop_descr = [
+            [0, self.out_dim[0], 1],
+            [0, self.out_dim[1], 1],
+            [0, self.out_dim[2], 1]
+        ]
+        bias_loops = LoopNode.create_loops_by_description(bias_loop_descr)
+        b_h_loop = bias_loops[0]
+        b_w_loop = bias_loops[1]
+        b_c_loop = bias_loops[2]
+
+        set_bias = AssignmentNode(out_var_idx, b_var_idx)
+        b_c_loop.add_edge('content', set_bias)
+        out_var_idx.set_indices([b_h_loop.get_node('var'), b_w_loop.get_node('var'), b_c_loop.get_node('var')])
+        b_var_idx.set_indices([b_c_loop.get_node('var')])
+
+        # Create the loops for convolution, again with descriptors
+        conv_loop_descr = [
+            [0, self.out_dim[0] * self.SH, self.stride[0]],
+            [0, self.out_dim[1] * self.SW, self.stride[1]],
+            [0, self.KH, 1],
+            [0, self.KW, 1],
+            [0, self.C_IN, 1]
+        ]
+        conv_loops = LoopNode.create_loops_by_description(conv_loop_descr)
+        h_loop = conv_loops[0]
+        w_loop = conv_loops[1]
+        kh_loop = conv_loops[2]
+        kw_loop = conv_loops[3]
+        c_in_loop = conv_loops[4]
+
+        b_h_loop.add_edge('next', h_loop)
+
+        w_var = Allocation.allocate_var(self.w.dtype, 'w', self.w.shape, init_data=self.w)
+        out_var_idx = IndexedVariable(self.out_var)
+        in_var_idx = IndexedVariable(self.in_var, False)
+        w_var_idx = IndexedVariable(w_var, False)
+
+        # Indices of IndexedVariables must respect the stride
+        exp1 = Expression('{var} / {stride0}',
+                          var=h_loop.get_node('var'),
+                          stride0=Constant(self.stride[0]))
+        exp2 = Expression('{var} / {stride1}',
+                          var=w_loop.get_node('var'),
+                          stride1=Constant(self.stride[1]))
+        # And access to the image start at the upper left corner. But we have to add the current offset of the filter.
+        exp3 = Expression('{var1} + {var2}',
+                          var1=h_loop.get_node('var'),
+                          var2=kh_loop.get_node('var'))
+        exp4 = Expression('{var1} + {var2}',
+                          var1=w_loop.get_node('var'),
+                          var2=kw_loop.get_node('var'))
+        out_var_idx.set_indices([exp1, exp2, c_in_loop.get_node('var')])
+        in_var_idx.set_indices([exp3, exp4, c_in_loop.get_node('var')])
+        w_var_idx.set_indices(
+            [kh_loop.get_node('var'), kw_loop.get_node('var'), c_in_loop.get_node('var'), Constant(0)]
+        )
+        mac_node = MACNode(out_var_idx, w_var_idx, in_var_idx)
+        c_in_loop.add_edge('content', mac_node)
+
+        # These variables must be declared (partially with initial data) at the beginning of the function
+        self.var_decls.append(self.out_var)
+        self.const_decls.append(w_var)
+        self.const_decls.append(b_var)
+
+        # Don't remove this node, just put everything as content to this node.
+        self.add_edge('content', b_h_loop)
+
+    def quantize(self, x_scale):
+        """
+        Quantize this node.
+        :param x_scale: A factor previously determined by quantize_scale() for scaling the weights. Used for bias here.
+        :return: None.
+        """
+        min_value = np.min([np.min(self.w), np.min(self.b)])
+        max_value = np.max([np.max(self.w), np.max(self.b)])
+        self.scale = QuantizedNode.quantize_scale(min_value, max_value, 'int8')
+        self.w = (self.w / self.scale).astype('int8')
+        self.b = (self.b / self.scale / x_scale).astype('int16')
+
+
+class SeparableConv2D_Pointwise_Node(Node):
+    """
+    A Node representing the pointwise step (1x1 convolution) of a separable convolution. This is an abstract node
+    to represent the meta information given by the Keras SeparableConv2D. It is thus in HWC format. It must be
+    lowered to be writeable as C code but should not be removed from graph to provide the meta information.
+    """
+    quantized = False
+    in_var: Variable
+    out_var: Variable
+    access_pattern: List[int]
+
+    def __init__(self, w: np.ndarray, b: np.ndarray, stride: tuple, padding: str, prev_node):
+        """
+        Initialize the SeparableConv2D_Pointwise_Node.
+        :param w: Weights. Shape must be: 1, 1, channels in, channels out (number of filter)
+                  as NumPy ndarray. Thus the pointwise weight from a Keras SeparableConv2D can be passed without prior conversion.
+        :param b: Bias. NumPy ndarray with length "channels out"
+        :param stride: Tuple of 2.
+        :param padding: Only stored. No padding is ever applied as a 1x1 kernel does not need any.
+        :param prev_node: The previous node.
+        """
+        if w.shape[0] != 1 or w.shape[1] != 1:
+            # lowering() only reads kernel element (0, 0), larger kernels would silently give wrong code.
+            raise NotImplementedError("Pointwise convolution requires a 1x1 kernel.")
+        self.in_var = prev_node.out_var
+        x = self.in_var
+        super().__init__(prev_node)
+        self.in_dim = prev_node.out_dim
+        self.w = w
+        self.b = b
+        self.stride = stride
+        self.padding = padding
+        self.H, self.W, self.C_IN = x.dim
+        self.KH, self.KW, _, self.C_OUT = w.shape
+        self.SH, self.SW = stride
+
+        H_OUT = int(np.ceil((self.H - self.KH + 1) / self.SH))
+        W_OUT = int(np.ceil((self.W - self.KW + 1) / self.SW))
+        self.pad_top = self.pad_bottom = self.pad_left = self.pad_right = 0
+
+        self.in_var.change_padding([[self.pad_top, self.pad_bottom],
+                                    [self.pad_left, self.pad_right],
+                                    [0, 0]])
+        self.out_dim = (H_OUT, W_OUT, self.C_OUT)
+        self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
+
+    def lowering(self):
+        """
+        Create the loops required to express this node in ANSI C code without SIMD and connect this node with
+        the new nodes via 'content' edge. This loop will stay in graph to provide meta information.
+        :return: None.
+        """
+
+        # Create loops for setting the bias.
+        b_var = Allocation.allocate_var(self.b.dtype, 'b', self.b.shape, init_data=self.b)
+        out_var_idx = IndexedVariable(self.out_var)
+        b_var_idx = IndexedVariable(b_var)
+
+        # Create the loops using a descriptor.
+        bias_loop_descr = [
+            [0, self.out_dim[0], 1],
+            [0, self.out_dim[1], 1],
+            [0, self.out_dim[2], 1]
+        ]
+        bias_loops = LoopNode.create_loops_by_description(bias_loop_descr)
+        b_h_loop = bias_loops[0]
+        b_w_loop = bias_loops[1]
+        b_c_loop = bias_loops[2]
+
+        set_bias = AssignmentNode(out_var_idx, b_var_idx)
+        b_c_loop.add_edge('content', set_bias)
+        out_var_idx.set_indices([b_h_loop.get_node('var'), b_w_loop.get_node('var'), b_c_loop.get_node('var')])
+        b_var_idx.set_indices([b_c_loop.get_node('var')])
+
+        # Create the loops for convolution, again with descriptors
+        conv_loop_descr = [
+            [0, self.out_dim[0] * self.SH, self.stride[0]],
+            [0, self.out_dim[1] * self.SW, self.stride[1]],
+            [0, self.C_IN, 1],
+            [0, self.C_OUT, 1]
+        ]
+        conv_loops = LoopNode.create_loops_by_description(conv_loop_descr)
+        h_loop = conv_loops[0]
+        w_loop = conv_loops[1]
+        c_in_loop = conv_loops[2]
+        c_out_loop = conv_loops[3]
+
+        b_h_loop.add_edge('next', h_loop)
+
+        w_var = Allocation.allocate_var(self.w.dtype, 'w', self.w.shape, init_data=self.w)
+        out_var_idx = IndexedVariable(self.out_var)
+        in_var_idx = IndexedVariable(self.in_var, False)
+        w_var_idx = IndexedVariable(w_var, False)
+
+        # Indices of the output must respect the stride. The 1x1 kernel adds no offset to the input indices.
+        exp1 = Expression('{var} / {stride0}',
+                          var=h_loop.get_node('var'),
+                          stride0=Constant(self.stride[0]))
+        exp2 = Expression('{var} / {stride1}',
+                          var=w_loop.get_node('var'),
+                          stride1=Constant(self.stride[1]))
+        out_var_idx.set_indices([exp1, exp2, c_out_loop.get_node('var')])
+        w_var_idx.set_indices(
+            [Constant(0), Constant(0), c_in_loop.get_node('var'), c_out_loop.get_node('var')]
+        )
+        in_var_idx.set_indices([h_loop.get_node('var'), w_loop.get_node('var'), c_in_loop.get_node('var')])
+        mac_node = MACNode(out_var_idx, w_var_idx, in_var_idx)
+        c_out_loop.add_edge('content', mac_node)
+
+        # These variables must be declared (partially with initial data) at the beginning of the function
+        self.var_decls.append(self.out_var)
+        self.const_decls.append(w_var)
+        self.const_decls.append(b_var)
+
+        # Don't remove this node, just put everything as content to this node.
+        self.add_edge('content', b_h_loop)
+
+    def quantize(self, x_scale):
+        """
+        Quantize this node.
+        :param x_scale: A factor previously determined by quantize_scale() for scaling the weights. Used for bias here.
+        :return: None.
+        """
+        min_value = np.min([np.min(self.w), np.min(self.b)])
+        max_value = np.max([np.max(self.w), np.max(self.b)])
+        self.scale = QuantizedNode.quantize_scale(min_value, max_value, 'int8')
+        self.w = (self.w / self.scale).astype('int8')
+        self.b = (self.b / self.scale / x_scale).astype('int16')
 
 class LeakyReLUNode(Node):
     """