diff --git a/README.md b/README.md
index eed8d31..278924d 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,7 @@ a VGG16 convolutional neural network and all kind of networks based on the layer
 Currently the following layers are supported:
 
 * Convolution2D with padding valid/same and strides
+* SeparableConvolution2D with padding valid/same and strides (depth multiplier 1 only)
 * MaxPooling2D with strides
 * Dense
 * Softmax
diff --git a/applications/daimler/compiler_test.py b/applications/daimler/compiler_test.py
index 9adc22c..197c109 100644
--- a/applications/daimler/compiler_test.py
+++ b/applications/daimler/compiler_test.py
@@ -8,6 +8,14 @@ def test_daimler(self):
         generate("db", {"x": 18, "y": 36}, "img.db")
         train("img.db", "model.h5")
         compile("model.h5", ".", "img.db")
+
+    def test_daimler_separable(self):
+        """Run generate, train (with separable convolutions) and compile back to back."""
+        imgdb_file = "img.db"
+        model_file = "model_separable.h5"
+        generate("db", {"x": 18, "y": 36}, imgdb_file)
+        train(imgdb_file, model_file, use_separable=True)
+        compile(model_file, ".", imgdb_file)
 
 if __name__ == "__main__":
     t = Tests()
diff --git a/applications/daimler/train.py b/applications/daimler/train.py
index 32fdcda..389fdba 100755
--- a/applications/daimler/train.py
+++ b/applications/daimler/train.py
@@ -12,45 +12,51 @@
         print("PlaidML not found, using Tensorflow")
         USE_PLAIDML = False
 import argparse
-from keras.layers import Flatten, MaxPooling2D, Convolution2D, Dropout, Dense
+from keras.layers import Flatten, MaxPooling2D, Convolution2D, SeparableConvolution2D, Dropout, Dense
 from keras.models import Sequential
 from applications.daimler.loader import load_imdb
 
-def train(imgdb_path, model_path):
+def create_conv_model(shape, ConvolutionType=Convolution2D):
+    """
+    Build the network: three convolution + max pooling stages, dropout, flatten and a dense softmax layer.
+    :param shape: Input shape, given to the first convolution layer as input_shape.
+    :param ConvolutionType: Layer class instantiated for each of the three convolutions.
+    :return: The assembled Sequential model. It is not compiled here.
+    """
+    conv_model = Sequential()
+    pool = (2, 2)
+    stack = [
+        ConvolutionType(8, (3, 3), input_shape=shape, activation='relu', padding='same'),
+        MaxPooling2D(pool_size=pool),
+        ConvolutionType(24, (3, 3), padding='same', activation='relu'),
+        MaxPooling2D(pool_size=pool),
+        ConvolutionType(48, (4, 3), padding='same', activation='relu'),
+        MaxPooling2D(pool_size=pool),
+        Dropout(0.4),
+        Flatten(),
+        Dense(2, activation='softmax'),
+    ]
+    for layer in stack:
+        conv_model.add(layer)
+    return conv_model
+
+def train(imgdb_path, model_path, use_separable=False):
     imdb = load_imdb(imgdb_path)
     x = imdb['images']
     y = imdb['y']
 
-    usual_model = Sequential()
-    usual_model.add(Convolution2D(4, (3, 3), input_shape=(x.shape[1], x.shape[2], 1),
-                                activation='relu', padding='same'))
-    usual_model.add(MaxPooling2D(pool_size=(2, 2)))
-    usual_model.add(Convolution2D(16, (3, 3), padding='same', activation='relu'))
-    usual_model.add(MaxPooling2D(pool_size=(2, 2)))
-    usual_model.add(Convolution2D(32, (3, 3), padding='same', activation='relu',))
-    usual_model.add(MaxPooling2D(pool_size=(4, 2)))
-    usual_model.add(Dropout(0.4))
-    usual_model.add(Convolution2D(2, (2, 2), activation='softmax'))
-    usual_model.add(Flatten())
-
-    dense_model = Sequential()
-    dense_model.add(Convolution2D(4, (3, 3), input_shape=(x.shape[1], x.shape[2], 1),
-                                activation='relu', padding='same'))
-    dense_model.add(MaxPooling2D(pool_size=(2, 2)))
-    dense_model.add(Convolution2D(16, (3, 3), padding='same', activation='relu'))
-    dense_model.add(MaxPooling2D(pool_size=(2, 2)))
-    dense_model.add(Convolution2D(32, (3, 3), padding='same', activation='relu'))
-    dense_model.add(MaxPooling2D(pool_size=(2, 2)))
-    dense_model.add(Dropout(0.4))
-    dense_model.add(Flatten())
-    dense_model.add(Dense(2, activation='softmax'))
+    input_shape = (x.shape[1], x.shape[2], 1)
+    conv_type, epochs = (SeparableConvolution2D, 30) if use_separable else (Convolution2D, 10)
 
     # Select the current model here
-    model = dense_model
+    # Both variants share the architecture built by create_conv_model() and
+    # differ only in the convolution layer class; the separable variant is
+    # trained for 30 epochs instead of 10.
+    model = create_conv_model(input_shape, conv_type)
 
     model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
     print(model.summary())
-    model.fit(x, y, batch_size=1000, epochs=10, verbose=1, validation_split=0.05)
+    model.fit(x, y, batch_size=1000, epochs=epochs, verbose=1, validation_split=0.05)
     model.save(model_path)
 
 if __name__ == "__main__":
@@ -61,11 +57,14 @@ def train(imgdb_path, model_path):
                             'Default is img.db in current folder.')
     parser.add_argument('-m', '--model-path', dest='model_path',
                         help='Store the trained model using this path. Default is model.h5.')
+    parser.add_argument('-s', '--separable', dest='use_separable', action='store_true',
+                        help='Whether to use separable convolution instead of regular ones. Default is regular (Conv2D).')
 
     args = parser.parse_args()
 
     imgdb_path = "img.db"
     model_path = "model.h5"
+    use_separable = False
 
     if args.imgdb_path is not None:
         imgdb_path = args.imgdb_path
@@ -73,5 +72,8 @@ def train(imgdb_path, model_path):
     if args.model_path is not None:
         model_path = args.model_path
     
-    train(imgdb_path, model_path)
+    if args.use_separable is not None:
+        use_separable = args.use_separable
+    
+    train(imgdb_path, model_path, use_separable)
 
diff --git a/nncg/nncg.py b/nncg/nncg.py
index 0b49050..197a534 100644
--- a/nncg/nncg.py
+++ b/nncg/nncg.py
@@ -1,5 +1,5 @@
 from tensorflow.keras import backend as K
-from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Flatten, \
+from tensorflow.keras.layers import SeparableConvolution2D, Convolution2D, MaxPooling2D, Flatten, \
     Dropout, BatchNormalization, LeakyReLU, InputLayer, Dense
 import keras.layers as kl
 
@@ -82,7 +82,9 @@ def keras_compile(self, imdb, model, code_path, identifier=None, image_mean=0, a
         # Read the Keras model layer by layer and add it to the graph
 
         for i, layer in enumerate(model.layers):
-            if type(layer) == Convolution2D or type(layer) == kl.convolutional.Conv2D:
+            if type(layer) == SeparableConvolution2D or type(layer) == kl.convolutional.SeparableConv2D:
+                cur_node = self.add_separable_conv2d(layer, cur_node)
+            elif type(layer) == Convolution2D or type(layer) == kl.convolutional.Conv2D:
                 cur_node = self.add_conv2d(layer, cur_node)
             elif type(layer) == MaxPooling2D or type(layer) == kl.pooling.MaxPooling2D:
                 cur_node = self.add_maxpool2d(layer, cur_node)
@@ -339,6 +341,35 @@ def add_conv2d(self, layer: Convolution2D, prev_node) -> Node:
             cur_node = self.add_test_node(cur_node, layer)
         return cur_node
 
+    def add_separable_conv2d(self, layer: SeparableConvolution2D, prev_node) -> Node:
+        """
+        Add a Keras SeparableConv2D layer to the graph as a depthwise node followed by a pointwise node
+        and the activation of the layer.
+        :param layer: The Keras layer. Its first two weights are passed to the depthwise and pointwise node.
+        :param prev_node: The previous node.
+        :return: The last node that was added for this layer.
+        """
+        depthwise_w = K.eval(layer.weights[0])
+        pointwise_w = K.eval(layer.weights[1])
+        # The depthwise node reads only index 0 of the last weight axis (the depth multiplier).
+        if depthwise_w.shape[3] != 1:
+            raise NotImplementedError("SeparableConv2D with depth_multiplier != 1 is not supported.")
+        # A layer without bias has no bias tensor to evaluate, zeros keep the generated code uniform.
+        if layer.bias is None:
+            b = np.zeros(pointwise_w.shape[3], dtype='float32')
+        else:
+            b = K.eval(layer.bias)
+        # The layer bias belongs to the pointwise step. The depthwise node gets zeros instead, one entry per
+        # depthwise output channel (= input channel) because that is how the node indexes its bias.
+        depthwise_b = np.zeros(depthwise_w.shape[2], dtype='float32')
+        cur_node = SeparableConv2D_Depthwise_Node(depthwise_w, depthwise_b, layer.strides, layer.padding,
+                                                  prev_node)
+        cur_node = SeparableConv2D_Pointwise_Node(pointwise_w, b, (1, 1), 'valid', cur_node)
+        cur_node = self.add_activation(layer.activation, cur_node)
+        if self.testing != 0:
+            cur_node = self.add_test_node(cur_node, layer)
+        return cur_node
+
     def write_c(self, path):
         """
         Write the global graph as C code.
diff --git a/nncg/nodes/cnn.py b/nncg/nodes/cnn.py
index 926738b..ee5c524 100644
--- a/nncg/nodes/cnn.py
+++ b/nncg/nodes/cnn.py
@@ -156,6 +156,277 @@ def quantize(self, x_scale):
         self.b = (self.b / self.scale / x_scale).astype('int16')
         #self.out_var.type = 'int'
 
+class SeparableConv2D_Depthwise_Node(Node):
+    """
+    A Node representing the depthwise step of a separable convolution: each input channel is convolved with
+    its own 2D kernel, so the output has as many channels as the input. This is an abstract node to represent
+    the meta information given by the Keras SeparableConv2D. It is thus in HWC format. It must be lowered to be
+    writeable as C code but should not be removed from graph to provide the meta information.
+    Only a depth multiplier of 1 is supported.
+    """
+    quantized = False
+    in_var: Variable
+    out_var: Variable
+    access_pattern: List[int]
+
+    def __init__(self, w: np.ndarray, b: np.ndarray, stride: tuple, padding: str, prev_node):
+        """
+        Initialize the SeparableConv2D_Depthwise_Node.
+        :param w: Weights. Shape must be: kernel height, kernel width, channels in, depth multiplier (must be 1)
+                  as NumPy ndarray. Thus the depthwise kernel from a Keras SeparableConv2D can be passed without
+                  prior conversion.
+        :param b: Bias. NumPy ndarray with at least "channels in" entries, entry c initializes output
+                  channel c.
+        :param stride: Tuple of 2.
+        :param padding: Like in TensorFlow 'same' or 'valid'
+        :param prev_node: The previous node.
+        """
+        self.in_var = prev_node.out_var
+        x = self.in_var
+        super().__init__(prev_node)
+        self.in_dim = prev_node.out_dim
+        self.w = w
+        self.b = b
+        self.stride = stride
+        self.padding = padding
+        self.H, self.W, self.C_IN = x.dim
+        # For a depthwise kernel the last axis is the depth multiplier, not a number of filters.
+        self.KH, self.KW, _, self.C_OUT = w.shape
+        self.SH, self.SW = stride
+
+        # lowering() reads only index 0 of the multiplier axis, anything else would silently drop weights.
+        if self.C_OUT != 1:
+            raise NotImplementedError("Depth multiplier other than 1 is not supported.")
+        # lowering() indexes the bias with the channel index, a shorter bias would be read out of bounds.
+        if len(b) < self.C_IN:
+            raise ValueError("Bias must have one entry per input channel.")
+
+        if padding == 'valid':
+            H_OUT = int(np.ceil((self.H - self.KH + 1) / self.SH))
+            W_OUT = int(np.ceil((self.W - self.KW + 1) / self.SW))
+            self.pad_top = self.pad_bottom = self.pad_left = self.pad_right = 0
+        elif padding == 'same':
+            H_OUT = int(np.ceil(float(self.H) / float(self.SH)))
+            W_OUT = int(np.ceil(float(self.W) / float(self.SW)))
+            self.pad_along_height = max((H_OUT - 1) * self.SH + self.KH - self.H, 0)
+            self.pad_along_width = max((W_OUT - 1) * self.SW + self.KW - self.W, 0)
+            self.pad_top = int(self.pad_along_height // 2)
+            self.pad_bottom = int(self.pad_along_height - self.pad_top)
+            self.pad_left = int(self.pad_along_width // 2)
+            self.pad_right = int(self.pad_along_width - self.pad_left)
+        else:
+            raise Exception("Unknown padding.")
+        self.in_var.change_padding([[self.pad_top, self.pad_bottom],
+                                    [self.pad_left, self.pad_right],
+                                    [0, 0]])
+        self.out_dim = (H_OUT, W_OUT, self.C_IN)
+        self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
+
+    def lowering(self):
+        """
+        Create the loops required to express this node in ANSI C code without SIMD and connect this node with
+        the new nodes via 'content' edge. This loop will stay in graph to provide meta information.
+        :return: None.
+        """
+
+        # Create loops for setting the bias.
+        b_var = Allocation.allocate_var(self.b.dtype, 'b', self.b.shape, init_data=self.b)
+        out_var_idx = IndexedVariable(self.out_var)
+        b_var_idx = IndexedVariable(b_var)
+
+        # Create the loops using a descriptor.
+        bias_loop_descr = [
+            [0, self.out_dim[0], 1],
+            [0, self.out_dim[1], 1],
+            [0, self.out_dim[2], 1]
+        ]
+        bias_loops = LoopNode.create_loops_by_description(bias_loop_descr)
+        b_h_loop = bias_loops[0]
+        b_w_loop = bias_loops[1]
+        b_c_loop = bias_loops[2]
+
+        set_bias = AssignmentNode(out_var_idx, b_var_idx)
+        b_c_loop.add_edge('content', set_bias)
+        out_var_idx.set_indices([b_h_loop.get_node('var'), b_w_loop.get_node('var'), b_c_loop.get_node('var')])
+        b_var_idx.set_indices([b_c_loop.get_node('var')])
+
+        # Create the loops for convolution, again with descriptors
+        conv_loop_descr = [
+            [0, self.out_dim[0] * self.SH, self.stride[0]],
+            [0, self.out_dim[1] * self.SW, self.stride[1]],
+            [0, self.KH, 1],
+            [0, self.KW, 1],
+            [0, self.C_IN, 1]
+        ]
+        conv_loops = LoopNode.create_loops_by_description(conv_loop_descr)
+        h_loop = conv_loops[0]
+        w_loop = conv_loops[1]
+        kh_loop = conv_loops[2]
+        kw_loop = conv_loops[3]
+        c_in_loop = conv_loops[4]
+
+        b_h_loop.add_edge('next', h_loop)
+
+        w_var = Allocation.allocate_var(self.w.dtype, 'w', self.w.shape, init_data=self.w)
+        out_var_idx = IndexedVariable(self.out_var)
+        in_var_idx = IndexedVariable(self.in_var, False)
+        w_var_idx = IndexedVariable(w_var, False)
+
+        # Indices of IndexedVariables must respect the stride
+        exp1 = Expression('{var} / {stride0}',
+                          var=h_loop.get_node('var'),
+                          stride0=Constant(self.stride[0]))
+        exp2 = Expression('{var} / {stride1}',
+                          var=w_loop.get_node('var'),
+                          stride1=Constant(self.stride[1]))
+        # And access to the image start at the upper left corner. But we have to add the current offset of the filter.
+        exp3 = Expression('{var1} + {var2}',
+                          var1=h_loop.get_node('var'),
+                          var2=kh_loop.get_node('var'))
+        exp4 = Expression('{var1} + {var2}',
+                          var1=w_loop.get_node('var'),
+                          var2=kw_loop.get_node('var'))
+        # Input and output share the channel index: each channel is filtered on its own.
+        out_var_idx.set_indices([exp1, exp2, c_in_loop.get_node('var')])
+        in_var_idx.set_indices([exp3, exp4, c_in_loop.get_node('var')])
+        w_var_idx.set_indices(
+            [kh_loop.get_node('var'), kw_loop.get_node('var'), c_in_loop.get_node('var'), Constant(0)]
+        )
+        mac_node = MACNode(out_var_idx, w_var_idx, in_var_idx)
+        c_in_loop.add_edge('content', mac_node)
+
+        # These variables must be declared (partially with initial data) at the beginning of the function
+        self.var_decls.append(self.out_var)
+        self.const_decls.append(w_var)
+        self.const_decls.append(b_var)
+
+        # Don't remove this node, just put everything as content to this node.
+        self.add_edge('content', b_h_loop)
+
+    def quantize(self, x_scale):
+        """
+        Quantize this node.
+        :param x_scale: A factor previously determined by quantize_scale() for scaling the weights. Used for bias here.
+        :return: None.
+        """
+        lowest = np.min([np.min(self.w), np.min(self.b)])
+        highest = np.max([np.max(self.w), np.max(self.b)])
+        self.scale = QuantizedNode.quantize_scale(lowest, highest, 'int8')
+        self.w = (self.w / self.scale).astype('int8')
+        self.b = (self.b / self.scale / x_scale).astype('int16')
+
+class SeparableConv2D_Pointwise_Node(Node):
+    """
+    A Node representing the pointwise (1x1) step of a separable convolution, which mixes the channels of
+    each pixel. This is an abstract node in HWC format. It must be lowered to be writeable as C code but
+    should not be removed from graph to provide the meta information.
+    """
+    quantized = False
+    in_var: Variable
+    out_var: Variable
+    access_pattern: List[int]
+
+    def __init__(self, w: np.ndarray, b: np.ndarray, stride: tuple, padding: str, prev_node):
+        """
+        Initialize the SeparableConv2D_Pointwise_Node.
+        :param w: Weights. Shape must be: 1, 1, channels in, channels out (number of filter) as NumPy
+                  ndarray. Thus the pointwise kernel of a Keras SeparableConv2D can be passed directly.
+        :param b: Bias. NumPy ndarray with length "channels out"
+        :param stride: Tuple of 2. Must be (1, 1) because lowering() does not scale the output indices.
+        :param padding: Not evaluated. No padding is applied, a 1x1 kernel never needs any.
+        :param prev_node: The previous node.
+        """
+        self.in_var = prev_node.out_var
+        x = self.in_var
+        super().__init__(prev_node)
+        self.in_dim = prev_node.out_dim
+        self.w = w
+        self.b = b
+        self.stride = stride
+        self.padding = padding
+        self.H, self.W, self.C_IN = x.dim
+        self.KH, self.KW, _, self.C_OUT = w.shape
+        self.SH, self.SW = stride
+
+        # lowering() reads only w[0, 0] and uses the loop variables directly as output indices.
+        if (self.KH, self.KW) != (1, 1) or (self.SH, self.SW) != (1, 1):
+            raise NotImplementedError("Only a 1x1 kernel with stride (1, 1) is supported.")
+        H_OUT = int(np.ceil((self.H - self.KH + 1) / self.SH))
+        W_OUT = int(np.ceil((self.W - self.KW + 1) / self.SW))
+        self.pad_top = self.pad_bottom = self.pad_left = self.pad_right = 0
+        self.in_var.change_padding([[self.pad_top, self.pad_bottom], [self.pad_left, self.pad_right], [0, 0]])
+        self.out_dim = (H_OUT, W_OUT, self.C_OUT)
+        self.out_var = Allocation.allocate_var('float', 'x', self.out_dim)
+
+    def lowering(self):
+        """Create the bias and convolution loops for ANSI C code and attach them via 'content' edge."""
+        # Create loops for setting the bias.
+        b_var = Allocation.allocate_var(self.b.dtype, 'b', self.b.shape, init_data=self.b)
+        out_var_idx = IndexedVariable(self.out_var)
+        b_var_idx = IndexedVariable(b_var)
+
+        # Create the loops using a descriptor.
+        bias_loop_descr = [
+            [0, self.out_dim[0], 1],
+            [0, self.out_dim[1], 1],
+            [0, self.out_dim[2], 1]
+        ]
+        bias_loops = LoopNode.create_loops_by_description(bias_loop_descr)
+        b_h_loop = bias_loops[0]
+        b_w_loop = bias_loops[1]
+        b_c_loop = bias_loops[2]
+
+        set_bias = AssignmentNode(out_var_idx, b_var_idx)
+        b_c_loop.add_edge('content', set_bias)
+        out_var_idx.set_indices([b_h_loop.get_node('var'), b_w_loop.get_node('var'), b_c_loop.get_node('var')])
+        b_var_idx.set_indices([b_c_loop.get_node('var')])
+
+        # Create the loops for convolution, again with descriptors
+        conv_loop_descr = [
+            [0, self.out_dim[0] * self.SH, self.stride[0]],
+            [0, self.out_dim[1] * self.SW, self.stride[1]],
+            [0, self.C_IN, 1],
+            [0, self.C_OUT, 1]
+        ]
+        conv_loops = LoopNode.create_loops_by_description(conv_loop_descr)
+        h_loop = conv_loops[0]
+        w_loop = conv_loops[1]
+        c_in_loop = conv_loops[2]
+        c_out_loop = conv_loops[3]
+
+        b_h_loop.add_edge('next', h_loop)
+
+        w_var = Allocation.allocate_var(self.w.dtype, 'w', self.w.shape, init_data=self.w)
+        out_var_idx = IndexedVariable(self.out_var)
+        in_var_idx = IndexedVariable(self.in_var, False)
+        w_var_idx = IndexedVariable(w_var, False)
+
+        out_var_idx.set_indices([h_loop.get_node('var'), w_loop.get_node('var'), c_out_loop.get_node('var')])
+        w_var_idx.set_indices(
+            [Constant(0), Constant(0), c_in_loop.get_node('var'), c_out_loop.get_node('var')]
+        )
+        in_var_idx.set_indices([h_loop.get_node('var'), w_loop.get_node('var'), c_in_loop.get_node('var')])
+        mac_node = MACNode(out_var_idx, w_var_idx, in_var_idx)
+        c_out_loop.add_edge('content', mac_node)
+
+        # These variables must be declared (partially with initial data) at the beginning of the function
+        self.var_decls.append(self.out_var)
+        self.const_decls.append(w_var)
+        self.const_decls.append(b_var)
+
+        # Don't remove this node, just put everything as content to this node.
+        self.add_edge('content', b_h_loop)
+
+    def quantize(self, x_scale):
+        """
+        Quantize this node.
+        :param x_scale: A factor previously determined by quantize_scale() for scaling the weights. Used for bias here.
+        :return: None.
+        """
+        lowest = np.min([np.min(self.w), np.min(self.b)])
+        highest = np.max([np.max(self.w), np.max(self.b)])
+        self.scale = QuantizedNode.quantize_scale(lowest, highest, 'int8')
+        self.w = (self.w / self.scale).astype('int8')
+        self.b = (self.b / self.scale / x_scale).astype('int16')
 
 class LeakyReLUNode(Node):
     """