@@ -68,6 +68,9 @@ def _gen_eqs(self):
 
         input_function = None
 
+        for layer in self._layers:
+            eqs.append(Eq(layer.result, 0))
+
         for layer in self._layers:
             if input_function is not None:
                 dims = input_function.dimensions
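
A minimal sketch of the idiom the added equations rely on (assuming the Eq/Operator
machinery here is Devito's): placing Eq(f, 0) ahead of the other equations makes the
generated kernel reset the buffer on every apply(), which is why the Python-side
layer.result.data[:] = 0 loop can be dropped from forward() further down. The grid
shape and the function name f are illustrative, not taken from this patch.

    # Sketch only: the reset runs inside the compiled operator, before the update.
    from devito import Grid, Function, Eq, Operator

    grid = Grid(shape=(4, 4))
    f = Function(name='f', grid=grid)

    f.data[:] = 7.0                          # stale values from an earlier run
    op = Operator([Eq(f, 0), Eq(f, f + 1)])
    op.apply()
    assert (f.data == 1.0).all()             # buffer was re-zeroed in the kernel
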
@@ -85,6 +88,19 @@ def _gen_backprop_eqs(self):
         eqs = []
         args = []
 
+        for i in range(len(self._layers)):
+            layer = self._layers[i]
+
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients, 0))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients, 0))
+
+            if layer.result_gradients is not None \
+                    and i < len(self._layers) - 1:
+                eqs.append(Eq(layer.result_gradients, 0))
+
         for i in range(len(self._layers) - 1, -1, -1):
             if i < len(self._layers) - 1:
                 prev_layer = self._layers[i + 1]
@@ -102,6 +118,17 @@ def _gen_backprop_eqs(self):
             args += layer_args
             eqs += layer_eqs
 
+        batch_size = self._layers[-1].result.shape[1]
+
+        for layer in self._layers:
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients,
+                              layer.kernel_gradients / batch_size))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients,
+                              layer.bias_gradients / batch_size))
+
         return (eqs, args)
 
     @property
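
The division equations are appended after the per-layer backprop equations, so the
compiled backward kernel first accumulates the gradients and then averages them over
the batch within the same apply() call, replacing the data[:] /= batch_size loop
removed from backward() below. A small ordering sketch (again assuming Devito; the
names and the stand-in "gradient" update are assumptions, not the network's real
equations):

    # Sketch only: equations are honoured in the order given, so the
    # normalisation runs after the accumulation in the generated kernel.
    from devito import Grid, Function, Eq, Operator

    grid = Grid(shape=(3, 3))
    g = Function(name='g', grid=grid)
    batch_size = 4

    eqs = [Eq(g, 0),                  # reset, as in _gen_backprop_eqs
           Eq(g, g + 10),             # stand-in for the accumulated batch gradient
           Eq(g, g / batch_size)]     # average over the batch, appended last
    Operator(eqs).apply()
    assert (g.data == 2.5).all()      # 10 / 4
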
@@ -118,9 +145,6 @@ def forward(self, input_data):
         input_data : np.ndarray
             Input data for the network.
         """
-        for layer in self._layers:
-            layer.result.data[:] = 0
-
         self._layers[0].input.data[:] = input_data
         self._forward_operator.apply(**self._forward_arg_dict)
         return self._layers[-1].result.data
@@ -154,29 +178,10 @@ def backward(self, expected, loss_gradient_func, pytorch_optimizer=None):
 
         The default value is None.
         """
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] = 0
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] = 0
-
-            if layer.result_gradients is not None:
-                layer.result_gradients.data[:] = 0
-
-        batch_size = self._layers[-1].result.shape[1]
-
         self._layers[-1].result_gradients.data[:] = \
             np.transpose(np.array(loss_gradient_func(self._layers[-1],
                                                      expected)))
         self._backward_operator.apply(**self._backward_arg_dict)
 
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] /= batch_size
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] /= batch_size
-
         if pytorch_optimizer is not None:
             pytorch_optimizer.step()