Skip to content

Commit 7e67836

Browse files
committed
hardswish ok
1 parent 4cd3eb0 commit 7e67836

File tree

2 files changed

+124
-73
lines changed

2 files changed

+124
-73
lines changed

linalg/arm64/arm64simd/arm64simd_act_f32_32n.tmpl

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,15 @@
195195
.sub:
196196
b .unsupported
197197
.mul:
198-
b .unsupported
198+
fmul v0.4s, v0.4s, v8.4s
199+
fmul v1.4s, v1.4s, v9.4s
200+
fmul v2.4s, v2.4s, v10.4s
201+
fmul v3.4s, v3.4s, v11.4s
202+
fmul v4.4s, v4.4s, v12.4s
203+
fmul v5.4s, v5.4s, v13.4s
204+
fmul v6.4s, v6.4s, v14.4s
205+
fmul v7.4s, v7.4s, v15.4s
206+
b .inner_loop
199207
.min:
200208
b .unsupported
201209
.max:
@@ -260,7 +268,23 @@
260268
.fma:
261269
b .unsupported
262270
.if_pos_then_else:
263-
b .unsupported
271+
fcmge v0.4s, v0.4s, #0.0
272+
fcmge v1.4s, v1.4s, #0.0
273+
fcmge v2.4s, v2.4s, #0.0
274+
fcmge v3.4s, v3.4s, #0.0
275+
fcmge v4.4s, v4.4s, #0.0
276+
fcmge v5.4s, v5.4s, #0.0
277+
fcmge v6.4s, v6.4s, #0.0
278+
fcmge v7.4s, v7.4s, #0.0
279+
bsl v0.16b, v8.16b, v16.16b
280+
bsl v1.16b, v9.16b, v17.16b
281+
bsl v2.16b, v10.16b, v18.16b
282+
bsl v3.16b, v11.16b, v19.16b
283+
bsl v4.16b, v12.16b, v20.16b
284+
bsl v5.16b, v13.16b, v21.16b
285+
bsl v6.16b, v14.16b, v22.16b
286+
bsl v7.16b, v15.16b, v23.16b
287+
b .inner_loop
264288
.swap_b_c:
265289
b .unsupported
266290
.floor:

linalg/src/frame/activations/tests.rs

Lines changed: 98 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ pub fn run_kernel_test<TI: LADatum, K: ActivationKer<TI>>(
1616
input: &[TI],
1717
prog: &[Op<TI>],
1818
refer: impl Fn(TI) -> TI,
19-
) {
19+
) {
2020
let mut tensor =
2121
tract_data::prelude::Tensor::zero_aligned::<TI>(&[input.len()], K::alignment_bytes())
22-
.unwrap();
22+
.unwrap();
2323
tensor.as_slice_mut::<TI>().unwrap().copy_from_slice(input);
2424
let expected: Vec<TI> = input.iter().cloned().map(|x| refer(x)).collect();
2525
let expected = tract_data::prelude::tensor1(&expected);
@@ -94,7 +94,7 @@ macro_rules! act_tests {
9494
&x,
9595
&[Load(RegisterId::B, konst), Move(RegisterId::A, RegisterId::B)],
9696
|_| konst
97-
);
97+
);
9898
}
9999
}
100100

@@ -125,97 +125,124 @@ macro_rules! act_tests {
125125
run_kernel_test::<$ti, $ker>(&x, &[MinConst(alpha)], |x| x.min(alpha));
126126
}
127127
}
128-
}
129-
130-
#[test]
131-
fn max_const_zero() {
132-
if $cond {
133-
run_kernel_test::<$ti, $ker>(
134-
&vec![<$ti>::zero(); <$ker>::nr()],
135-
&[MaxConst(<$ti>::zero())],
136-
|x| x.max(<$ti>::zero()),
137-
);
138-
}
139-
}
140128

141-
#[test]
142-
fn max_const_big_alpha() {
143-
if $cond {
144-
run_kernel_test::<$ti, $ker>(
145-
&vec![<$ti>::zero(); <$ker>::nr()],
146-
&[MaxConst(7.567773e37.into())],
147-
|x| x.max(7.567773e37.into()),
148-
);
149-
}
150-
}
151-
152-
#[test]
153-
fn move_b_to_a_0() {
154-
if $cond {
155-
run_kernel_test::<$ti, $ker>(
156-
&*vec![<$ti>::zero(); <$ker>::nr()],
157-
&[Load(RegisterId::B, 1.0 as _), Move(RegisterId::A, RegisterId::B)],
158-
|_| 1.0 as _,
159-
);
129+
#[test]
130+
fn mul_prop(x in x_strat(), v in any::<$ti>()) {
131+
if $cond {
132+
run_kernel_test::<$ti, $ker>(&x, &[Load(RegisterId::B, v), Mul], |x| x * v);
133+
}
160134
}
161-
}
162135

163-
proptest::proptest! {
164136
#[test]
165-
fn relu_prop(x in x_strat()) {
137+
fn ifposte_prop(x in x_strat()) {
166138
if $cond {
167-
run_kernel_test::<$ti, $ker>(
168-
&x,
169-
&$crate::frame::activations::definitions::relu().ops,
170-
|x| x.max(<$ti>::zero())
171-
);
139+
run_kernel_test::<$ti, $ker>(&x,
140+
&[Load(RegisterId::B, 2 as _), Load(RegisterId::C, 3 as _), IfPosTE],
141+
|x| if x >= <$ti>::zero() { 2 as _ } else { 3 as _ });
142+
}
172143
}
173144
}
174145

175146
#[test]
176-
fn affine_prop(x in x_strat(), alpha in any::<$ti>(), beta in any::<$ti>()) {
147+
fn max_const_zero() {
177148
if $cond {
178149
run_kernel_test::<$ti, $ker>(
179-
&x,
180-
&$crate::frame::activations::definitions::affine(alpha, beta).ops,
181-
|x| x * alpha + beta
182-
);
150+
&vec![<$ti>::zero(); <$ker>::nr()],
151+
&[MaxConst(<$ti>::zero())],
152+
|x| x.max(<$ti>::zero()),
153+
);
183154
}
184155
}
185156

186157
#[test]
187-
fn hard_sigmoid(x in x_strat(), alpha in any::<$ti>(), beta in any::<$ti>()) {
158+
fn max_const_big_alpha() {
188159
if $cond {
189160
run_kernel_test::<$ti, $ker>(
190-
&x,
191-
&$crate::frame::activations::definitions::hard_sigmoid(alpha, beta).ops,
192-
|x| (x * alpha + beta).min(<$ti>::one()).max(<$ti>::zero())
193-
);
161+
&vec![<$ti>::zero(); <$ker>::nr()],
162+
&[MaxConst(7.567773e37.into())],
163+
|x| x.max(7.567773e37.into()),
164+
);
194165
}
195166
}
196167

197168
#[test]
198-
fn hard_swish(x in x_strat()) {
169+
fn move_b_to_a_0() {
199170
if $cond {
200171
run_kernel_test::<$ti, $ker>(
201-
&x,
202-
&$crate::frame::activations::definitions::hard_swish().ops,
203-
|x| (x * 1./6. + 0.5).min(<$ti>::one()).max(<$ti>::zero()) * x
204-
);
172+
&*vec![<$ti>::zero(); <$ker>::nr()],
173+
&[Load(RegisterId::B, 1.0 as _), Move(RegisterId::A, RegisterId::B)],
174+
|_| 1.0 as _,
175+
);
205176
}
206177
}
178+
179+
proptest::proptest! {
180+
#[test]
181+
fn relu_prop(x in x_strat()) {
182+
if $cond {
183+
run_kernel_test::<$ti, $ker>(
184+
&x,
185+
&$crate::frame::activations::definitions::relu().ops,
186+
|x| x.max(<$ti>::zero())
187+
);
188+
}
189+
}
190+
191+
#[test]
192+
fn affine_prop(x in x_strat(), alpha in any::<$ti>(), beta in any::<$ti>()) {
193+
if $cond {
194+
run_kernel_test::<$ti, $ker>(
195+
&x,
196+
&$crate::frame::activations::definitions::affine(alpha, beta).ops,
197+
|x| x * alpha + beta
198+
);
199+
}
200+
}
201+
202+
#[test]
203+
fn leaky_relu_prop(x in x_strat(), alpha in any::<$ti>()) {
204+
if $cond {
205+
run_kernel_test::<$ti, $ker>(
206+
&x,
207+
&$crate::frame::activations::definitions::leaky_relu(alpha).ops,
208+
|x| if x >= <$ti>::zero() { x } else { alpha * x }
209+
);
210+
}
211+
}
212+
213+
#[test]
214+
fn hard_sigmoid(x in x_strat(), alpha in any::<$ti>(), beta in any::<$ti>()) {
215+
if $cond {
216+
run_kernel_test::<$ti, $ker>(
217+
&x,
218+
&$crate::frame::activations::definitions::hard_sigmoid(alpha, beta).ops,
219+
|x| (x * alpha + beta).min(<$ti>::one()).max(<$ti>::zero())
220+
);
221+
}
222+
}
223+
224+
#[test]
225+
fn hard_swish(x in x_strat()) {
226+
if $cond {
227+
run_kernel_test::<$ti, $ker>(
228+
&x,
229+
&$crate::frame::activations::definitions::hard_swish().ops,
230+
|x| (x * 1./6. + 0.5).min(<$ti>::one()).max(<$ti>::zero()) * x
231+
);
232+
}
233+
}
234+
}
235+
/*
236+
prop_act_e2e!($cond, $ti, $ker, affine(alpha, beta));
237+
prop_act_e2e!($cond, $ti, $ker, leaky_relu(alpha));
238+
prop_act_e2e!($cond, $ti, $ker, threshold_relu(alpha));
239+
prop_act_e2e!($cond, $ti, $ker, softsign());
240+
prop_act_e2e!($cond, $ti, $ker, hardswish());
241+
/*
242+
prop_activation!($cond, $ti, $ker, sigmoid());
243+
prop_activation!($cond, $ti, $ker, exp2f());
244+
*/
245+
*/
207246
}
208-
/*
209-
prop_act_e2e!($cond, $ti, $ker, affine(alpha, beta));
210-
prop_act_e2e!($cond, $ti, $ker, leaky_relu(alpha));
211-
prop_act_e2e!($cond, $ti, $ker, threshold_relu(alpha));
212-
prop_act_e2e!($cond, $ti, $ker, softsign());
213-
prop_act_e2e!($cond, $ti, $ker, hardswish());
214-
/*
215-
prop_activation!($cond, $ti, $ker, sigmoid());
216-
prop_activation!($cond, $ti, $ker, exp2f());
217-
*/
218-
*/
219-
}
220-
};
221-
}
247+
};
248+
}

0 commit comments

Comments
 (0)