diff --git a/bench/vunary.cc b/bench/vunary.cc index 6dd1715b6f3..d5aa0ecb6bb 100644 --- a/bench/vunary.cc +++ b/bench/vunary.cc @@ -300,6 +300,7 @@ void vlrelu(benchmark::State& state, uint64_t arch_flags, ->UseRealTime(); #include "src/bf16-f32-vcvt/bf16-f32-vcvt.inc" #include "src/bf16-qs8-vcvt/bf16-qs8-vcvt.inc" +#include "src/bf16-qu8-vcvt/bf16-qu8-vcvt.inc" #include "src/f32-bf16-vcvt/f32-bf16-vcvt.inc" #include "src/f16-f32-vcvt/f16-f32-vcvt.inc" #include "src/f16-qs8-vcvt/f16-qs8-vcvt.inc" diff --git a/build_srcs.bzl b/build_srcs.bzl index 83a7a7707b6..8357ee8ecf8 100644 --- a/build_srcs.bzl +++ b/build_srcs.bzl @@ -96,6 +96,7 @@ LOGGING_SRCS = [ MICROKERNEL_DEFS = [ "src/bf16-f32-vcvt/bf16-f32-vcvt.inc", "src/bf16-qs8-vcvt/bf16-qs8-vcvt.inc", + "src/bf16-qu8-vcvt/bf16-qu8-vcvt.inc", "src/bf16-rminmax/bf16-rmax.inc", "src/bf16-rminmax/bf16-rmin.inc", "src/bf16-rminmax/bf16-rminmax.inc", diff --git a/cmake/gen/scalar_microkernels.cmake b/cmake/gen/scalar_microkernels.cmake index df6667441bf..a03faa512a0 100644 --- a/cmake/gen/scalar_microkernels.cmake +++ b/cmake/gen/scalar_microkernels.cmake @@ -12,6 +12,7 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u4.c + src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u4.c src/bf16-rminmax/gen/bf16-rmax-scalar-u2-acc2.c src/bf16-rminmax/gen/bf16-rmin-scalar-u2-acc2.c src/bf16-rminmax/gen/bf16-rminmax-scalar-u2-acc2.c @@ -259,6 +260,9 @@ SET(NON_PROD_SCALAR_MICROKERNEL_SRCS src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u1.c src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u2.c src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u3.c + src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u1.c + src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u2.c + src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u3.c src/bf16-rminmax/gen/bf16-rmax-scalar-u1.c src/bf16-rminmax/gen/bf16-rmax-scalar-u3-acc3.c 
src/bf16-rminmax/gen/bf16-rmax-scalar-u4-acc2.c diff --git a/gen/scalar_microkernels.bzl b/gen/scalar_microkernels.bzl index 4ee3d07e1d2..dad64ec964e 100644 --- a/gen/scalar_microkernels.bzl +++ b/gen/scalar_microkernels.bzl @@ -8,6 +8,7 @@ PROD_SCALAR_MICROKERNEL_SRCS = [ "src/bf16-f32-vcvt/gen/bf16-f32-vcvt-scalar-u2.c", "src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u4.c", + "src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u4.c", "src/bf16-rminmax/gen/bf16-rmax-scalar-u2-acc2.c", "src/bf16-rminmax/gen/bf16-rmin-scalar-u2-acc2.c", "src/bf16-rminmax/gen/bf16-rminmax-scalar-u2-acc2.c", @@ -256,6 +257,9 @@ NON_PROD_SCALAR_MICROKERNEL_SRCS = [ "src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u1.c", "src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u2.c", "src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u3.c", + "src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u1.c", + "src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u2.c", + "src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u3.c", "src/bf16-rminmax/gen/bf16-rmax-scalar-u1.c", "src/bf16-rminmax/gen/bf16-rmax-scalar-u3-acc3.c", "src/bf16-rminmax/gen/bf16-rmax-scalar-u4-acc2.c", diff --git a/scripts/generate-f32-qs8-vcvt.sh b/scripts/generate-f32-qs8-vcvt.sh index 14b7cc67fa2..b491c16aa74 100755 --- a/scripts/generate-f32-qs8-vcvt.sh +++ b/scripts/generate-f32-qs8-vcvt.sh @@ -139,6 +139,11 @@ tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=2 -D IDATATYPE=BF1 tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=3 -D IDATATYPE=BF16 -D ODATATYPE=QS8 -D WASM=0 -o src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u3.c & tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=4 -D IDATATYPE=BF16 -D ODATATYPE=QS8 -D WASM=0 -o src/bf16-qs8-vcvt/gen/bf16-qs8-vcvt-scalar-imagic-u4.c & +tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=1 -D IDATATYPE=BF16 -D ODATATYPE=QU8 -D WASM=0 -o src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u1.c & +tools/xngen 
src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=2 -D IDATATYPE=BF16 -D ODATATYPE=QU8 -D WASM=0 -o src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u2.c & +tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=3 -D IDATATYPE=BF16 -D ODATATYPE=QU8 -D WASM=0 -o src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u3.c & +tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=4 -D IDATATYPE=BF16 -D ODATATYPE=QU8 -D WASM=0 -o src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u4.c & + tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=1 -D IDATATYPE=F16 -D ODATATYPE=QU8 -D WASM=0 -o src/f16-qu8-vcvt/gen/f16-qu8-vcvt-scalar-imagic-u1.c & tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=2 -D IDATATYPE=F16 -D ODATATYPE=QU8 -D WASM=0 -o src/f16-qu8-vcvt/gen/f16-qu8-vcvt-scalar-imagic-u2.c & tools/xngen src/f32-qs8-vcvt/scalar-imagic.c.in -D BATCH_TILE=3 -D IDATATYPE=F16 -D ODATATYPE=QU8 -D WASM=0 -o src/f16-qu8-vcvt/gen/f16-qu8-vcvt-scalar-imagic-u3.c & diff --git a/src/bf16-qu8-vcvt/bf16-qu8-vcvt.inc b/src/bf16-qu8-vcvt/bf16-qu8-vcvt.inc new file mode 100644 index 00000000000..6465185ca24 --- /dev/null +++ b/src/bf16-qu8-vcvt/bf16-qu8-vcvt.inc @@ -0,0 +1,21 @@ +// clang-format off +// Copyright 2025 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#ifndef XNN_QUANTIZED +#define XNN_QUANTIZED(T) T +#define XNN_DEFINED_QUANTIZED +#endif + +XNN_UKERNEL(xnn_arch_none, xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u1, 1, false, xnn_bfloat16, XNN_QUANTIZED(uint8_t), struct xnn_bf16_qu8_cvt_params, xnn_init_bf16_qu8_cvt_scalar_params) +XNN_UKERNEL(xnn_arch_none, xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u2, 2, false, xnn_bfloat16, XNN_QUANTIZED(uint8_t), struct xnn_bf16_qu8_cvt_params, xnn_init_bf16_qu8_cvt_scalar_params) +XNN_UKERNEL(xnn_arch_none, xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u3, 3, false, xnn_bfloat16, XNN_QUANTIZED(uint8_t), struct xnn_bf16_qu8_cvt_params, xnn_init_bf16_qu8_cvt_scalar_params) +XNN_UKERNEL(xnn_arch_none, xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u4, 4, false, xnn_bfloat16, XNN_QUANTIZED(uint8_t), struct xnn_bf16_qu8_cvt_params, xnn_init_bf16_qu8_cvt_scalar_params) + + +#ifdef XNN_DEFINED_QUANTIZED +#undef XNN_DEFINED_QUANTIZED +#undef XNN_QUANTIZED +#endif diff --git a/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u1.c b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u1.c new file mode 100644 index 00000000000..c9a173bcbea --- /dev/null +++ b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u1.c @@ -0,0 +1,57 @@ +// clang-format off +// Auto-generated file. Do not edit! +// Template: src/f32-qs8-vcvt/scalar-imagic.c.in +// Generator: tools/xngen +// +// Copyright 2021 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#include + +#include +#include +#include + +#include "src/xnnpack/math.h" +#include "src/xnnpack/microparams.h" +#include "src/xnnpack/vcvt.h" + +void xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u1( + size_t batch, + const xnn_bfloat16* input, + uint8_t* output, + const struct xnn_bf16_qu8_cvt_params* restrict params) +{ + assert(batch != 0); + assert(batch % sizeof(xnn_bfloat16) == 0); + assert(input != NULL); + assert(output != NULL); + + const xnn_bfloat16* i = input; + // Don't let the scale be 0, which can happen for large scales, and should + // not happen because this value is a reciprocal. + const float vscale = math_max_f32(FLT_MIN, xnn_bfloat16_to_float(params->scalar.scale)); + const float vmagic_bias = 12582912.0f; + const float output_min_less_zero_point = (float) ((int32_t) 0 - (int32_t) params->scalar.output_zero_point); + const float output_max_less_zero_point = (float) ((int32_t) 255 - (int32_t) params->scalar.output_zero_point); + const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point); + const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point); + const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point; + + do { + float vx = xnn_bfloat16_to_float(*i++); + vx *= vscale; + vx += vmagic_bias; + + int32_t vy = (int32_t) float_as_uint32(vx); + vy = math_max_s32(vy, vmagic_min); + vy = math_min_s32(vy, vmagic_max); + vy -= vmagic_bias_less_zero_point; + + *output++ = (uint8_t) vy; + + batch -= sizeof(xnn_bfloat16); + } while (batch != 0); +} diff --git a/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u2.c b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u2.c new file mode 100644 index 00000000000..53e0642f97d --- /dev/null +++ b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u2.c @@ -0,0 +1,83 @@ +// clang-format off +// Auto-generated file. Do not edit! 
+// Template: src/f32-qs8-vcvt/scalar-imagic.c.in +// Generator: tools/xngen +// +// Copyright 2021 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include + +#include +#include +#include + +#include "src/xnnpack/common.h" +#include "src/xnnpack/math.h" +#include "src/xnnpack/microparams.h" +#include "src/xnnpack/vcvt.h" + +void xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u2( + size_t batch, + const xnn_bfloat16* input, + uint8_t* output, + const struct xnn_bf16_qu8_cvt_params* restrict params) +{ + assert(batch != 0); + assert(batch % sizeof(xnn_bfloat16) == 0); + assert(input != NULL); + assert(output != NULL); + + const xnn_bfloat16* i = input; + // Don't let the scale be 0, which can happen for large scales, and should + // not happen because this value is a reciprocal. + const float vscale = math_max_f32(FLT_MIN, xnn_bfloat16_to_float(params->scalar.scale)); + const float vmagic_bias = 12582912.0f; + const float output_min_less_zero_point = (float) ((int32_t) 0 - (int32_t) params->scalar.output_zero_point); + const float output_max_less_zero_point = (float) ((int32_t) 255 - (int32_t) params->scalar.output_zero_point); + const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point); + const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point); + const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point; + + for (; batch >= 2 * sizeof(xnn_bfloat16); batch -= 2 * sizeof(xnn_bfloat16)) { + float vx0 = xnn_bfloat16_to_float(i[0]); + float vx1 = xnn_bfloat16_to_float(i[1]); + i += 2; + + vx0 *= vscale; + vx1 *= vscale; + + vx0 += vmagic_bias; + vx1 += vmagic_bias; + + int32_t vy0 = (int32_t) float_as_uint32(vx0); + int32_t vy1 = (int32_t) float_as_uint32(vx1); + + vy0 = math_max_s32(vy0, vmagic_min); + vy1 = math_max_s32(vy1, vmagic_min); + + vy0 = 
math_min_s32(vy0, vmagic_max); + vy1 = math_min_s32(vy1, vmagic_max); + + vy0 -= vmagic_bias_less_zero_point; + vy1 -= vmagic_bias_less_zero_point; + + output[0] = (uint8_t) vy0; + output[1] = (uint8_t) vy1; + output += 2; + } + if XNN_UNLIKELY(batch != 0) { + float vx = xnn_bfloat16_to_float(*i); + vx *= vscale; + vx += vmagic_bias; + + int32_t vy = (int32_t) float_as_uint32(vx); + vy = math_max_s32(vy, vmagic_min); + vy = math_min_s32(vy, vmagic_max); + vy -= vmagic_bias_less_zero_point; + + *output = (uint8_t) vy; + } +} diff --git a/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u3.c b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u3.c new file mode 100644 index 00000000000..1d043fc65b2 --- /dev/null +++ b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u3.c @@ -0,0 +1,95 @@ +// clang-format off +// Auto-generated file. Do not edit! +// Template: src/f32-qs8-vcvt/scalar-imagic.c.in +// Generator: tools/xngen +// +// Copyright 2021 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include + +#include +#include +#include + +#include "src/xnnpack/common.h" +#include "src/xnnpack/math.h" +#include "src/xnnpack/microparams.h" +#include "src/xnnpack/vcvt.h" + +void xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u3( + size_t batch, + const xnn_bfloat16* input, + uint8_t* output, + const struct xnn_bf16_qu8_cvt_params* restrict params) +{ + assert(batch != 0); + assert(batch % sizeof(xnn_bfloat16) == 0); + assert(input != NULL); + assert(output != NULL); + + const xnn_bfloat16* i = input; + // Don't let the scale be 0, which can happen for large scales, and should + // not happen because this value is a reciprocal. 
+ const float vscale = math_max_f32(FLT_MIN, xnn_bfloat16_to_float(params->scalar.scale)); + const float vmagic_bias = 12582912.0f; + const float output_min_less_zero_point = (float) ((int32_t) 0 - (int32_t) params->scalar.output_zero_point); + const float output_max_less_zero_point = (float) ((int32_t) 255 - (int32_t) params->scalar.output_zero_point); + const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point); + const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point); + const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point; + + for (; batch >= 3 * sizeof(xnn_bfloat16); batch -= 3 * sizeof(xnn_bfloat16)) { + float vx0 = xnn_bfloat16_to_float(i[0]); + float vx1 = xnn_bfloat16_to_float(i[1]); + float vx2 = xnn_bfloat16_to_float(i[2]); + i += 3; + + vx0 *= vscale; + vx1 *= vscale; + vx2 *= vscale; + + vx0 += vmagic_bias; + vx1 += vmagic_bias; + vx2 += vmagic_bias; + + int32_t vy0 = (int32_t) float_as_uint32(vx0); + int32_t vy1 = (int32_t) float_as_uint32(vx1); + int32_t vy2 = (int32_t) float_as_uint32(vx2); + + vy0 = math_max_s32(vy0, vmagic_min); + vy1 = math_max_s32(vy1, vmagic_min); + vy2 = math_max_s32(vy2, vmagic_min); + + vy0 = math_min_s32(vy0, vmagic_max); + vy1 = math_min_s32(vy1, vmagic_max); + vy2 = math_min_s32(vy2, vmagic_max); + + vy0 -= vmagic_bias_less_zero_point; + vy1 -= vmagic_bias_less_zero_point; + vy2 -= vmagic_bias_less_zero_point; + + output[0] = (uint8_t) vy0; + output[1] = (uint8_t) vy1; + output[2] = (uint8_t) vy2; + output += 3; + } + if XNN_UNLIKELY(batch != 0) { + do { + float vx = xnn_bfloat16_to_float(*i++); + vx *= vscale; + vx += vmagic_bias; + + int32_t vy = (int32_t) float_as_uint32(vx); + vy = math_max_s32(vy, vmagic_min); + vy = math_min_s32(vy, vmagic_max); + vy -= vmagic_bias_less_zero_point; + + *output++ = (uint8_t) vy; + + batch -= sizeof(xnn_bfloat16); + } while (batch != 0); + } +} diff --git 
a/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u4.c b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u4.c new file mode 100644 index 00000000000..4ef3764e5d7 --- /dev/null +++ b/src/bf16-qu8-vcvt/gen/bf16-qu8-vcvt-scalar-imagic-u4.c @@ -0,0 +1,103 @@ +// clang-format off +// Auto-generated file. Do not edit! +// Template: src/f32-qs8-vcvt/scalar-imagic.c.in +// Generator: tools/xngen +// +// Copyright 2021 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include + +#include +#include +#include + +#include "src/xnnpack/common.h" +#include "src/xnnpack/math.h" +#include "src/xnnpack/microparams.h" +#include "src/xnnpack/vcvt.h" + +void xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u4( + size_t batch, + const xnn_bfloat16* input, + uint8_t* output, + const struct xnn_bf16_qu8_cvt_params* restrict params) +{ + assert(batch != 0); + assert(batch % sizeof(xnn_bfloat16) == 0); + assert(input != NULL); + assert(output != NULL); + + const xnn_bfloat16* i = input; + // Don't let the scale be 0, which can happen for large scales, and should + // not happen because this value is a reciprocal. 
+ const float vscale = math_max_f32(FLT_MIN, xnn_bfloat16_to_float(params->scalar.scale)); + const float vmagic_bias = 12582912.0f; + const float output_min_less_zero_point = (float) ((int32_t) 0 - (int32_t) params->scalar.output_zero_point); + const float output_max_less_zero_point = (float) ((int32_t) 255 - (int32_t) params->scalar.output_zero_point); + const int32_t vmagic_min = (int32_t) float_as_uint32(vmagic_bias + output_min_less_zero_point); + const int32_t vmagic_max = (int32_t) float_as_uint32(vmagic_bias + output_max_less_zero_point); + const int32_t vmagic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) params->scalar.output_zero_point; + + for (; batch >= 4 * sizeof(xnn_bfloat16); batch -= 4 * sizeof(xnn_bfloat16)) { + float vx0 = xnn_bfloat16_to_float(i[0]); + float vx1 = xnn_bfloat16_to_float(i[1]); + float vx2 = xnn_bfloat16_to_float(i[2]); + float vx3 = xnn_bfloat16_to_float(i[3]); + i += 4; + + vx0 *= vscale; + vx1 *= vscale; + vx2 *= vscale; + vx3 *= vscale; + + vx0 += vmagic_bias; + vx1 += vmagic_bias; + vx2 += vmagic_bias; + vx3 += vmagic_bias; + + int32_t vy0 = (int32_t) float_as_uint32(vx0); + int32_t vy1 = (int32_t) float_as_uint32(vx1); + int32_t vy2 = (int32_t) float_as_uint32(vx2); + int32_t vy3 = (int32_t) float_as_uint32(vx3); + + vy0 = math_max_s32(vy0, vmagic_min); + vy1 = math_max_s32(vy1, vmagic_min); + vy2 = math_max_s32(vy2, vmagic_min); + vy3 = math_max_s32(vy3, vmagic_min); + + vy0 = math_min_s32(vy0, vmagic_max); + vy1 = math_min_s32(vy1, vmagic_max); + vy2 = math_min_s32(vy2, vmagic_max); + vy3 = math_min_s32(vy3, vmagic_max); + + vy0 -= vmagic_bias_less_zero_point; + vy1 -= vmagic_bias_less_zero_point; + vy2 -= vmagic_bias_less_zero_point; + vy3 -= vmagic_bias_less_zero_point; + + output[0] = (uint8_t) vy0; + output[1] = (uint8_t) vy1; + output[2] = (uint8_t) vy2; + output[3] = (uint8_t) vy3; + output += 4; + } + if XNN_UNLIKELY(batch != 0) { + do { + float vx = xnn_bfloat16_to_float(*i++); + vx *= vscale; + vx += 
vmagic_bias; + + int32_t vy = (int32_t) float_as_uint32(vx); + vy = math_max_s32(vy, vmagic_min); + vy = math_min_s32(vy, vmagic_max); + vy -= vmagic_bias_less_zero_point; + + *output++ = (uint8_t) vy; + + batch -= sizeof(xnn_bfloat16); + } while (batch != 0); + } +} diff --git a/src/configs/unary-elementwise-config.c b/src/configs/unary-elementwise-config.c index 49428d0dea1..779fcbca66d 100644 --- a/src/configs/unary-elementwise-config.c +++ b/src/configs/unary-elementwise-config.c @@ -25,6 +25,7 @@ static const int consistent_config = 1; static struct xnn_unary_elementwise_config bf16_to_f32_cvt_config = {0}; static struct xnn_unary_elementwise_config bf16_to_qs8_cvt_config = {0}; +static struct xnn_unary_elementwise_config bf16_to_qu8_cvt_config = {0}; static struct xnn_unary_elementwise_config f16_abs_config = {0}; static struct xnn_unary_elementwise_config f16_approxgelu_config = {0}; static struct xnn_unary_elementwise_config f16_clamp_config = {0}; @@ -89,6 +90,7 @@ static struct xnn_unary_elementwise_config xx_copy_config = {0}; XNN_INIT_ONCE_GUARD(bf16_to_f32_cvt); XNN_INIT_ONCE_GUARD(bf16_to_qs8_cvt); +XNN_INIT_ONCE_GUARD(bf16_to_qu8_cvt); XNN_INIT_ONCE_GUARD(f16_abs); XNN_INIT_ONCE_GUARD(f16_approxgelu); XNN_INIT_ONCE_GUARD(f16_clamp); @@ -992,6 +994,12 @@ static void init_bf16_to_qs8_cvt_config(void) { bf16_to_qs8_cvt_config.init = (xnn_init_unary_uparams_fn) xnn_init_bf16_qs8_cvt_scalar_params; } +static void init_bf16_to_qu8_cvt_config(void) { + bf16_to_qu8_cvt_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_bf16_qu8_vcvt_ukernel__scalar_imagic_u4); + bf16_to_qu8_cvt_config.element_tile = 4; + bf16_to_qu8_cvt_config.init = (xnn_init_unary_uparams_fn) xnn_init_bf16_qu8_cvt_scalar_params; +} + static void init_f16_to_f32_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); @@ -3998,6 +4006,15 @@ const struct xnn_unary_elementwise_config* xnn_init_bf16_to_qs8_cvt_config() { return 
&bf16_to_qs8_cvt_config; } +const struct xnn_unary_elementwise_config* xnn_init_bf16_to_qu8_cvt_config() { + const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); + if (hardware_config == NULL) { + return NULL; + } + XNN_INIT_ONCE(bf16_to_qu8_cvt); + return &bf16_to_qu8_cvt_config; +} + const struct xnn_unary_elementwise_config* xnn_init_f16_to_f32_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { diff --git a/src/microparams-init.c b/src/microparams-init.c index 8def6ba38db..4ae71cd712f 100644 --- a/src/microparams-init.c +++ b/src/microparams-init.c @@ -1128,6 +1128,15 @@ size_t xnn_init_bf16_qs8_cvt_scalar_params( return sizeof(params->bf16_qs8_cvt); } +size_t xnn_init_bf16_qu8_cvt_scalar_params( + union xnn_unary_uparams* params, + const union xnn_unary_params* op_params, + const struct xnn_quantization_params* input_quantization, + const struct xnn_quantization_params* output_quantization) +{ + return xnn_init_bf16_qs8_cvt_scalar_params(params, op_params, input_quantization, output_quantization); +} + size_t xnn_init_f16_qs8_cvt_scalar_params( union xnn_unary_uparams* params, const union xnn_unary_params* op_params, diff --git a/src/operator-run.c b/src/operator-run.c index c5336461026..6af0a616818 100644 --- a/src/operator-run.c +++ b/src/operator-run.c @@ -1580,11 +1580,67 @@ void xnn_compute_pad_qd8_params( } } +typedef struct xnn_qd8_quantization_params(bf16_quantization_params_fn)( + xnn_bfloat16 min, xnn_bfloat16 max, xnn_bfloat16* bf16_scale); typedef struct xnn_qd8_quantization_params(f16_quantization_params_fn)( xnn_float16 min, xnn_float16 max, xnn_float16* f32_scale); typedef struct xnn_qd8_quantization_params(f32_quantization_params_fn)( float min, float max, float* f32_scale); +XNN_NO_SANITIZE_FUNCTION void xnn_compute_bf16_qx8_convert( + struct bf16_qd8_convert_context* restrict context, + bf16_quantization_params_fn quantization_params_function, 
+ size_t batch_index) { + const size_t x_stride = context->x_stride; + const size_t y_stride = context->y_stride; + const size_t n = context->n; + const void* input = + (const void*)((uintptr_t)context->x + x_stride * batch_index); + void* output = (void*)((uintptr_t)context->y + y_stride * batch_index); + + xnn_bfloat16 minmax[2] = {xnn_bfloat16_from_bits(UINT16_C(0x7F80)), + xnn_bfloat16_from_bits(UINT16_C(0xFF80))}; + context->rminmax_ukernel(n, input, minmax, &context->params); + xnn_bfloat16 bf16_scale; + context->quantization_params[batch_index] = + quantization_params_function(minmax[0], minmax[1], &bf16_scale); + + struct xnn_bf16_qs8_cvt_params params; + params.scalar.scale = bf16_scale; + params.scalar.output_zero_point = + context->quantization_params[batch_index].zero_point; + context->convert_ukernel(n, input, output, (union xnn_unary_uparams*)&params); + + if (context->rsum_ukernel) { + // Compute and store the row sum of the quantized output. + const size_t num_bytes = n / sizeof(xnn_bfloat16) * sizeof(int8_t); + int32_t row_sum = 0; + struct xnn_qs8_rsum_params rsum_params = {0,}; + context->rsum_ukernel(num_bytes, output, &row_sum, &rsum_params); + context->row_sum[batch_index] = (float)row_sum; + } +} + +void xnn_compute_bf16_qd8_convert( + struct bf16_qd8_convert_context* restrict context, size_t batch_offset, + size_t batch_range) { + for (size_t batch_index = batch_offset; + batch_index < batch_offset + batch_range; batch_index++) { + xnn_compute_bf16_qx8_convert( + context, xnn_bf16_qd8_asymmetric_quantization_params, batch_index); + } +} + +void xnn_compute_bf16_qdu8_convert( + struct bf16_qd8_convert_context* restrict context, size_t batch_offset, + size_t batch_range) { + for (size_t batch_index = batch_offset; + batch_index < batch_offset + batch_range; batch_index++) { + xnn_compute_bf16_qx8_convert( + context, xnn_bf16_qdu8_asymmetric_quantization_params, batch_index); + } +} + XNN_NO_SANITIZE_FUNCTION void xnn_compute_f16_qx8_convert( 
struct f16_qd8_convert_context* restrict context, f16_quantization_params_fn quantization_params_function, diff --git a/src/operators/unary-elementwise-nc.c b/src/operators/unary-elementwise-nc.c index 2bc553d94b9..7b2a3b21922 100644 --- a/src/operators/unary-elementwise-nc.c +++ b/src/operators/unary-elementwise-nc.c @@ -841,6 +841,24 @@ enum xnn_status create_convert_nc_qx8( return status; } +enum xnn_status xnn_create_convert_nc_bf16_qd8( + uint32_t flags, + xnn_operator_t* convert_op_out) { + return create_convert_nc_qx8( + flags, xnn_init_bf16_to_qs8_cvt_config(), xnn_init_bf16_rminmax_config(), + xnn_init_qs8_rsum_config(), xnn_operator_type_convert_nc_bf16_qd8, + convert_op_out); +} + +enum xnn_status xnn_create_convert_nc_bf16_qdu8( + uint32_t flags, + xnn_operator_t* convert_op_out) { + return create_convert_nc_qx8( + flags, xnn_init_bf16_to_qu8_cvt_config(), xnn_init_bf16_rminmax_config(), + xnn_init_qu8_rsum_config(), xnn_operator_type_convert_nc_bf16_qdu8, + convert_op_out); +} + enum xnn_status xnn_create_convert_nc_f16_qd8( uint32_t flags, xnn_operator_t* convert_op_out) { @@ -940,6 +958,73 @@ enum xnn_status xnn_create_copy_nc_x32( xnn_operator_type_copy_nc_x32, copy_op_out); } +enum xnn_status reshape_convert_nc_bf16_qx8( + xnn_operator_t convert_op, + size_t batch_size, + size_t channels, + size_t input_stride, + size_t output_stride, + enum xnn_operator_type expected_type, + pthreadpool_t threadpool) +{ + if (convert_op->type != expected_type) { + xnn_log_error( + "failed to setup operator: operator type mismatch (expected %s, got " + "%s)", + xnn_operator_type_to_string(expected_type), + xnn_operator_type_to_string_v2(convert_op)); + return xnn_status_invalid_parameter; + } + convert_op->state = xnn_run_state_invalid; + + if (batch_size == 0) { + convert_op->state = xnn_run_state_skip; + return xnn_status_success; + } + + convert_op->batch_size = batch_size; + + convert_op->context.bf16_qd8_convert = (struct bf16_qd8_convert_context) { + .n = 
channels * sizeof(uint16_t), + .x_stride = input_stride * sizeof(uint16_t), + .y_stride = output_stride, + .batch_size = batch_size, + .rminmax_ukernel = convert_op->reduce_config->ukernel, + .convert_ukernel = convert_op->unary_elementwise_config->ukernel, + .init_params = convert_op->unary_elementwise_config->init, + }; + + if (convert_op->flags & XNN_NODE_FLAG_REQUIRES_ROW_SUM) { + convert_op->context.bf16_qd8_convert.rsum_ukernel = convert_op->reduce_config2->ukernel; + } + memcpy(&convert_op->context.bf16_qd8_convert.params, &convert_op->params.bf16_default, sizeof(convert_op->params.bf16_default)); + + convert_op->compute[0].type = xnn_parallelization_type_1d_tile_1d_dynamic; + switch (expected_type) { + case xnn_operator_type_convert_nc_bf16_qd8: + convert_op->compute[0].task_1d_tile_1d_dynamic = + (pthreadpool_task_1d_tile_1d_dynamic_t)xnn_compute_bf16_qd8_convert; + break; + case xnn_operator_type_convert_nc_bf16_qdu8: + convert_op->compute[0].task_1d_tile_1d_dynamic = + (pthreadpool_task_1d_tile_1d_dynamic_t)xnn_compute_bf16_qdu8_convert; + break; + default: + XNN_UNREACHABLE; + } + convert_op->compute[0].range[0] = batch_size; + convert_op->compute[0].tile[0] = divide_round_up( + get_tile_size(convert_op), convert_op->context.bf16_qd8_convert.n); + + convert_op->compute[1].type = xnn_parallelization_type_1d; + convert_op->compute[1].task_1d = (pthreadpool_task_1d_t) xnn_compute_pad_qd8_params; + convert_op->compute[1].range[0] = 1; + + convert_op->state = xnn_run_state_needs_setup; + + return xnn_status_success; +} + enum xnn_status reshape_convert_nc_f16_qx8( xnn_operator_t convert_op, size_t batch_size, @@ -1075,6 +1160,18 @@ enum xnn_status reshape_convert_nc_f32_qx8( return xnn_status_success; } +enum xnn_status xnn_reshape_convert_nc_bf16_qd8( + xnn_operator_t convert_op, size_t batch_size, size_t channels, + size_t input_stride, size_t output_stride, pthreadpool_t threadpool) { + return reshape_convert_nc_bf16_qx8(convert_op, batch_size, channels, 
input_stride, output_stride, xnn_operator_type_convert_nc_bf16_qd8, threadpool); +} + +enum xnn_status xnn_reshape_convert_nc_bf16_qdu8( + xnn_operator_t convert_op, size_t batch_size, size_t channels, + size_t input_stride, size_t output_stride, pthreadpool_t threadpool) { + return reshape_convert_nc_bf16_qx8(convert_op, batch_size, channels, input_stride, output_stride, xnn_operator_type_convert_nc_bf16_qdu8, threadpool); +} + enum xnn_status xnn_reshape_convert_nc_f16_qd8( xnn_operator_t convert_op, size_t batch_size, @@ -1251,6 +1348,47 @@ enum xnn_status xnn_reshape_copy_nc_x32( threadpool); } +enum xnn_status setup_convert_nc_bf16_qx8( + xnn_operator_t convert_op, + const void* input, + void* output, + enum xnn_operator_type expected_operator_type, + void* row_sum, + struct xnn_quantization_params* quantization_params) +{ + if (convert_op->type != expected_operator_type) { + xnn_log_error( + "failed to setup operator: operator type mismatch (expected %s, got " + "%s)", + xnn_operator_type_to_string(expected_operator_type), + xnn_operator_type_to_string_v2(convert_op)); + return xnn_status_invalid_parameter; + } + + switch (convert_op->state) { + case xnn_run_state_skip: + return xnn_status_success; + case xnn_run_state_invalid: + xnn_log_error( + "failed to setup %s operator: operator has not been reshaped yet", + xnn_operator_type_to_string_v2(convert_op)); + return xnn_status_invalid_state; + case xnn_run_state_needs_setup: + // Operator has been reshaped, but not setup, continue with setup. + case xnn_run_state_ready: + // Operator has been reshaped, and we are setting up with different pointers. 
+ break; + } + + convert_op->context.bf16_qd8_convert.x = input; + convert_op->context.bf16_qd8_convert.y = output; + convert_op->context.bf16_qd8_convert.quantization_params = (struct xnn_qd8_quantization_params*) quantization_params; + convert_op->context.bf16_qd8_convert.row_sum = row_sum; + convert_op->state = xnn_run_state_ready; + + return xnn_status_success; +} + enum xnn_status setup_convert_nc_f16_qx8( xnn_operator_t convert_op, const void* input, @@ -1355,6 +1493,26 @@ enum xnn_status xnn_setup_convert_nc_f16_qdu8( return setup_convert_nc_f16_qx8(convert_op, input, output, xnn_operator_type_convert_nc_f16_qdu8, row_sum, quantization_params); } +enum xnn_status xnn_setup_convert_nc_bf16_qd8( + xnn_operator_t convert_op, + const void* input, + int8_t* output, + float* row_sum, + struct xnn_quantization_params* quantization_params) +{ + return setup_convert_nc_bf16_qx8(convert_op, input, output, xnn_operator_type_convert_nc_bf16_qd8, row_sum, quantization_params); +} + +enum xnn_status xnn_setup_convert_nc_bf16_qdu8( + xnn_operator_t convert_op, + const void* input, + uint8_t* output, + float* row_sum, + struct xnn_quantization_params* quantization_params) +{ + return setup_convert_nc_bf16_qx8(convert_op, input, output, xnn_operator_type_convert_nc_bf16_qdu8, row_sum, quantization_params); +} + enum xnn_status xnn_setup_convert_nc_f32_qd8( xnn_operator_t convert_op, const float* input, diff --git a/src/xnnpack/compute.h b/src/xnnpack/compute.h index e9e553d1b80..be8450f4a06 100644 --- a/src/xnnpack/compute.h +++ b/src/xnnpack/compute.h @@ -1130,6 +1130,24 @@ XNN_PRIVATE void xnn_compute_slice_4d(struct slice_context* context, size_t i, XNN_PRIVATE void xnn_compute_slice_5d(struct slice_context* context, size_t i, size_t j, size_t k, size_t l, size_t m); +struct bf16_qd8_convert_context { + size_t n; + const void* x; + size_t x_stride; + int8_t* y; + size_t y_stride; + size_t batch_size; + struct xnn_qd8_quantization_params* quantization_params; + float* 
row_sum; + xnn_reduce_ukernel_fn rminmax_ukernel; + xnn_reduce_ukernel_fn rsum_ukernel; + xnn_vunary_ukernel_fn convert_ukernel; + xnn_init_unary_uparams_fn init_params; + union { + struct xnn_bf16_default_params bf16_default; + } params; +}; + struct f16_qd8_convert_context { size_t n; const void* x; @@ -1166,6 +1184,14 @@ struct f32_qd8_convert_context { } params; }; +XNN_PRIVATE void xnn_compute_bf16_qd8_convert( + struct bf16_qd8_convert_context* context, size_t batch_offset, + size_t batch_range); + +XNN_PRIVATE void xnn_compute_bf16_qdu8_convert( + struct bf16_qd8_convert_context* context, size_t batch_offset, + size_t batch_range); + XNN_PRIVATE void xnn_compute_f16_qd8_convert( struct f16_qd8_convert_context* context, size_t batch_offset, size_t batch_range); diff --git a/src/xnnpack/config.h b/src/xnnpack/config.h index c4549227c3c..b4f9e030213 100644 --- a/src/xnnpack/config.h +++ b/src/xnnpack/config.h @@ -133,6 +133,8 @@ xnn_init_bf16_to_f32_cvt_config(); XNN_INTERNAL const struct xnn_unary_elementwise_config* xnn_init_bf16_to_qs8_cvt_config(); XNN_INTERNAL const struct xnn_unary_elementwise_config* +xnn_init_bf16_to_qu8_cvt_config(); +XNN_INTERNAL const struct xnn_unary_elementwise_config* xnn_init_f16_to_f32_cvt_config(); XNN_INTERNAL const struct xnn_unary_elementwise_config* xnn_init_f16_to_qs8_cvt_config(); diff --git a/src/xnnpack/internal.h b/src/xnnpack/internal.h index 7f9326c547c..ee93855e532 100644 --- a/src/xnnpack/internal.h +++ b/src/xnnpack/internal.h @@ -381,6 +381,32 @@ enum xnn_status xnn_create_convolution2d_nhwc_pf32( float output_min, float output_max, uint32_t flags, xnn_weights_cache_t weights_cache, xnn_operator_t* convolution_op_out); +// quantization_params must be padded with at least +// XNN_EXTRA_QUANTIZATION_PARAMS entries. 
+enum xnn_status xnn_setup_convert_nc_bf16_qd8( + xnn_operator_t convert_op, const void* input, int8_t* output, + float* row_sum, struct xnn_quantization_params* quantization_params); + +enum xnn_status xnn_create_convert_nc_bf16_qd8(uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_bf16_qd8( + xnn_operator_t convert_op, size_t batch_size, size_t channels, + size_t input_stride, size_t output_stride, pthreadpool_t threadpool); + +// quantization_params must be padded with at least +// XNN_EXTRA_QUANTIZATION_PARAMS entries. +enum xnn_status xnn_setup_convert_nc_bf16_qdu8( + xnn_operator_t convert_op, const void* input, uint8_t* output, + float* row_sum, struct xnn_quantization_params* quantization_params); + +enum xnn_status xnn_create_convert_nc_bf16_qdu8(uint32_t flags, + xnn_operator_t* convert_op_out); + +enum xnn_status xnn_reshape_convert_nc_bf16_qdu8( + xnn_operator_t convert_op, size_t batch_size, size_t channels, + size_t input_stride, size_t output_stride, pthreadpool_t threadpool); + // quantization_params must be padded with at least // XNN_EXTRA_QUANTIZATION_PARAMS entries. 
enum xnn_status xnn_setup_convert_nc_f16_qdu8( diff --git a/src/xnnpack/microparams-init.h b/src/xnnpack/microparams-init.h index de6f4d04109..b0d46636341 100644 --- a/src/xnnpack/microparams-init.h +++ b/src/xnnpack/microparams-init.h @@ -193,6 +193,7 @@ DECLARE_INIT_UNARY_MICROPARAMS_FUNCTION(xnn_init_qs8_clamp_scalar_params); DECLARE_INIT_UNARY_MICROPARAMS_FUNCTION(xnn_init_qu8_clamp_scalar_params); DECLARE_INIT_UNARY_MICROPARAMS_FUNCTION(xnn_init_bf16_qs8_cvt_scalar_params); +DECLARE_INIT_UNARY_MICROPARAMS_FUNCTION(xnn_init_bf16_qu8_cvt_scalar_params); DECLARE_INIT_UNARY_MICROPARAMS_FUNCTION(xnn_init_f16_qs8_cvt_scalar_params); DECLARE_INIT_UNARY_MICROPARAMS_FUNCTION(xnn_init_f16_qu8_cvt_scalar_params); DECLARE_INIT_UNARY_MICROPARAMS_FUNCTION(xnn_init_f32_qs8_cvt_scalar_params); diff --git a/src/xnnpack/microparams.h b/src/xnnpack/microparams.h index e6b4ebbeb01..e7312f3cb8b 100644 --- a/src/xnnpack/microparams.h +++ b/src/xnnpack/microparams.h @@ -455,6 +455,13 @@ struct xnn_bf16_qs8_cvt_params { } scalar; }; +struct xnn_bf16_qu8_cvt_params { + struct { + xnn_bfloat16 scale; + int16_t output_zero_point; + } scalar; +}; + struct xnn_f16_qs8_cvt_params { struct { xnn_float16 scale; @@ -609,6 +616,7 @@ struct xnn_unary_reference_params { union xnn_unary_uparams { struct xnn_bf16_qs8_cvt_params bf16_qs8_cvt; + struct xnn_bf16_qu8_cvt_params bf16_qu8_cvt; struct xnn_f32_qs8_cvt_params f32_qs8_cvt; struct xnn_f32_qu8_cvt_params f32_qu8_cvt; struct xnn_f16_qs8_cvt_params f16_qs8_cvt; diff --git a/src/xnnpack/operator-type-defs.inc b/src/xnnpack/operator-type-defs.inc index 54d85be0808..be44dbed203 100644 --- a/src/xnnpack/operator-type-defs.inc +++ b/src/xnnpack/operator-type-defs.inc @@ -38,6 +38,8 @@ XNN_ENUM_ITEM(xnn_operator_type_binary_elementwise, "Binary Elementwise (ND)") XNN_ENUM_ITEM(xnn_operator_type_constant_pad_nd_x8, "Constant Pad (ND, X8)") XNN_ENUM_ITEM(xnn_operator_type_constant_pad_nd_x16, "Constant Pad (ND, X16)") 
XNN_ENUM_ITEM(xnn_operator_type_constant_pad_nd_x32, "Constant Pad (ND, X32)") +XNN_ENUM_ITEM(xnn_operator_type_convert_nc_bf16_qd8, "Convert (NC, BF16, QD8)") +XNN_ENUM_ITEM(xnn_operator_type_convert_nc_bf16_qdu8, "Convert (NC, BF16, QDU8)") XNN_ENUM_ITEM(xnn_operator_type_convert_nc_f16_qd8, "Convert (NC, F16, QD8)") XNN_ENUM_ITEM(xnn_operator_type_convert_nc_f16_qdu8, "Convert (NC, F16, QDU8)") XNN_ENUM_ITEM(xnn_operator_type_convert_nc_f32_qd8, "Convert (NC, F32, QD8)") diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h index 066871a95d9..5945fb3d0e7 100644 --- a/src/xnnpack/operator.h +++ b/src/xnnpack/operator.h @@ -194,6 +194,7 @@ struct xnn_convolution_operator { union xnn_params { union xnn_binary_uparams binary; + struct xnn_bf16_default_params bf16_default; struct xnn_f16_default_params f16_default; struct xnn_f32_default_params f32_default; struct xnn_f16_minmax_params f16_minmax; @@ -361,6 +362,7 @@ struct xnn_operator { struct transpose_context transpose; struct floating_point_softmax_context floating_point_softmax; struct u8_softmax_context u8_softmax; + struct bf16_qd8_convert_context bf16_qd8_convert; struct f16_qd8_convert_context f16_qd8_convert; struct f32_qd8_convert_context f32_qd8_convert; struct f32_qp8_convert_context f32_qp8_convert; diff --git a/src/xnnpack/quantization.h b/src/xnnpack/quantization.h index 77ce00824d5..9ce9b9751bb 100644 --- a/src/xnnpack/quantization.h +++ b/src/xnnpack/quantization.h @@ -90,4 +90,26 @@ xnn_f16_qd8_asymmetric_quantization_params(xnn_float16 min, xnn_float16 max, return params; } +static inline struct xnn_qd8_quantization_params +xnn_bf16_qd8_asymmetric_quantization_params(xnn_bfloat16 min, xnn_bfloat16 max, + xnn_bfloat16* bf16_scale) { + struct xnn_qd8_quantization_params params = + xnn_qd8_asymmetric_quantization_params(xnn_bfloat16_to_float(min), + xnn_bfloat16_to_float(max)); + *bf16_scale = xnn_bfloat16_from_float(params.inv_scale); + params.inv_scale = 1.f / params.inv_scale; + return 
params; +} + +static inline struct xnn_qd8_quantization_params +xnn_bf16_qdu8_asymmetric_quantization_params(xnn_bfloat16 min, xnn_bfloat16 max, + xnn_bfloat16* bf16_scale) { + struct xnn_qd8_quantization_params params = + xnn_qdu8_asymmetric_quantization_params(xnn_bfloat16_to_float(min), + xnn_bfloat16_to_float(max)); + *bf16_scale = xnn_bfloat16_from_float(params.inv_scale); + params.inv_scale = 1.f / params.inv_scale; + return params; +} + #endif // XNNPACK_SRC_XNNPACK_QUANTIZATION_H_ diff --git a/src/xnnpack/vcvt.h b/src/xnnpack/vcvt.h index a8e98fc370e..bdfdbfc3287 100644 --- a/src/xnnpack/vcvt.h +++ b/src/xnnpack/vcvt.h @@ -23,6 +23,7 @@ extern "C" { const params_type* params); #include "src/bf16-f32-vcvt/bf16-f32-vcvt.inc" #include "src/bf16-qs8-vcvt/bf16-qs8-vcvt.inc" +#include "src/bf16-qu8-vcvt/bf16-qu8-vcvt.inc" #include "src/f16-f32-vcvt/f16-f32-vcvt.inc" #include "src/f16-qs8-vcvt/f16-qs8-vcvt.inc" #include "src/f16-qu8-vcvt/f16-qu8-vcvt.inc" diff --git a/test/BUILD.bazel b/test/BUILD.bazel index 0d7965d1316..7b4aa241418 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -121,6 +121,7 @@ xnnpack_cxx_library( ) for kernel in [ "bf16_f32_vcvt", "bf16_qs8_vcvt", + "bf16_qu8_vcvt", "f16_f32_vcvt", "f16_qs8_vcvt", "f16_qu8_vcvt", diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a7028a14135..830161c59b9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -342,6 +342,7 @@ ENDFOREACH() SET(MICROKERNEL_VCVT_TESTS bf16-f32-vcvt bf16-qs8-vcvt + bf16-qu8-vcvt f16-f32-vcvt f16-qs8-vcvt f16-qu8-vcvt diff --git a/test/bf16-qu8-vcvt.cc b/test/bf16-qu8-vcvt.cc new file mode 100644 index 00000000000..b28acbc4a85 --- /dev/null +++ b/test/bf16-qu8-vcvt.cc @@ -0,0 +1,40 @@ +// Copyright 2025 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#include "src/xnnpack/microparams-init.h" +#include "src/xnnpack/vcvt.h" +#include "test/vunary-microkernel-tester.h" + +#define XNN_QUANTIZED(T) xnnpack::quantized +#define XNN_UKERNEL(arch_flags, ukernel, batch_tile, \ + vector_tile, datatype_in, datatype_out, \ + params_type, init_params) \ + TEST(ukernel, batch_eq) { \ + TestBatchEq(arch_flags, batch_tile, \ + ukernel, init_params); \ + } \ + TEST(ukernel, batch_div) { \ + TestBatchDiv(arch_flags, batch_tile, \ + ukernel, init_params); \ + } \ + TEST(ukernel, batch_lt) { \ + TestBatchLT(arch_flags, batch_tile, \ + ukernel, init_params); \ + } \ + TEST(ukernel, batch_gt) { \ + TestBatchGT(arch_flags, batch_tile, \ + ukernel, init_params); \ + } \ + TEST(ukernel, output_scale) { \ + TestOutputScale( \ + arch_flags, batch_tile, ukernel, init_params); \ + } \ + TEST(ukernel, output_zero_point) { \ + TestOutputZeroPoint( \ + arch_flags, batch_tile, ukernel, init_params); \ + } +#include "src/bf16-qu8-vcvt/bf16-qu8-vcvt.inc" +#undef XNN_UKERNEL +#undef XNN_QUANTIZED diff --git a/test/operators/convert-nc.cc b/test/operators/convert-nc.cc index f52f13fb039..b3427bff5e1 100644 --- a/test/operators/convert-nc.cc +++ b/test/operators/convert-nc.cc @@ -104,6 +104,164 @@ class ConvertOperatorTester { size_t iterations() const { return this->iterations_; } + void TestBF16toQD8() const { + xnnpack::ReplicableRandomDevice rng; + + xnnpack::Buffer input_float((batch_size() - 1) * input_stride() + + channels()); + xnnpack::Buffer input( + (batch_size() - 1) * input_stride() + channels(), + xnnpack::XnnExtraBytes); + xnnpack::Buffer output((batch_size() - 1) * output_stride() + + channels()); + xnnpack::Buffer quantization_params( + batch_size() + XNN_EXTRA_QUANTIZATION_PARAMS); + std::uniform_real_distribution range_dist(-10, 10); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + const float min_val = std::min(range_dist(rng), range_dist(rng)); + const float max_val = std::uniform_real_distribution( + 
min_val * + (1.0f + std::numeric_limits::max() * 7.8125e-3f), + 10.0f)(rng); + std::uniform_real_distribution f32dist(min_val, max_val); + std::generate(input_float.begin(), input_float.end(), + [&]() { return f32dist(rng); }); + std::copy(input_float.begin(), input_float.end(), input.begin()); + std::copy(input.begin(), input.begin() + channels(), input_float.begin()); + + // Create, setup, run, and destroy Convert operator. + ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t convert_op = nullptr; + + xnn_status status = xnn_create_convert_nc_bf16_qd8(0, &convert_op); + if (status == xnn_status_unsupported_hardware) { + GTEST_SKIP(); + } + ASSERT_EQ(xnn_status_success, status); + ASSERT_NE(nullptr, convert_op); + + // Smart pointer to automatically delete convert op. + std::unique_ptr + auto_convert_op(convert_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_reshape_convert_nc_bf16_qd8( + convert_op, batch_size(), channels(), input_stride(), + output_stride(), /*threadpool=*/nullptr)); + ASSERT_EQ(xnn_status_success, xnn_setup_convert_nc_bf16_qd8( + convert_op, input.data(), output.data(), + /*row_sum=*/nullptr, + quantization_params.data())); + ASSERT_EQ(xnn_status_success, + xnn_run_operator(convert_op, /*threadpool=*/nullptr)); + + // Verify results. + for (size_t i = 0; i < batch_size(); i++) { + const float* input_ptr = &input_float[i * input_stride()]; + const auto minmax = + std::minmax_element(input_ptr, input_ptr + channels()); + const float rmin = math_min_f32(0.0f, *minmax.first); + const float rmax = math_max_f32(0.0f, *minmax.second); + // bf16 has 7-bit mantissa (vs f16's 10), so scale/zero_point are + // less precise. Use a wider tolerance than the f16 test (0.8f). 
+ const float max_acceptable_error = + 2.0f * (rmax - rmin) / std::numeric_limits::max(); + for (size_t c = 0; c < channels(); c++) { + float expected = input_float[i * input_stride() + c]; + int8_t quantized_val = (int)output[i * output_stride() + c]; + float dequantized_val = + static_cast(quantized_val - + quantization_params[i].zero_point) * + quantization_params[i].scale; + ASSERT_NEAR(expected, dequantized_val, max_acceptable_error) + << "at batch " << i << " / " << batch_size() << ", channel " << c + << " / " << channels() << ", rmin=" << rmin << ", rmax=" << rmax + << ", quantization_params={zero_point=" + << quantization_params[i].zero_point + << ", scale=" << quantization_params[i].scale << "}"; + } + } + } + } + + void TestBF16toQDU8() const { + xnnpack::ReplicableRandomDevice rng; + + xnnpack::Buffer input_float((batch_size() - 1) * input_stride() + + channels()); + xnnpack::Buffer input( + (batch_size() - 1) * input_stride() + channels(), + xnnpack::XnnExtraBytes); + xnnpack::Buffer output((batch_size() - 1) * output_stride() + + channels()); + xnnpack::Buffer quantization_params( + batch_size() + XNN_EXTRA_QUANTIZATION_PARAMS); + std::uniform_real_distribution range_dist(-10, 10); + for (size_t iteration = 0; iteration < iterations(); iteration++) { + const float min_val = std::min(range_dist(rng), range_dist(rng)); + const float max_val = std::uniform_real_distribution( + min_val * + (1.0f + std::numeric_limits::max() * 7.8125e-3f), + 10.0f)(rng); + std::uniform_real_distribution f32dist(min_val, max_val); + std::generate(input_float.begin(), input_float.end(), + [&]() { return f32dist(rng); }); + std::copy(input_float.begin(), input_float.end(), input.begin()); + std::copy(input.begin(), input.begin() + channels(), input_float.begin()); + + // Create, setup, run, and destroy Convert operator. 
+ ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); + xnn_operator_t convert_op = nullptr; + + xnn_status status = xnn_create_convert_nc_bf16_qdu8(0, &convert_op); + if (status == xnn_status_unsupported_hardware) { + GTEST_SKIP(); + } + ASSERT_EQ(xnn_status_success, status); + ASSERT_NE(nullptr, convert_op); + + // Smart pointer to automatically delete convert op. + std::unique_ptr + auto_convert_op(convert_op, xnn_delete_operator); + + ASSERT_EQ(xnn_status_success, + xnn_reshape_convert_nc_bf16_qdu8( + convert_op, batch_size(), channels(), input_stride(), + output_stride(), /*threadpool=*/nullptr)); + ASSERT_EQ(xnn_status_success, xnn_setup_convert_nc_bf16_qdu8( + convert_op, input.data(), output.data(), + /*row_sum=*/nullptr, + quantization_params.data())); + ASSERT_EQ(xnn_status_success, + xnn_run_operator(convert_op, /*threadpool=*/nullptr)); + + // Verify results. + for (size_t i = 0; i < batch_size(); i++) { + const float* input_ptr = &input_float[i * input_stride()]; + const auto minmax = + std::minmax_element(input_ptr, input_ptr + channels()); + const float rmin = math_min_f32(0.0f, *minmax.first); + const float rmax = math_max_f32(0.0f, *minmax.second); + const float max_acceptable_error = + 2.0f * (rmax - rmin) / std::numeric_limits::max(); + for (size_t c = 0; c < channels(); c++) { + float expected = input_float[i * input_stride() + c]; + uint8_t quantized_val = output[i * output_stride() + c]; + float dequantized_val = + static_cast(quantized_val - + quantization_params[i].zero_point) * + quantization_params[i].scale; + ASSERT_NEAR(expected, dequantized_val, max_acceptable_error) + << "at batch " << i << " / " << batch_size() << ", channel " << c + << " / " << channels() << ", rmin=" << rmin << ", rmax=" << rmax + << ", quantization_params={zero_point=" + << quantization_params[i].zero_point + << ", scale=" << quantization_params[i].scale << "}"; + } + } + } + } + void TestF16toQD8() const { xnnpack::ReplicableRandomDevice rng; 
@@ -589,6 +747,114 @@ class ConvertOperatorTester { size_t iterations_{15}; }; +TEST(CONVERT_NC_BF16_QD8, unit_batch) { + for (size_t channels = 1; channels < 100; channels++) { + ConvertOperatorTester() + .batch_size(1) + .channels(channels) + .iterations(3) + .TestBF16toQD8(); + } +} + +TEST(CONVERT_NC_BF16_QD8, small_batch) { + for (size_t channels = 1; channels < 100; channels++) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .iterations(3) + .TestBF16toQD8(); + } +} + +TEST(CONVERT_NC_BF16_QD8, small_batch_with_input_stride) { + for (size_t channels = 10; channels < 11; channels += 15) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .iterations(3) + .TestBF16toQD8(); + } +} + +TEST(CONVERT_NC_BF16_QD8, small_batch_with_output_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .output_stride(117) + .iterations(3) + .TestBF16toQD8(); + } +} + +TEST(CONVERT_NC_BF16_QD8, small_batch_with_input_and_output_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .output_stride(117) + .iterations(3) + .TestBF16toQD8(); + } +} + +TEST(CONVERT_NC_BF16_QDU8, unit_batch) { + for (size_t channels = 1; channels < 100; channels++) { + ConvertOperatorTester() + .batch_size(1) + .channels(channels) + .iterations(3) + .TestBF16toQDU8(); + } +} + +TEST(CONVERT_NC_BF16_QDU8, small_batch) { + for (size_t channels = 1; channels < 100; channels++) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .iterations(3) + .TestBF16toQDU8(); + } +} + +TEST(CONVERT_NC_BF16_QDU8, small_batch_with_input_stride) { + for (size_t channels = 10; channels < 11; channels += 15) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .iterations(3) + .TestBF16toQDU8(); + } +} + 
+TEST(CONVERT_NC_BF16_QDU8, small_batch_with_output_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .output_stride(117) + .iterations(3) + .TestBF16toQDU8(); + } +} + +TEST(CONVERT_NC_BF16_QDU8, small_batch_with_input_and_output_stride) { + for (size_t channels = 1; channels < 100; channels += 15) { + ConvertOperatorTester() + .batch_size(3) + .channels(channels) + .input_stride(129) + .output_stride(117) + .iterations(3) + .TestBF16toQDU8(); + } +} + TEST(CONVERT_NC_F16_QD8, unit_batch) { for (size_t channels = 1; channels < 100; channels++) { ConvertOperatorTester()