Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmake/gen/rvv_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ SET(PROD_RVV_MICROKERNEL_SRCS
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c
src/f32-gemm/gen/f32-gemm-1x4v-minmax-rvv.c
src/f32-gemm/gen/f32-gemm-7x4v-minmax-rvv.c
src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c
src/f32-igemm/gen/f32-igemm-1x4v-minmax-rvv.c
src/f32-igemm/gen/f32-igemm-7x4v-minmax-rvv.c
src/f32-maxpool/gen/f32-maxpool-9p-minmax-rvv-u2v.c
Expand Down Expand Up @@ -166,6 +167,8 @@ SET(NON_PROD_RVV_MICROKERNEL_SRCS
src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c
src/f32-gemm/gen/f32-gemm-1x4v-rvv.c
src/f32-gemm/gen/f32-gemm-7x4v-rvv.c
src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c
src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c
src/f32-igemm/gen/f32-igemm-1x4v-rvv.c
src/f32-igemm/gen/f32-igemm-7x4v-rvv.c
src/f32-qs8-vcvt/gen/f32-qs8-vcvt-rvv-u1v.c
Expand Down
3 changes: 3 additions & 0 deletions gen/rvv_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ PROD_RVV_MICROKERNEL_SRCS = [
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c",
"src/f32-gemm/gen/f32-gemm-1x4v-minmax-rvv.c",
"src/f32-gemm/gen/f32-gemm-7x4v-minmax-rvv.c",
"src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c",
"src/f32-igemm/gen/f32-igemm-1x4v-minmax-rvv.c",
"src/f32-igemm/gen/f32-igemm-7x4v-minmax-rvv.c",
"src/f32-maxpool/gen/f32-maxpool-9p-minmax-rvv-u2v.c",
Expand Down Expand Up @@ -163,6 +164,8 @@ NON_PROD_RVV_MICROKERNEL_SRCS = [
"src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c",
"src/f32-gemm/gen/f32-gemm-1x4v-rvv.c",
"src/f32-gemm/gen/f32-gemm-7x4v-rvv.c",
"src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c",
"src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c",
"src/f32-igemm/gen/f32-igemm-1x4v-rvv.c",
"src/f32-igemm/gen/f32-igemm-7x4v-rvv.c",
"src/f32-qs8-vcvt/gen/f32-qs8-vcvt-rvv-u1v.c",
Expand Down
5 changes: 5 additions & 0 deletions scripts/generate-f32-ibilinear.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,9 @@ tools/xngen src/f32-ibilinear/neon.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -D FMA
tools/xngen src/f32-ibilinear/sse.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/f32-ibilinear-sse-u4.c &
tools/xngen src/f32-ibilinear/sse.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/f32-ibilinear-sse-u8.c &

################################## RISC-V RVV #################################
tools/xngen src/f32-ibilinear/rvv.c.in -D LMUL=1 -o src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c &
tools/xngen src/f32-ibilinear/rvv.c.in -D LMUL=2 -o src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c &
tools/xngen src/f32-ibilinear/rvv.c.in -D LMUL=4 -o src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c &

wait
8 changes: 8 additions & 0 deletions src/configs/ibilinear-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ static void init_f32_ibilinear_config(void) {
f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_u8);
#elif XNN_ARCH_WASMSIMD
f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__wasmsimd_u8);
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector) {
f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__rvv_u2v);
} else {
f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__scalar_u2);
}
#else
f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__scalar_u2);
#endif
Expand Down
75 changes: 75 additions & 0 deletions src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// clang-format off
// Auto-generated file. Do not edit!
// Template: src/f32-ibilinear/rvv.c.in
// Generator: tools/xngen
//
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <riscv_vector.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/ibilinear.h"


void xnn_f32_ibilinear_ukernel__rvv_u1v(
size_t output_pixels,
size_t channels,
const float** restrict input,
size_t input_offset,
const float* restrict weights,
float* restrict output,
size_t output_increment)
{
assert(output_pixels != 0);
assert(channels != 0);
assert(channels % sizeof(float) == 0);

do {
const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
input += 4;

const float valphah = weights[0];
const float valphav = weights[1];
weights += 2;

size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
do {
const size_t n = __riscv_vsetvl_e32m1(c);

// Load top-left, top-right, bottom-left, bottom-right.
vfloat32m1_t vtl = __riscv_vle32_v_f32m1(i0, n); i0 += n;
vfloat32m1_t vtr = __riscv_vle32_v_f32m1(i1, n); i1 += n;
vfloat32m1_t vbl = __riscv_vle32_v_f32m1(i2, n); i2 += n;
vfloat32m1_t vbr = __riscv_vle32_v_f32m1(i3, n); i3 += n;

// Horizontal interpolation differences.
vfloat32m1_t vtd = __riscv_vfsub_vv_f32m1(vtr, vtl, n);
vfloat32m1_t vbd = __riscv_vfsub_vv_f32m1(vbr, vbl, n);

// Horizontal interpolation: top = tl + (tr - tl) * alphah.
vfloat32m1_t vt = __riscv_vfmacc_vf_f32m1(vtl, valphah, vtd, n);
vfloat32m1_t vb = __riscv_vfmacc_vf_f32m1(vbl, valphah, vbd, n);

// Vertical interpolation: output = top + (bottom - top) * alphav.
vfloat32m1_t vd = __riscv_vfsub_vv_f32m1(vb, vt, n);
vfloat32m1_t vo = __riscv_vfmacc_vf_f32m1(vt, valphav, vd, n);

__riscv_vse32_v_f32m1(output, vo, n);
output += n;

c -= n;
} while (c != 0);

output = (float*) ((uintptr_t) output + output_increment);
} while (--output_pixels != 0);
}
75 changes: 75 additions & 0 deletions src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// clang-format off
// Auto-generated file. Do not edit!
// Template: src/f32-ibilinear/rvv.c.in
// Generator: tools/xngen
//
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <riscv_vector.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/ibilinear.h"


void xnn_f32_ibilinear_ukernel__rvv_u2v(
size_t output_pixels,
size_t channels,
const float** restrict input,
size_t input_offset,
const float* restrict weights,
float* restrict output,
size_t output_increment)
{
assert(output_pixels != 0);
assert(channels != 0);
assert(channels % sizeof(float) == 0);

do {
const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
input += 4;

const float valphah = weights[0];
const float valphav = weights[1];
weights += 2;

size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
do {
const size_t n = __riscv_vsetvl_e32m2(c);

// Load top-left, top-right, bottom-left, bottom-right.
vfloat32m2_t vtl = __riscv_vle32_v_f32m2(i0, n); i0 += n;
vfloat32m2_t vtr = __riscv_vle32_v_f32m2(i1, n); i1 += n;
vfloat32m2_t vbl = __riscv_vle32_v_f32m2(i2, n); i2 += n;
vfloat32m2_t vbr = __riscv_vle32_v_f32m2(i3, n); i3 += n;

// Horizontal interpolation differences.
vfloat32m2_t vtd = __riscv_vfsub_vv_f32m2(vtr, vtl, n);
vfloat32m2_t vbd = __riscv_vfsub_vv_f32m2(vbr, vbl, n);

// Horizontal interpolation: top = tl + (tr - tl) * alphah.
vfloat32m2_t vt = __riscv_vfmacc_vf_f32m2(vtl, valphah, vtd, n);
vfloat32m2_t vb = __riscv_vfmacc_vf_f32m2(vbl, valphah, vbd, n);

// Vertical interpolation: output = top + (bottom - top) * alphav.
vfloat32m2_t vd = __riscv_vfsub_vv_f32m2(vb, vt, n);
vfloat32m2_t vo = __riscv_vfmacc_vf_f32m2(vt, valphav, vd, n);

__riscv_vse32_v_f32m2(output, vo, n);
output += n;

c -= n;
} while (c != 0);

output = (float*) ((uintptr_t) output + output_increment);
} while (--output_pixels != 0);
}
75 changes: 75 additions & 0 deletions src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// clang-format off
// Auto-generated file. Do not edit!
// Template: src/f32-ibilinear/rvv.c.in
// Generator: tools/xngen
//
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <riscv_vector.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/ibilinear.h"


void xnn_f32_ibilinear_ukernel__rvv_u4v(
size_t output_pixels,
size_t channels,
const float** restrict input,
size_t input_offset,
const float* restrict weights,
float* restrict output,
size_t output_increment)
{
assert(output_pixels != 0);
assert(channels != 0);
assert(channels % sizeof(float) == 0);

do {
const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
input += 4;

const float valphah = weights[0];
const float valphav = weights[1];
weights += 2;

size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
do {
const size_t n = __riscv_vsetvl_e32m4(c);

// Load top-left, top-right, bottom-left, bottom-right.
vfloat32m4_t vtl = __riscv_vle32_v_f32m4(i0, n); i0 += n;
vfloat32m4_t vtr = __riscv_vle32_v_f32m4(i1, n); i1 += n;
vfloat32m4_t vbl = __riscv_vle32_v_f32m4(i2, n); i2 += n;
vfloat32m4_t vbr = __riscv_vle32_v_f32m4(i3, n); i3 += n;

// Horizontal interpolation differences.
vfloat32m4_t vtd = __riscv_vfsub_vv_f32m4(vtr, vtl, n);
vfloat32m4_t vbd = __riscv_vfsub_vv_f32m4(vbr, vbl, n);

// Horizontal interpolation: top = tl + (tr - tl) * alphah.
vfloat32m4_t vt = __riscv_vfmacc_vf_f32m4(vtl, valphah, vtd, n);
vfloat32m4_t vb = __riscv_vfmacc_vf_f32m4(vbl, valphah, vbd, n);

// Vertical interpolation: output = top + (bottom - top) * alphav.
vfloat32m4_t vd = __riscv_vfsub_vv_f32m4(vb, vt, n);
vfloat32m4_t vo = __riscv_vfmacc_vf_f32m4(vt, valphav, vd, n);

__riscv_vse32_v_f32m4(output, vo, n);
output += n;

c -= n;
} while (c != 0);

output = (float*) ((uintptr_t) output + output_increment);
} while (--output_pixels != 0);
}
71 changes: 71 additions & 0 deletions src/f32-ibilinear/rvv.c.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert LMUL in [1, 2, 4, 8]
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <riscv_vector.h>

#include "src/xnnpack/common.h"
#include "src/xnnpack/ibilinear.h"


void xnn_f32_ibilinear_ukernel__rvv_u${LMUL}v(
size_t output_pixels,
size_t channels,
const float** restrict input,
size_t input_offset,
const float* restrict weights,
float* restrict output,
size_t output_increment)
{
assert(output_pixels != 0);
assert(channels != 0);
assert(channels % sizeof(float) == 0);

do {
const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
input += 4;

const float valphah = weights[0];
const float valphav = weights[1];
weights += 2;

size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
do {
const size_t n = __riscv_vsetvl_e32m${LMUL}(c);

// Load top-left, top-right, bottom-left, bottom-right.
vfloat32m${LMUL}_t vtl = __riscv_vle32_v_f32m${LMUL}(i0, n); i0 += n;
vfloat32m${LMUL}_t vtr = __riscv_vle32_v_f32m${LMUL}(i1, n); i1 += n;
vfloat32m${LMUL}_t vbl = __riscv_vle32_v_f32m${LMUL}(i2, n); i2 += n;
vfloat32m${LMUL}_t vbr = __riscv_vle32_v_f32m${LMUL}(i3, n); i3 += n;

// Horizontal interpolation differences.
vfloat32m${LMUL}_t vtd = __riscv_vfsub_vv_f32m${LMUL}(vtr, vtl, n);
vfloat32m${LMUL}_t vbd = __riscv_vfsub_vv_f32m${LMUL}(vbr, vbl, n);

// Horizontal interpolation: top = tl + (tr - tl) * alphah.
vfloat32m${LMUL}_t vt = __riscv_vfmacc_vf_f32m${LMUL}(vtl, valphah, vtd, n);
vfloat32m${LMUL}_t vb = __riscv_vfmacc_vf_f32m${LMUL}(vbl, valphah, vbd, n);

// Vertical interpolation: output = top + (bottom - top) * alphav.
vfloat32m${LMUL}_t vd = __riscv_vfsub_vv_f32m${LMUL}(vb, vt, n);
vfloat32m${LMUL}_t vo = __riscv_vfmacc_vf_f32m${LMUL}(vt, valphav, vd, n);

__riscv_vse32_v_f32m${LMUL}(output, vo, n);
output += n;

c -= n;
} while (c != 0);

output = (float*) ((uintptr_t) output + output_increment);
} while (--output_pixels != 0);
}
4 changes: 4 additions & 0 deletions src/xnnpack/ibilinear.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(
DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(
xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_u8)

DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__rvv_u1v)
DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__rvv_u2v)
DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__rvv_u4v)

#define DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name(size_t output_pixels, size_t channels, \
const int8_t** input, size_t input_offset, \
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ SET(MICROKERNEL_UNIT_TESTS
f32-conv-hwc
f32-conv-hwc2chw
f32-ibilinear
f32-ibilinear-rvv
f32-ibilinear-chw
f32-raddexpminusmax
f32-raddextexp
Expand Down
Loading