google · velonica0 · Apr 14, 2026 · Apr 15, 2026 · May 18, 2026
diff --git a/cmake/gen/rvv_microkernels.cmake b/cmake/gen/rvv_microkernels.cmake
@@ -25,6 +25,7 @@ SET(PROD_RVV_MICROKERNEL_SRCS
   src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c
   src/f32-gemm/gen/f32-gemm-1x4v-minmax-rvv.c
   src/f32-gemm/gen/f32-gemm-7x4v-minmax-rvv.c
+  src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c
   src/f32-igemm/gen/f32-igemm-1x4v-minmax-rvv.c
   src/f32-igemm/gen/f32-igemm-7x4v-minmax-rvv.c
   src/f32-maxpool/gen/f32-maxpool-9p-minmax-rvv-u2v.c
@@ -166,6 +167,8 @@ SET(NON_PROD_RVV_MICROKERNEL_SRCS
   src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c
   src/f32-gemm/gen/f32-gemm-1x4v-rvv.c
   src/f32-gemm/gen/f32-gemm-7x4v-rvv.c
+  src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c
+  src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c
   src/f32-igemm/gen/f32-igemm-1x4v-rvv.c
   src/f32-igemm/gen/f32-igemm-7x4v-rvv.c
   src/f32-qs8-vcvt/gen/f32-qs8-vcvt-rvv-u1v.c

diff --git a/gen/rvv_microkernels.bzl b/gen/rvv_microkernels.bzl
@@ -21,6 +21,7 @@ PROD_RVV_MICROKERNEL_SRCS = [
     "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-2x2v.c",
     "src/f32-gemm/gen/f32-gemm-1x4v-minmax-rvv.c",
     "src/f32-gemm/gen/f32-gemm-7x4v-minmax-rvv.c",
+    "src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c",
     "src/f32-igemm/gen/f32-igemm-1x4v-minmax-rvv.c",
     "src/f32-igemm/gen/f32-igemm-7x4v-minmax-rvv.c",
     "src/f32-maxpool/gen/f32-maxpool-9p-minmax-rvv-u2v.c",
@@ -163,6 +164,8 @@ NON_PROD_RVV_MICROKERNEL_SRCS = [
     "src/f32-dwconv2d-chw/gen/f32-dwconv2d-chw-3x3s2p1-minmax-rvv-8x1v.c",
     "src/f32-gemm/gen/f32-gemm-1x4v-rvv.c",
     "src/f32-gemm/gen/f32-gemm-7x4v-rvv.c",
+    "src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c",
+    "src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c",
     "src/f32-igemm/gen/f32-igemm-1x4v-rvv.c",
     "src/f32-igemm/gen/f32-igemm-7x4v-rvv.c",
     "src/f32-qs8-vcvt/gen/f32-qs8-vcvt-rvv-u1v.c",

diff --git a/scripts/generate-f32-ibilinear.sh b/scripts/generate-f32-ibilinear.sh
@@ -27,4 +27,9 @@ tools/xngen src/f32-ibilinear/neon.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -D FMA
 tools/xngen src/f32-ibilinear/sse.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/f32-ibilinear-sse-u4.c &
 tools/xngen src/f32-ibilinear/sse.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/f32-ibilinear-sse-u8.c &
 
+################################## RISC-V RVV #################################
+tools/xngen src/f32-ibilinear/rvv.c.in -D LMUL=1 -o src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c &
+tools/xngen src/f32-ibilinear/rvv.c.in -D LMUL=2 -o src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c &
+tools/xngen src/f32-ibilinear/rvv.c.in -D LMUL=4 -o src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c &
+
 wait
diff --git a/src/configs/ibilinear-config.c b/src/configs/ibilinear-config.c
@@ -92,6 +92,14 @@ static void init_f32_ibilinear_config(void) {
     f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_u8);
   #elif XNN_ARCH_WASMSIMD
     f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__wasmsimd_u8);
+  #elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
+    const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
+    assert(hardware_config != NULL);
+    if (hardware_config->arch_flags & xnn_arch_riscv_vector) {
+      f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__rvv_u2v);
+    } else {
+      f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__scalar_u2);
+    }
   #else
     f32_ibilinear_config.ukernel = XNN_INIT_IBILINEAR_UKERNEL(xnn_f32_ibilinear_ukernel__scalar_u2);
   #endif

diff --git a/src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c b/src/f32-ibilinear/gen/f32-ibilinear-rvv-u1v.c
@@ -0,0 +1,75 @@
+// clang-format off
+// Auto-generated file. Do not edit!
+//   Template: src/f32-ibilinear/rvv.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2024 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <riscv_vector.h>
+
+#include "src/xnnpack/common.h"
+#include "src/xnnpack/ibilinear.h"
+
+
+void xnn_f32_ibilinear_ukernel__rvv_u1v(
+    size_t output_pixels,
+    size_t channels,
+    const float** restrict input,
+    size_t input_offset,
+    const float* restrict weights,
+    float* restrict output,
+    size_t output_increment)
+{
+  assert(output_pixels != 0);
+  assert(channels != 0);
+  assert(channels % sizeof(float) == 0);
+
+  do {
+    const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
+    const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
+    const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
+    const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
+    input += 4;
+
+    const float valphah = weights[0];
+    const float valphav = weights[1];
+    weights += 2;
+
+    size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
+    do {
+      const size_t n = __riscv_vsetvl_e32m1(c);
+
+      // Load top-left, top-right, bottom-left, bottom-right.
+      vfloat32m1_t vtl = __riscv_vle32_v_f32m1(i0, n); i0 += n;
+      vfloat32m1_t vtr = __riscv_vle32_v_f32m1(i1, n); i1 += n;
+      vfloat32m1_t vbl = __riscv_vle32_v_f32m1(i2, n); i2 += n;
+      vfloat32m1_t vbr = __riscv_vle32_v_f32m1(i3, n); i3 += n;
+
+      // Horizontal interpolation differences.
+      vfloat32m1_t vtd = __riscv_vfsub_vv_f32m1(vtr, vtl, n);
+      vfloat32m1_t vbd = __riscv_vfsub_vv_f32m1(vbr, vbl, n);
+
+      // Horizontal interpolation: top = tl + (tr - tl) * alphah.
+      vfloat32m1_t vt = __riscv_vfmacc_vf_f32m1(vtl, valphah, vtd, n);
+      vfloat32m1_t vb = __riscv_vfmacc_vf_f32m1(vbl, valphah, vbd, n);
+
+      // Vertical interpolation: output = top + (bottom - top) * alphav.
+      vfloat32m1_t vd = __riscv_vfsub_vv_f32m1(vb, vt, n);
+      vfloat32m1_t vo = __riscv_vfmacc_vf_f32m1(vt, valphav, vd, n);
+
+      __riscv_vse32_v_f32m1(output, vo, n);
+      output += n;
+
+      c -= n;
+    } while (c != 0);
+
+    output = (float*) ((uintptr_t) output + output_increment);
+  } while (--output_pixels != 0);
+}
diff --git a/src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c b/src/f32-ibilinear/gen/f32-ibilinear-rvv-u2v.c
@@ -0,0 +1,75 @@
+// clang-format off
+// Auto-generated file. Do not edit!
+//   Template: src/f32-ibilinear/rvv.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2024 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <riscv_vector.h>
+
+#include "src/xnnpack/common.h"
+#include "src/xnnpack/ibilinear.h"
+
+
+void xnn_f32_ibilinear_ukernel__rvv_u2v(
+    size_t output_pixels,
+    size_t channels,
+    const float** restrict input,
+    size_t input_offset,
+    const float* restrict weights,
+    float* restrict output,
+    size_t output_increment)
+{
+  assert(output_pixels != 0);
+  assert(channels != 0);
+  assert(channels % sizeof(float) == 0);
+
+  do {
+    const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
+    const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
+    const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
+    const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
+    input += 4;
+
+    const float valphah = weights[0];
+    const float valphav = weights[1];
+    weights += 2;
+
+    size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
+    do {
+      const size_t n = __riscv_vsetvl_e32m2(c);
+
+      // Load top-left, top-right, bottom-left, bottom-right.
+      vfloat32m2_t vtl = __riscv_vle32_v_f32m2(i0, n); i0 += n;
+      vfloat32m2_t vtr = __riscv_vle32_v_f32m2(i1, n); i1 += n;
+      vfloat32m2_t vbl = __riscv_vle32_v_f32m2(i2, n); i2 += n;
+      vfloat32m2_t vbr = __riscv_vle32_v_f32m2(i3, n); i3 += n;
+
+      // Horizontal interpolation differences.
+      vfloat32m2_t vtd = __riscv_vfsub_vv_f32m2(vtr, vtl, n);
+      vfloat32m2_t vbd = __riscv_vfsub_vv_f32m2(vbr, vbl, n);
+
+      // Horizontal interpolation: top = tl + (tr - tl) * alphah.
+      vfloat32m2_t vt = __riscv_vfmacc_vf_f32m2(vtl, valphah, vtd, n);
+      vfloat32m2_t vb = __riscv_vfmacc_vf_f32m2(vbl, valphah, vbd, n);
+
+      // Vertical interpolation: output = top + (bottom - top) * alphav.
+      vfloat32m2_t vd = __riscv_vfsub_vv_f32m2(vb, vt, n);
+      vfloat32m2_t vo = __riscv_vfmacc_vf_f32m2(vt, valphav, vd, n);
+
+      __riscv_vse32_v_f32m2(output, vo, n);
+      output += n;
+
+      c -= n;
+    } while (c != 0);
+
+    output = (float*) ((uintptr_t) output + output_increment);
+  } while (--output_pixels != 0);
+}
diff --git a/src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c b/src/f32-ibilinear/gen/f32-ibilinear-rvv-u4v.c
@@ -0,0 +1,75 @@
+// clang-format off
+// Auto-generated file. Do not edit!
+//   Template: src/f32-ibilinear/rvv.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2024 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <riscv_vector.h>
+
+#include "src/xnnpack/common.h"
+#include "src/xnnpack/ibilinear.h"
+
+
+void xnn_f32_ibilinear_ukernel__rvv_u4v(
+    size_t output_pixels,
+    size_t channels,
+    const float** restrict input,
+    size_t input_offset,
+    const float* restrict weights,
+    float* restrict output,
+    size_t output_increment)
+{
+  assert(output_pixels != 0);
+  assert(channels != 0);
+  assert(channels % sizeof(float) == 0);
+
+  do {
+    const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
+    const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
+    const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
+    const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
+    input += 4;
+
+    const float valphah = weights[0];
+    const float valphav = weights[1];
+    weights += 2;
+
+    size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
+    do {
+      const size_t n = __riscv_vsetvl_e32m4(c);
+
+      // Load top-left, top-right, bottom-left, bottom-right.
+      vfloat32m4_t vtl = __riscv_vle32_v_f32m4(i0, n); i0 += n;
+      vfloat32m4_t vtr = __riscv_vle32_v_f32m4(i1, n); i1 += n;
+      vfloat32m4_t vbl = __riscv_vle32_v_f32m4(i2, n); i2 += n;
+      vfloat32m4_t vbr = __riscv_vle32_v_f32m4(i3, n); i3 += n;
+
+      // Horizontal interpolation differences.
+      vfloat32m4_t vtd = __riscv_vfsub_vv_f32m4(vtr, vtl, n);
+      vfloat32m4_t vbd = __riscv_vfsub_vv_f32m4(vbr, vbl, n);
+
+      // Horizontal interpolation: top = tl + (tr - tl) * alphah.
+      vfloat32m4_t vt = __riscv_vfmacc_vf_f32m4(vtl, valphah, vtd, n);
+      vfloat32m4_t vb = __riscv_vfmacc_vf_f32m4(vbl, valphah, vbd, n);
+
+      // Vertical interpolation: output = top + (bottom - top) * alphav.
+      vfloat32m4_t vd = __riscv_vfsub_vv_f32m4(vb, vt, n);
+      vfloat32m4_t vo = __riscv_vfmacc_vf_f32m4(vt, valphav, vd, n);
+
+      __riscv_vse32_v_f32m4(output, vo, n);
+      output += n;
+
+      c -= n;
+    } while (c != 0);
+
+    output = (float*) ((uintptr_t) output + output_increment);
+  } while (--output_pixels != 0);
+}
diff --git a/src/f32-ibilinear/rvv.c.in b/src/f32-ibilinear/rvv.c.in
@@ -0,0 +1,71 @@
+// Copyright 2024 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+$assert LMUL in [1, 2, 4, 8]
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <riscv_vector.h>
+
+#include "src/xnnpack/common.h"
+#include "src/xnnpack/ibilinear.h"
+
+
+void xnn_f32_ibilinear_ukernel__rvv_u${LMUL}v(
+    size_t output_pixels,
+    size_t channels,
+    const float** restrict input,
+    size_t input_offset,
+    const float* restrict weights,
+    float* restrict output,
+    size_t output_increment)
+{
+  assert(output_pixels != 0);
+  assert(channels != 0);
+  assert(channels % sizeof(float) == 0);
+
+  do {
+    const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
+    const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
+    const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
+    const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
+    input += 4;
+
+    const float valphah = weights[0];
+    const float valphav = weights[1];
+    weights += 2;
+
+    size_t c = channels >> XNN_LOG2_SIZEOF_FLOAT;
+    do {
+      const size_t n = __riscv_vsetvl_e32m${LMUL}(c);
+
+      // Load top-left, top-right, bottom-left, bottom-right.
+      vfloat32m${LMUL}_t vtl = __riscv_vle32_v_f32m${LMUL}(i0, n); i0 += n;
+      vfloat32m${LMUL}_t vtr = __riscv_vle32_v_f32m${LMUL}(i1, n); i1 += n;
+      vfloat32m${LMUL}_t vbl = __riscv_vle32_v_f32m${LMUL}(i2, n); i2 += n;
+      vfloat32m${LMUL}_t vbr = __riscv_vle32_v_f32m${LMUL}(i3, n); i3 += n;
+
+      // Horizontal interpolation differences.
+      vfloat32m${LMUL}_t vtd = __riscv_vfsub_vv_f32m${LMUL}(vtr, vtl, n);
+      vfloat32m${LMUL}_t vbd = __riscv_vfsub_vv_f32m${LMUL}(vbr, vbl, n);
+
+      // Horizontal interpolation: top = tl + (tr - tl) * alphah.
+      vfloat32m${LMUL}_t vt = __riscv_vfmacc_vf_f32m${LMUL}(vtl, valphah, vtd, n);
+      vfloat32m${LMUL}_t vb = __riscv_vfmacc_vf_f32m${LMUL}(vbl, valphah, vbd, n);
+
+      // Vertical interpolation: output = top + (bottom - top) * alphav.
+      vfloat32m${LMUL}_t vd = __riscv_vfsub_vv_f32m${LMUL}(vb, vt, n);
+      vfloat32m${LMUL}_t vo = __riscv_vfmacc_vf_f32m${LMUL}(vt, valphav, vd, n);
+
+      __riscv_vse32_v_f32m${LMUL}(output, vo, n);
+      output += n;
+
+      c -= n;
+    } while (c != 0);
+
+    output = (float*) ((uintptr_t) output + output_increment);
+  } while (--output_pixels != 0);
+}
diff --git a/src/xnnpack/ibilinear.h b/src/xnnpack/ibilinear.h
@@ -57,6 +57,10 @@ DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(
 DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(
     xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_u8)
 
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__rvv_u1v)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__rvv_u2v)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__rvv_u4v)
+
 #define DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(fn_name)                 \
   XNN_INTERNAL void fn_name(size_t output_pixels, size_t channels,     \
                             const int8_t** input, size_t input_offset, \

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -102,6 +102,7 @@ SET(MICROKERNEL_UNIT_TESTS
     f32-conv-hwc
     f32-conv-hwc2chw
     f32-ibilinear
+    f32-ibilinear-rvv
     f32-ibilinear-chw
     f32-raddexpminusmax
     f32-raddextexp