diff --git a/llvm/lib/Target/SBF/SBFFrameLowering.cpp b/llvm/lib/Target/SBF/SBFFrameLowering.cpp index dc6ec974ff745..db0aa1dbd865a 100644 --- a/llvm/lib/Target/SBF/SBFFrameLowering.cpp +++ b/llvm/lib/Target/SBF/SBFFrameLowering.cpp @@ -37,6 +37,12 @@ void SBFFrameLowering::emitPrologue(MachineFunction &MF, if (Subtarget.isDynamicFramesV1()) NumBytes = -NumBytes; + else if (NumBytes <= FrameSize) + // In V3, we don't bump if the number of bytes does not exceed the + // default frame size. + return; + else + NumBytes -= FrameSize; BuildMI(MBB, MBBI, Dl, TII.get(SBF::ADD_ri), SBF::R10) .addReg(SBF::R10) diff --git a/llvm/lib/Target/SBF/SBFFrameLowering.h b/llvm/lib/Target/SBF/SBFFrameLowering.h index 192513ca20260..97cbad65f5a12 100644 --- a/llvm/lib/Target/SBF/SBFFrameLowering.h +++ b/llvm/lib/Target/SBF/SBFFrameLowering.h @@ -39,6 +39,8 @@ class SBFFrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator MI) const override { return MBB.erase(MI); } +private: + const int FrameSize = 4096; }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/SBF/SBFRegisterInfo.cpp b/llvm/lib/Target/SBF/SBFRegisterInfo.cpp index b6500e1f8ac10..94fe6deb0a3eb 100644 --- a/llvm/lib/Target/SBF/SBFRegisterInfo.cpp +++ b/llvm/lib/Target/SBF/SBFRegisterInfo.cpp @@ -26,7 +26,7 @@ #include "SBFGenRegisterInfo.inc" using namespace llvm; -unsigned SBFRegisterInfo::FrameLength = 512; +unsigned SBFRegisterInfo::FrameLength = 4096; SBFRegisterInfo::SBFRegisterInfo() : SBFGenRegisterInfo(SBF::R0) {} @@ -176,11 +176,10 @@ int SBFRegisterInfo::resolveInternalFrameIndex( Offset += Imm.value_or(0); if (SubTarget.getHasDynamicFrames()) { - Offset += static_cast(StackSize); if (SubTarget.isDynamicFramesV1()) - return Offset; + return Offset + static_cast(StackSize); - return -Offset; + return -(Offset + std::max(static_cast(StackSize), static_cast(FrameLength))); } return Offset; diff --git a/llvm/test/CodeGen/SBF/dynamic_stack_frame_add_and_sub.ll 
b/llvm/test/CodeGen/SBF/dynamic_stack_frame_add_and_sub.ll index 109420429b06f..07e46e6271216 100644 --- a/llvm/test/CodeGen/SBF/dynamic_stack_frame_add_and_sub.ll +++ b/llvm/test/CodeGen/SBF/dynamic_stack_frame_add_and_sub.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=sbf -mattr=+dynamic-frames | FileCheck %s -; RUN: llc -O3 -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s +; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s ; ; Source: ; int test_func(int * vec, int idx) { @@ -14,7 +14,7 @@ define i32 @test_func(ptr noundef %vec, i32 noundef %idx) #0 { ; CHECK-LABEL: test_func: ; CHECK: add64 r10, -128 -; CHECK-V3: add64 r10, 128 +; CHECK-V3-NOT: add64 r10, 128 entry: %vec.addr = alloca ptr, align 8 %idx.addr = alloca i512, align 4 @@ -29,4 +29,26 @@ entry: store i32 %sub, ptr %arrayidx, align 4 %3 = load i32, ptr %idx.addr, align 4 ret i32 %3 +} + +declare i64 @read_ptr(ptr %a); + +define i64 @test_func_4096(i64 %idx) { +; CHECK-LABEL: test_func_4096 +; CHECK-V3-NOT: add64 r10, 4096 +entry: + %large_var = alloca [4096 x i8], align 8 + %val = call i64 @read_ptr(ptr %large_var) + ret i64 %val +} + +define i64 @test_func_4128(i64 %idx) { +; CHECK-LABEL: test_func_4128 +; CHECK-V3: add64 r10, 64 +; The stack is aligned at 64, so we bump 64 to have a stack size of 4096+64=4160, +; so we can fit the 4128 bytes of the array. 
+entry: + %large_var = alloca [4128 x i8], align 8 + %val = call i64 @read_ptr(ptr %large_var) + ret i64 %val } \ No newline at end of file diff --git a/llvm/test/CodeGen/SBF/many_args_new_conv.ll b/llvm/test/CodeGen/SBF/many_args_new_conv.ll index 0512a3f225aad..421dc95deec5f 100644 --- a/llvm/test/CodeGen/SBF/many_args_new_conv.ll +++ b/llvm/test/CodeGen/SBF/many_args_new_conv.ll @@ -1,7 +1,7 @@ -; RUN: llc -O2 -march=sbf -mcpu=v1 < %s | FileCheck %s -; RUN: llc -O2 -mtriple=sbpfv1-solana-solana < %s | FileCheck %s -; RUN: llc -O2 -march=sbf -mcpu=v1 -mattr=+mem-encoding < %s | FileCheck %s -; RUN: llc -O3 -march=sbf -mattr=+dynamic-frames-v3 < %s | FileCheck --check-prefix=CHECK-V3 %s +; RUN: llc -march=sbf -mcpu=v1 < %s | FileCheck %s +; RUN: llc -mtriple=sbpfv1-solana-solana < %s | FileCheck %s +; RUN: llc -march=sbf -mcpu=v1 -mattr=+mem-encoding < %s | FileCheck %s +; RUN: llc -march=sbf -mattr=+dynamic-frames-v3 < %s | FileCheck --check-prefix=CHECK-V3 %s ; Function Attrs: nounwind uwtable define i32 @caller_no_alloca(i32 %a, i32 %b, i32 %c) #0 { @@ -35,8 +35,12 @@ entry: ; Function Attrs: nounwind uwtable define i32 @caller_alloca(i32 %a, i32 %b, i32 %c) #0 { ; CHECK-LABEL: caller_alloca -; CHECK: add64 r10, -64 -; CHECK: ldxw r1, [r10 + 60] +; CHECK: add64 r10, -4160 +; CHECK: ldxw r1, [r10 + 88] +; 88 is 8*7 + 32 + +; CHECK-V3: add64 r10, 64 +; CHECK-V3: ldxw r1, [r10 - 88] ; Saving arguments in the callee's frame @@ -65,10 +69,13 @@ define i32 @caller_alloca(i32 %a, i32 %b, i32 %c) #0 { ; CHECK: mov64 r4, 1 ; CHECK: mov64 r5, 2 ; CHECK: call callee_no_alloca +; CHECK: ldxw r1, [r10 + 16] +; CHECK-V3: ldxw r1, [r10 - 16] entry: - %g = alloca i32 - %g1 = load i32, ptr %g + %g = alloca [4128 x i8], align 8 + %off = getelementptr i64, ptr %g, i64 7 + %g1 = load i32, ptr %off %call = tail call i32 @callee_no_alloca(i32 %g1, i32 %b, i32 %c, i32 1, i32 2, i32 3, i32 4, i32 50, i32 55, i32 60) #3 %h = alloca i128 %h1 = load i32, ptr %h @@ -79,31 +86,31 @@ entry: 
; Function Attrs: nounwind uwtable define i32 @callee_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %p, i32 %y, i32 %a1, i32 %a2) #1 { ; CHECK-LABEL: callee_alloca -; CHECK: add64 r10, -128 -; CHECK-V3: add64 r10, 128 +; CHECK: add64 r10, -5056 +; CHECK-V3: add64 r10, 960 ; Loading arguments -; CHECK: ldxw r2, [r10 + 120] -; CHECK: ldxw r2, [r10 + 112] -; CHECK: ldxw r2, [r10 + 104] -; CHECK: ldxw r2, [r10 + 96] -; CHECK: ldxw r2, [r10 + 88] +; CHECK: ldxw r2, [r10 + 5048] +; CHECK: ldxw r2, [r10 + 5040] +; CHECK: ldxw r2, [r10 + 5032] +; CHECK: ldxw r2, [r10 + 5024] +; CHECK: ldxw r2, [r10 + 5016] ; Loading allocated i32 -; CHECK: ldxw r0, [r10 + 24] +; CHECK: ldxw r0, [r10 + 16] -; CHECK-V3: ldxw r2, [r10 - 120] -; CHECK-V3: ldxw r2, [r10 - 112] -; CHECK-V3: ldxw r2, [r10 - 104] -; CHECK-V3: ldxw r2, [r10 - 96] -; CHECK-V3: ldxw r2, [r10 - 88] +; CHECK-V3: ldxw r2, [r10 - 5048] +; CHECK-V3: ldxw r2, [r10 - 5040] +; CHECK-V3: ldxw r2, [r10 - 5032] +; CHECK-V3: ldxw r2, [r10 - 5024] +; CHECK-V3: ldxw r2, [r10 - 5016] ; Loading allocated i32 -; CHECK-V3: ldxw r0, [r10 - 24] +; CHECK-V3: ldxw r0, [r10 - 16] ; CHECK-NOT: add64 r10, 128 entry: - %o = alloca i512 + %o = alloca [5000 x i8], align 8 %g = add i32 %a, %b %h = sub i32 %g, %c %i = add i32 %h, %d @@ -122,7 +129,7 @@ entry: define i32 @callee_no_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %p, i32 %y, i32 %a1, i32 %a2) #1 { ; CHECK-LABEL: callee_no_alloca ; CHECK: add64 r10, -64 -; CHECK-V3: add64 r10, 64 +; CHECK-V3-NOT: add64 r10, 64 ; Loading arguments ; CHECK: ldxw r1, [r10 + 56] @@ -132,11 +139,11 @@ define i32 @callee_no_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 ; CHECK: ldxw r1, [r10 + 24] ; Loading arguments -; CHECK-V3: ldxw r1, [r10 - 56] -; CHECK-V3: ldxw r1, [r10 - 48] -; CHECK-V3: ldxw r1, [r10 - 40] -; CHECK-V3: ldxw r1, [r10 - 32] -; CHECK-V3: ldxw r1, [r10 - 24] +; CHECK-V3: ldxw r1, [r10 - 4088] +; CHECK-V3: ldxw r1, [r10 - 4080] +; CHECK-V3: ldxw r1, 
[r10 - 4072] +; CHECK-V3: ldxw r1, [r10 - 4064] +; CHECK-V3: ldxw r1, [r10 - 4056] ; CHECK-NOT: add64 r10, 64 entry: diff --git a/llvm/test/CodeGen/SBF/many_args_value_size.ll b/llvm/test/CodeGen/SBF/many_args_value_size.ll index 927405c5d943d..c4de3b7f61c7f 100644 --- a/llvm/test/CodeGen/SBF/many_args_value_size.ll +++ b/llvm/test/CodeGen/SBF/many_args_value_size.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=sbf -mcpu=v2 < %s | FileCheck %s ; RUN: llc -mtriple=sbpfv2-solana-solana < %s | FileCheck %s -; RUN: llc -O3 -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s +; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s define i64 @test_func(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) { start: @@ -24,7 +24,7 @@ define i64 @func(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i8 %b8, i16 %b16, i32 % start: ; CHECK-LABEL: func: ; CHECK: add64 r10, -64 -; CHECK-V3: add64 r10, 64 +; CHECK-V3-NOT: add64 r10, 64 %a1 = add i64 %a, %b %a2 = sub i64 %a1, %c %a3 = mul i64 %a2, %d @@ -32,23 +32,27 @@ start: ; -64 + 32 = -32, so this is 5400 in %a5 ; CHECK: ldxdw r4, [r10 + 32] -; CHECK-V3: ldxdw r4, [r10 - 32] +; 4096 - 32 = 4064 +; CHECK-V3: ldxdw r4, [r10 - 4064] ; -64 + 60 = -4, so this is 5 in %b8 ; CHECK: ldxb w4, [r10 + 60] -; CHECK-V3: ldxb w4, [r10 - 60] +; 4096 - 4 = 4092 +; CHECK-V3: ldxb w4, [r10 - 4092] %c0 = trunc i64 %a to i8 %b1 = add i8 %b8, %c0 ; -64 + 52 = -12, so this is -20 in %b16 ; CHECK: ldxh w1, [r10 + 52] -; CHECK-V3: ldxh w1, [r10 - 52] +; 4096 - 12 = 4084 +; CHECK-V3: ldxh w1, [r10 - 4084] %c1 = trunc i64 %b to i16 %b2 = add i16 %b16, %c1 ; -64 + 44 = -20, so this is 300 in %b32 ; CHECK: ldxw w1, [r10 + 44] -; CHECK-V3: ldxw w1, [r10 - 44] +; 4096 - 20 = 4076 +; CHECK-V3: ldxw w1, [r10 - 4076] %c2 = trunc i64 %c to i32 %b3 = add i32 %b32, %c2