Skip to content

Commit aa4803f

Browse files
authored
[SOL] Use a relative bump for stack in sBPFv3 (#181)
1 parent 427d50d commit aa4803f

File tree

6 files changed

+82
-42
lines changed

6 files changed

+82
-42
lines changed

llvm/lib/Target/SBF/SBFFrameLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ void SBFFrameLowering::emitPrologue(MachineFunction &MF,
3737

3838
if (Subtarget.isDynamicFramesV1())
3939
NumBytes = -NumBytes;
40+
else if (NumBytes <= FrameSize)
41+
// In V3, we don't bump if the number of bytes does not exceed the default
42+
// frame size.
43+
return;
44+
else
45+
NumBytes -= FrameSize;
4046

4147
BuildMI(MBB, MBBI, Dl, TII.get(SBF::ADD_ri), SBF::R10)
4248
.addReg(SBF::R10)

llvm/lib/Target/SBF/SBFFrameLowering.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ class SBFFrameLowering : public TargetFrameLowering {
3939
MachineBasicBlock::iterator MI) const override {
4040
return MBB.erase(MI);
4141
}
42+
private:
43+
const int FrameSize = 4096;
4244
};
43-
}
45+
} // namespace llvm
4446
#endif

llvm/lib/Target/SBF/SBFRegisterInfo.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include "SBFGenRegisterInfo.inc"
2727
using namespace llvm;
2828

29-
unsigned SBFRegisterInfo::FrameLength = 512;
29+
unsigned SBFRegisterInfo::FrameLength = 4096;
3030

3131
SBFRegisterInfo::SBFRegisterInfo()
3232
: SBFGenRegisterInfo(SBF::R0) {}
@@ -176,11 +176,10 @@ int SBFRegisterInfo::resolveInternalFrameIndex(
176176
Offset += Imm.value_or(0);
177177

178178
if (SubTarget.getHasDynamicFrames()) {
179-
Offset += static_cast<int>(StackSize);
180179
if (SubTarget.isDynamicFramesV1())
181-
return Offset;
180+
return Offset + static_cast<int>(StackSize);
182181

183-
return -Offset;
182+
return -(Offset + std::max(static_cast<int>(StackSize), static_cast<int>(FrameLength)));
184183
}
185184

186185
return Offset;

llvm/test/CodeGen/SBF/dynamic_stack_frame_add_and_sub.ll

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc < %s -march=sbf -mattr=+dynamic-frames | FileCheck %s
2-
; RUN: llc -O3 -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s
2+
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s
33
;
44
; Source:
55
; int test_func(int * vec, int idx) {
@@ -14,7 +14,7 @@
1414
define i32 @test_func(ptr noundef %vec, i32 noundef %idx) #0 {
1515
; CHECK-LABEL: test_func:
1616
; CHECK: add64 r10, -128
17-
; CHECK-V3: add64 r10, 128
17+
; CHECK-V3-NOT: add64 r10, 128
1818
entry:
1919
%vec.addr = alloca ptr, align 8
2020
%idx.addr = alloca i512, align 4
@@ -29,4 +29,26 @@ entry:
2929
store i32 %sub, ptr %arrayidx, align 4
3030
%3 = load i32, ptr %idx.addr, align 4
3131
ret i32 %3
32+
}
33+
34+
declare i64 @read_ptr(ptr %a);
35+
36+
define i64 @test_func_4096(i64 %idx) {
37+
; CHECK-LABEL: test_func_4096
38+
; CHECK-V3-NOT: add64 r10, 4096
39+
entry:
40+
%large_var = alloca [4096 x i8], align 8
41+
%val = call i64 @read_ptr(ptr %large_var)
42+
ret i64 %val
43+
}
44+
45+
define i64 @test_func_4128(i64 %idx) {
46+
; CHECK-LABEL: test_func_4128
47+
; CHECK-V3: add64 r10, 64
48+
; The stack is aligned at 64, so we bump 64 to have a stack size of 4096+64=4160,
49+
; so we can fit the 4128 bytes of the array.
50+
entry:
51+
%large_var = alloca [4128 x i8], align 8
52+
%val = call i64 @read_ptr(ptr %large_var)
53+
ret i64 %val
3254
}

llvm/test/CodeGen/SBF/many_args_new_conv.ll

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
; RUN: llc -O2 -march=sbf -mcpu=v1 < %s | FileCheck %s
2-
; RUN: llc -O2 -mtriple=sbpfv1-solana-solana < %s | FileCheck %s
3-
; RUN: llc -O2 -march=sbf -mcpu=v1 -mattr=+mem-encoding < %s | FileCheck %s
4-
; RUN: llc -O3 -march=sbf -mattr=+dynamic-frames-v3 < %s | FileCheck --check-prefix=CHECK-V3 %s
1+
; RUN: llc -march=sbf -mcpu=v1 < %s | FileCheck %s
2+
; RUN: llc -mtriple=sbpfv1-solana-solana < %s | FileCheck %s
3+
; RUN: llc -march=sbf -mcpu=v1 -mattr=+mem-encoding < %s | FileCheck %s
4+
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3 < %s | FileCheck --check-prefix=CHECK-V3 %s
55

66
; Function Attrs: nounwind uwtable
77
define i32 @caller_no_alloca(i32 %a, i32 %b, i32 %c) #0 {
@@ -35,8 +35,12 @@ entry:
3535
; Function Attrs: nounwind uwtable
3636
define i32 @caller_alloca(i32 %a, i32 %b, i32 %c) #0 {
3737
; CHECK-LABEL: caller_alloca
38-
; CHECK: add64 r10, -64
39-
; CHECK: ldxw r1, [r10 + 60]
38+
; CHECK: add64 r10, -4160
39+
; CHECK: ldxw r1, [r10 + 88]
40+
; 88 is 8*7 + 32
41+
42+
; CHECK-V3: add64 r10, 64
43+
; CHECK-V3: ldxw r1, [r10 - 88]
4044

4145
; Saving arguments in the callee's frame
4246

@@ -65,10 +69,13 @@ define i32 @caller_alloca(i32 %a, i32 %b, i32 %c) #0 {
6569
; CHECK: mov64 r4, 1
6670
; CHECK: mov64 r5, 2
6771
; CHECK: call callee_no_alloca
72+
; CHECK: ldxw r1, [r10 + 16]
73+
; CHECK-V3: ldxw r1, [r10 - 16]
6874

6975
entry:
70-
%g = alloca i32
71-
%g1 = load i32, ptr %g
76+
%g = alloca [4128 x i8], align 8
77+
%off = getelementptr i64, ptr %g, i64 7
78+
%g1 = load i32, ptr %off
7279
%call = tail call i32 @callee_no_alloca(i32 %g1, i32 %b, i32 %c, i32 1, i32 2, i32 3, i32 4, i32 50, i32 55, i32 60) #3
7380
%h = alloca i128
7481
%h1 = load i32, ptr %h
@@ -79,31 +86,31 @@ entry:
7986
; Function Attrs: nounwind uwtable
8087
define i32 @callee_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %p, i32 %y, i32 %a1, i32 %a2) #1 {
8188
; CHECK-LABEL: callee_alloca
82-
; CHECK: add64 r10, -128
83-
; CHECK-V3: add64 r10, 128
89+
; CHECK: add64 r10, -5056
90+
; CHECK-V3: add64 r10, 960
8491

8592
; Loading arguments
86-
; CHECK: ldxw r2, [r10 + 120]
87-
; CHECK: ldxw r2, [r10 + 112]
88-
; CHECK: ldxw r2, [r10 + 104]
89-
; CHECK: ldxw r2, [r10 + 96]
90-
; CHECK: ldxw r2, [r10 + 88]
93+
; CHECK: ldxw r2, [r10 + 5048]
94+
; CHECK: ldxw r2, [r10 + 5040]
95+
; CHECK: ldxw r2, [r10 + 5032]
96+
; CHECK: ldxw r2, [r10 + 5024]
97+
; CHECK: ldxw r2, [r10 + 5016]
9198
; Loading allocated i32
92-
; CHECK: ldxw r0, [r10 + 24]
99+
; CHECK: ldxw r0, [r10 + 16]
93100

94-
; CHECK-V3: ldxw r2, [r10 - 120]
95-
; CHECK-V3: ldxw r2, [r10 - 112]
96-
; CHECK-V3: ldxw r2, [r10 - 104]
97-
; CHECK-V3: ldxw r2, [r10 - 96]
98-
; CHECK-V3: ldxw r2, [r10 - 88]
101+
; CHECK-V3: ldxw r2, [r10 - 5048]
102+
; CHECK-V3: ldxw r2, [r10 - 5040]
103+
; CHECK-V3: ldxw r2, [r10 - 5032]
104+
; CHECK-V3: ldxw r2, [r10 - 5024]
105+
; CHECK-V3: ldxw r2, [r10 - 5016]
99106
; Loading allocated i32
100-
; CHECK-V3: ldxw r0, [r10 - 24]
107+
; CHECK-V3: ldxw r0, [r10 - 16]
101108

102109

103110
; CHECK-NOT: add64 r10, 128
104111

105112
entry:
106-
%o = alloca i512
113+
%o = alloca [5000 x i8], align 8
107114
%g = add i32 %a, %b
108115
%h = sub i32 %g, %c
109116
%i = add i32 %h, %d
@@ -122,7 +129,7 @@ entry:
122129
define i32 @callee_no_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %p, i32 %y, i32 %a1, i32 %a2) #1 {
123130
; CHECK-LABEL: callee_no_alloca
124131
; CHECK: add64 r10, -64
125-
; CHECK-V3: add64 r10, 64
132+
; CHECK-V3-NOT: add64 r10, 64
126133

127134
; Loading arguments
128135
; CHECK: ldxw r1, [r10 + 56]
@@ -132,11 +139,11 @@ define i32 @callee_no_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32
132139
; CHECK: ldxw r1, [r10 + 24]
133140

134141
; Loading arguments
135-
; CHECK-V3: ldxw r1, [r10 - 56]
136-
; CHECK-V3: ldxw r1, [r10 - 48]
137-
; CHECK-V3: ldxw r1, [r10 - 40]
138-
; CHECK-V3: ldxw r1, [r10 - 32]
139-
; CHECK-V3: ldxw r1, [r10 - 24]
142+
; CHECK-V3: ldxw r1, [r10 - 4088]
143+
; CHECK-V3: ldxw r1, [r10 - 4080]
144+
; CHECK-V3: ldxw r1, [r10 - 4072]
145+
; CHECK-V3: ldxw r1, [r10 - 4064]
146+
; CHECK-V3: ldxw r1, [r10 - 4056]
140147

141148
; CHECK-NOT: add64 r10, 64
142149
entry:

llvm/test/CodeGen/SBF/many_args_value_size.ll

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc -march=sbf -mcpu=v2 < %s | FileCheck %s
22
; RUN: llc -mtriple=sbpfv2-solana-solana < %s | FileCheck %s
3-
; RUN: llc -O3 -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s
3+
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s
44

55
define i64 @test_func(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
66
start:
@@ -24,31 +24,35 @@ define i64 @func(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i8 %b8, i16 %b16, i32 %
2424
start:
2525
; CHECK-LABEL: func:
2626
; CHECK: add64 r10, -64
27-
; CHECK-V3: add64 r10, 64
27+
; CHECK-V3-NOT: add64 r10, 64
2828
%a1 = add i64 %a, %b
2929
%a2 = sub i64 %a1, %c
3030
%a3 = mul i64 %a2, %d
3131
%a4 = add i64 %a3, %e
3232

3333
; -64 + 32 = -32, so this is 5400 in %a5
3434
; CHECK: ldxdw r4, [r10 + 32]
35-
; CHECK-V3: ldxdw r4, [r10 - 32]
35+
; 4096 - 32 = 4064
36+
; CHECK-V3: ldxdw r4, [r10 - 4064]
3637

3738
; -64 + 60 = -4, so this is 5 in %b8
3839
; CHECK: ldxb w4, [r10 + 60]
39-
; CHECK-V3: ldxb w4, [r10 - 60]
40+
; 4096 - 4 = 4092
41+
; CHECK-V3: ldxb w4, [r10 - 4092]
4042
%c0 = trunc i64 %a to i8
4143
%b1 = add i8 %b8, %c0
4244

4345
; -64 + 52 = -12, so this is -20 in %b16
4446
; CHECK: ldxh w1, [r10 + 52]
45-
; CHECK-V3: ldxh w1, [r10 - 52]
47+
; 4096 - 12 = 4084
48+
; CHECK-V3: ldxh w1, [r10 - 4084]
4649
%c1 = trunc i64 %b to i16
4750
%b2 = add i16 %b16, %c1
4851

4952
; -64 + 44 = -20, so this is 300 in %b32
5053
; CHECK: ldxw w1, [r10 + 44]
51-
; CHECK-V3: ldxw w1, [r10 - 44]
54+
; 4096 - 20 = 4076
55+
; CHECK-V3: ldxw w1, [r10 - 4076]
5256
%c2 = trunc i64 %c to i32
5357
%b3 = add i32 %b32, %c2
5458

0 commit comments

Comments
 (0)