Skip to content

Commit eea5c02

Browse files
authored
[SOL] Add option to optimize stack usage (#182)
1 parent aa4803f commit eea5c02

File tree

7 files changed

+151
-10
lines changed

7 files changed

+151
-10
lines changed

llvm/lib/Target/SBF/SBFFrameLowering.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,24 @@ void SBFFrameLowering::emitPrologue(MachineFunction &MF,
3030
MachineFrameInfo &MFI = MF.getFrameInfo();
3131
int NumBytes = (int)MFI.getStackSize();
3232

33-
if (NumBytes && MBBI != MBB.end()) {
33+
if (MBBI != MBB.end()) {
3434
DebugLoc Dl = MBBI->getDebugLoc();
3535
const SBFInstrInfo &TII =
3636
*static_cast<const SBFInstrInfo *>(MF.getSubtarget().getInstrInfo());
3737

3838
if (Subtarget.isDynamicFramesV1())
3939
NumBytes = -NumBytes;
40-
else if (NumBytes <= FrameSize)
40+
else if (NumBytes <= FrameSize && !Subtarget.getOptimizeStackSpace())
4141
// In V3, we don't bump if the number of bytes is less than the default
4242
// frame size.
4343
return;
4444
else
4545
NumBytes -= FrameSize;
4646

47-
BuildMI(MBB, MBBI, Dl, TII.get(SBF::ADD_ri), SBF::R10)
48-
.addReg(SBF::R10)
49-
.addImm(NumBytes);
47+
if (NumBytes)
48+
BuildMI(MBB, MBBI, Dl, TII.get(SBF::ADD_ri), SBF::R10)
49+
.addReg(SBF::R10)
50+
.addImm(NumBytes);
5051
}
5152
}
5253

llvm/lib/Target/SBF/SBFRegisterInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ int SBFRegisterInfo::resolveInternalFrameIndex(
179179
if (SubTarget.isDynamicFramesV1())
180180
return Offset + static_cast<int>(StackSize);
181181

182+
if (SubTarget.getOptimizeStackSpace())
183+
return -(Offset + static_cast<int>(StackSize));
184+
182185
return -(Offset + std::max(static_cast<int>(StackSize), static_cast<int>(FrameLength)));
183186
}
184187

llvm/lib/Target/SBF/SBFSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ class SBFSubtarget : public SBFGenSubtargetInfo {
9595
// Whether we are dealing with dynamic stack frames in SBPFv3
9696
bool HasDynamicFramesV3;
9797

98+
// Whether we should bump down the frame pointer in SBPFv3
99+
bool OptimizeStackSpace;
100+
98101
std::unique_ptr<CallLowering> CallLoweringInfo;
99102
std::unique_ptr<InstructionSelector> InstSelector;
100103
std::unique_ptr<LegalizerInfo> Legalizer;
@@ -130,6 +133,7 @@ class SBFSubtarget : public SBFGenSubtargetInfo {
130133
bool getNewMemEncoding() const { return NewMemEncoding; }
131134
bool getHasStaticSyscalls() const { return HasStaticSyscalls; }
132135
bool getHasJmp32() const { return HasJmp32; }
136+
bool getOptimizeStackSpace() const { return OptimizeStackSpace; }
133137
const SBFInstrInfo *getInstrInfo() const override { return &InstrInfo; }
134138
const SBFFrameLowering *getFrameLowering() const override {
135139
return &FrameLowering;

llvm/lib/Target/SBF/SBFTargetFeatures.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ def FeatureJmp32 : SubtargetFeature<"jmp-32", "HasJmp32", "true",
5858
def FeatureDynamicFramesV3 : SubtargetFeature<"dynamic-frames-v3", "HasDynamicFramesV3", "true",
5959
"Enable dynamic frames in SBPFv3">;
6060

61+
def OptimizeStackSpace : SubtargetFeature<"optimize-stack-space", "OptimizeStackSpace", "true",
62+
"Optimize stack space usage in SBPFv3">;
63+
6164
class Proc<string Name, list<SubtargetFeature> Features>
6265
: Processor<Name, NoItineraries, Features>;
6366

llvm/test/CodeGen/SBF/dynamic_stack_frame_add_and_sub.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: llc < %s -march=sbf -mattr=+dynamic-frames | FileCheck %s
22
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s
3+
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+optimize-stack-space < %s | FileCheck --check-prefix=CHECK-OPT %s
34
;
45
; Source:
56
; int test_func(int * vec, int idx) {
@@ -15,6 +16,7 @@ define i32 @test_func(ptr noundef %vec, i32 noundef %idx) #0 {
1516
; CHECK-LABEL: test_func:
1617
; CHECK: add64 r10, -128
1718
; CHECK-V3-NOT: add64 r10, 128
19+
; CHECK-OPT: add64 r10, -3968
1820
entry:
1921
%vec.addr = alloca ptr, align 8
2022
%idx.addr = alloca i512, align 4
@@ -36,6 +38,7 @@ declare i64 @read_ptr(ptr %a);
3638
define i64 @test_func_4096(i64 %idx) {
3739
; CHECK-LABEL: test_func_4096
3840
; CHECK-V3-NOT: add64 r10, 4096
41+
; CHECK-OPT-NOT: add64 r10, 4096
3942
entry:
4043
%large_var = alloca [4096 x i8], align 8
4144
%val = call i64 @read_ptr(ptr %large_var)
@@ -45,6 +48,7 @@ entry:
4548
define i64 @test_func_4128(i64 %idx) {
4649
; CHECK-LABEL: test_func_4128
4750
; CHECK-V3: add64 r10, 64
51+
; CHECK-OPT: add64 r10, 64
4852
; The stack is aligned to 64 bytes, so we bump by 64 to get a stack size of 4096+64=4160,
4953
; so we can fit the 4128 bytes of the array.
5054
entry:
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+optimize-stack-space < %s | FileCheck --check-prefix=CHECK %s
2+
3+
; Function Attrs: nounwind uwtable
4+
define i32 @caller_no_alloca(i32 %a, i32 %b, i32 %c) #0 {
5+
entry:
6+
; CHECK-LABEL: caller_no_alloca
7+
8+
; No alloca here, so r10 is bumped down by the whole default frame size (4096)
9+
; CHECK: add64 r10, -4096
10+
11+
; Saving arguments on the stack
12+
; CHECK: stdw [r10 + 40], 60
13+
; CHECK: stdw [r10 + 32], 55
14+
; CHECK: stdw [r10 + 24], 50
15+
; CHECK: stdw [r10 + 16], 4
16+
; CHECK: stdw [r10 + 8], 3
17+
18+
; CHECK: mov64 r4, 1
19+
; CHECK: mov64 r5, 2
20+
; CHECK: call callee_alloca
21+
22+
%call = tail call i32 @callee_alloca(i32 %a, i32 %b, i32 %c, i32 1, i32 2, i32 3, i32 4, i32 50, i32 55, i32 60) #3
23+
ret i32 %call
24+
}
25+
26+
; Function Attrs: nounwind uwtable
27+
define i32 @caller_alloca(i32 %a, i32 %b, i32 %c) #0 {
28+
; CHECK-LABEL: caller_alloca
29+
; CHECK: add64 r10, -896
30+
; CHECK: ldxw r1, [r10 - 128]
31+
; 128 is presumably 8*7 + 72, where 72 = (4096 - 896) - 3128 -- TODO confirm
32+
33+
34+
; Saving arguments in the callee's frame
35+
36+
; Offset in the callee: frame_size - 40
37+
; CHECK-V3: stdw [r10 + 40], 60
38+
; Offset in the callee: frame_size - 32
39+
; CHECK-V3: stdw [r10 + 32], 55
40+
; Offset in the callee: frame_size - 24
41+
; CHECK-V3: stdw [r10 + 24], 50
42+
; Offset in the callee: frame_size - 16
43+
; CHECK-V3: stdw [r10 + 16], 4
44+
; Offset in the callee: frame_size - 8
45+
; CHECK-V3: stdw [r10 + 8], 3
46+
47+
; CHECK: mov64 r4, 1
48+
; CHECK: mov64 r5, 2
49+
; CHECK: call callee_no_alloca
50+
; CHECK: ldxw r1, [r10 - 56]
51+
52+
entry:
53+
%g = alloca [3128 x i8], align 8
54+
%off = getelementptr i64, ptr %g, i64 7
55+
%g1 = load i32, ptr %off
56+
%call = tail call i32 @callee_no_alloca(i32 %g1, i32 %b, i32 %c, i32 1, i32 2, i32 3, i32 4, i32 50, i32 55, i32 60) #3
57+
%h = alloca i128
58+
%h1 = load i32, ptr %h
59+
%res = sub i32 %call, %h1
60+
ret i32 %res
61+
}
62+
63+
; Function Attrs: nounwind uwtable
64+
define i32 @callee_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %p, i32 %y, i32 %a1, i32 %a2) #1 {
65+
; CHECK-LABEL: callee_alloca
66+
; CHECK: add64 r10, -1024
67+
68+
; Loading arguments
69+
; CHECK: ldxw r2, [r10 - 3064]
70+
; CHECK: ldxw r2, [r10 - 3056]
71+
; CHECK: ldxw r2, [r10 - 3048]
72+
; CHECK: ldxw r2, [r10 - 3040]
73+
; CHECK: ldxw r2, [r10 - 3032]
74+
; Loading allocated i32
75+
; CHECK-V3: ldxw r0, [r10 - 32]
76+
77+
78+
; CHECK-NOT: add64 r10, 128
79+
80+
entry:
81+
%o = alloca [3000 x i8], align 8
82+
%g = add i32 %a, %b
83+
%h = sub i32 %g, %c
84+
%i = add i32 %h, %d
85+
%j = sub i32 %i, %e
86+
%k = add i32 %j, %f
87+
%l = add i32 %k, %p
88+
%m = add i32 %l, %y
89+
%n = add i32 %m, %a1
90+
%q = add i32 %n, %a2
91+
%r = load i32, ptr %o
92+
%s = add i32 %r, %q
93+
ret i32 %s
94+
}
95+
96+
; Function Attrs: nounwind uwtable
97+
define i32 @callee_no_alloca(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %p, i32 %y, i32 %a1, i32 %a2) #1 {
98+
; CHECK-LABEL: callee_no_alloca
99+
; CHECK: add64 r10, -4032
100+
101+
; Loading arguments
102+
; CHECK: ldxw r1, [r10 - 56]
103+
; CHECK: ldxw r1, [r10 - 48]
104+
; CHECK: ldxw r1, [r10 - 40]
105+
; CHECK: ldxw r1, [r10 - 32]
106+
; CHECK: ldxw r1, [r10 - 24]
107+
108+
; CHECK-NOT: add64 r10, 64
109+
entry:
110+
%g = add i32 %a, %b
111+
%h = sub i32 %g, %c
112+
%i = add i32 %h, %d
113+
%j = sub i32 %i, %e
114+
%k = add i32 %j, %f
115+
%l = add i32 %k, %p
116+
%m = add i32 %l, %y
117+
%n = add i32 %m, %a1
118+
%q = add i32 %n, %a2
119+
ret i32 %q
120+
}

llvm/test/CodeGen/SBF/many_args_value_size.ll

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; RUN: llc -march=sbf -mcpu=v2 < %s | FileCheck %s
22
; RUN: llc -mtriple=sbpfv2-solana-solana < %s | FileCheck %s
3-
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefix=CHECK-V3 %s
3+
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+alu32 < %s | FileCheck --check-prefixes=CHECK-V3,CHECK-COMMON %s
4+
; RUN: llc -march=sbf -mattr=+dynamic-frames-v3,+optimize-stack-space,+alu32 < %s | FileCheck --check-prefixes=CHECK-OPT,CHECK-COMMON %s
45

56
define i64 @test_func(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
67
start:
@@ -11,10 +12,10 @@ start:
1112
; CHECK: stw [r10 - 12], 65516
1213
; CHECK: stw [r10 - 4], 5
1314

14-
; CHECK-V3: stdw [r10 + 32], 5400
15-
; CHECK-V3: stw [r10 + 20], 300
16-
; CHECK-V3: stw [r10 + 12], 65516
17-
; CHECK-V3: stw [r10 + 4], 5
15+
; CHECK-COMMON: stdw [r10 + 32], 5400
16+
; CHECK-COMMON: stw [r10 + 20], 300
17+
; CHECK-COMMON: stw [r10 + 12], 65516
18+
; CHECK-COMMON: stw [r10 + 4], 5
1819

1920
%res = call i64 @func(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i8 5, i16 -20, i32 300, i64 5400)
2021
ret i64 %res
@@ -25,6 +26,7 @@ start:
2526
; CHECK-LABEL: func:
2627
; CHECK: add64 r10, -64
2728
; CHECK-V3-NOT: add64 r10, 64
29+
; CHECK-OPT: add64 r10, -4032
2830
%a1 = add i64 %a, %b
2931
%a2 = sub i64 %a1, %c
3032
%a3 = mul i64 %a2, %d
@@ -34,25 +36,29 @@ start:
3436
; CHECK: ldxdw r4, [r10 + 32]
3537
; 4096 - 32 = 4064
3638
; CHECK-V3: ldxdw r4, [r10 - 4064]
39+
; CHECK-OPT: ldxdw r4, [r10 - 32]
3740

3841
; -64 + 60 = -4, so this is 5 in %b8
3942
; CHECK: ldxb w4, [r10 + 60]
4043
; 4096 - 4 = 4092
4144
; CHECK-V3: ldxb w4, [r10 - 4092]
45+
; CHECK-OPT: ldxb w4, [r10 - 60]
4246
%c0 = trunc i64 %a to i8
4347
%b1 = add i8 %b8, %c0
4448

4549
; -64 + 52 = -12, so this is -20 in %b16
4650
; CHECK: ldxh w1, [r10 + 52]
4751
; 4096 - 12 = 4084
4852
; CHECK-V3: ldxh w1, [r10 - 4084]
53+
; CHECK-OPT: ldxh w1, [r10 - 52]
4954
%c1 = trunc i64 %b to i16
5055
%b2 = add i16 %b16, %c1
5156

5257
; -64 + 44 = -20, so this is 300 in %b32
5358
; CHECK: ldxw w1, [r10 + 44]
5459
; 4096 - 20 = 4076
5560
; CHECK-V3: ldxw w1, [r10 - 4076]
61+
; CHECK-OPT: ldxw w1, [r10 - 44]
5662
%c2 = trunc i64 %c to i32
5763
%b3 = add i32 %b32, %c2
5864

0 commit comments

Comments
 (0)