-
Notifications
You must be signed in to change notification settings - Fork 15.4k
AMDGPU: Avoid crashing on statepoint-like pseudoinstructions #170657
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
AMDGPU: Avoid crashing on statepoint-like pseudoinstructions #170657
Conversation
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesAt the moment the MIR tests are somewhat redundant. The waitcnt Full diff: https://github.com/llvm/llvm-project/pull/170657.diff 13 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 18142c2c0adf3..bdd9fee795e08 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2350,7 +2350,18 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
/// Returns the callee operand from the given \p MI.
virtual const MachineOperand &getCalleeOperand(const MachineInstr &MI) const {
- return MI.getOperand(0);
+ assert(MI.isCall());
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::STATEPOINT:
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ return MI.getOperand(3);
+ default:
+ return MI.getOperand(0);
+ }
+
+ llvm_unreachable("impossible call instruction");
}
/// Return the uniformity behavior of the given instruction.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index dd8f18d3b8a6a..7998da0ea06eb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -331,6 +331,17 @@ namespace llvm {
MachineBasicBlock *
TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::STATEPOINT:
+ // As an implementation detail, STATEPOINT shares the STACKMAP format at
+ // this point in the process. We diverge later.
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ return emitPatchPoint(MI, MBB);
+ default:
+ break;
+ }
+
#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 46a5e44374e1c..5b8cd343557fa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1145,6 +1145,8 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
bool VarArgDisallowed, bool ForceVoidReturnTy) {
StatepointLoweringInfo SI(DAG);
+ SI.CLI.CB = Call;
+
unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index bf9b4297bd435..99c1ab8d379d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -406,6 +406,16 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
+ unsigned Opc = MI->getOpcode();
+ if (LLVM_UNLIKELY(Opc == TargetOpcode::STATEPOINT ||
+ Opc == TargetOpcode::STACKMAP ||
+ Opc == TargetOpcode::PATCHPOINT)) {
+ LLVMContext &Ctx = MI->getMF()->getFunction().getContext();
+ Ctx.emitError("unhandled statepoint-like instruction");
+ OutStreamer->emitRawComment("unsupported statepoint/stackmap/patchpoint");
+ return;
+ }
+
if (isVerbose())
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index b03d50f2d451d..4e664e084fb88 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -256,10 +256,13 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
// Pseudo used just to encode the underlying global. Is there a better
// way to track this?
+ // TODO: Some of the generic call-like pseudos do not encode the callee,
+ // so we overly conservatively treat this as an indirect call.
const MachineOperand *CalleeOp =
TII->getNamedOperand(MI, AMDGPU::OpName::callee);
- const Function *Callee = getCalleeFunction(*CalleeOp);
+ const Function *Callee =
+ CalleeOp ? getCalleeFunction(*CalleeOp) : nullptr;
auto isSameFunction = [](const MachineFunction &MF, const Function *F) {
return F == &MF.getFunction();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 301f2fc8dab45..0660e3a64c70e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -275,6 +275,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand);
setOperationAction(ISD::GlobalAddress, {MVT::i32, MVT::i64}, Custom);
+ setOperationAction(ISD::ExternalSymbol, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
@@ -6838,6 +6839,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
return LowerGlobalAddress(MFI, Op, DAG);
}
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
@@ -9017,6 +9020,15 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
MachineMemOperand::MOInvariant);
}
+SDValue SITargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TODO: Handle this. It should be mostly the same as LowerGlobalAddress.
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
+ Fn, "unsupported external symbol", Op.getDebugLoc()));
+ return DAG.getPOISON(Op.getValueType());
+}
+
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index fb162948caf4c..e82f4528fcd09 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -79,6 +79,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 64a29e1841245..9e744c9187c48 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1955,7 +1955,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();
- const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand &CallAddrOp = TII->getCalleeOperand(MI);
if (CallAddrOp.isReg()) {
RegInterval CallAddrOpInterval =
ScoreBrackets.getRegInterval(&MI, CallAddrOp);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7535407741f1f..ef011236642f8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10510,6 +10510,14 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return SchedModel.computeInstrLatency(&MI);
}
+const MachineOperand &
+SIInstrInfo::getCalleeOperand(const MachineInstr &MI) const {
+ if (const MachineOperand *CallAddrOp =
+ getNamedOperand(MI, AMDGPU::OpName::src0))
+ return *CallAddrOp;
+ return TargetInstrInfo::getCalleeOperand(MI);
+}
+
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b1d6563bf3c0b..7d7cbda644b77 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1641,6 +1641,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
+ const MachineOperand &getCalleeOperand(const MachineInstr &MI) const override;
+
InstructionUniformity
getInstructionUniformity(const MachineInstr &MI) const final;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll b/llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll
new file mode 100644
index 0000000000000..ef8c00bb3b4b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll
@@ -0,0 +1,16 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2> %t.err | FileCheck %s
+; RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+; ERR: error: <unknown>:0:0: in function caller_0 i32 (): unsupported external symbol
+; ERR: error: unhandled statepoint-like instruction
+
+; CHECK: ;unsupported statepoint/stackmap/patchpoint
+declare i32 @llvm.experimental.deoptimize.i32(...)
+declare i8 @llvm.experimental.deoptimize.i8(...)
+
+define i32 @caller_0() {
+entry:
+ %v = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ]
+ ret i32 %v
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir b/llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir
new file mode 100644
index 0000000000000..1c9ae91557b7e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir
@@ -0,0 +1,40 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-asm-printer -o - %s 2> %t.err | FileCheck %s
+# RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+# CHECK: ;unsupported statepoint/stackmap/patchpoint
+# ERR: error: unhandled statepoint-like instruction
+
+---
+name: test_statepoint
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 16
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ isCalleeSavedInfoValid: true
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0.entry:
+ liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+
+ S_WAITCNT 0
+ $sgpr16 = S_MOV_B32 $sgpr33
+ $sgpr33 = S_MOV_B32 $sgpr32
+ $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ $exec = S_MOV_B64 killed $sgpr18_sgpr19
+ $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
+ $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
+ $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
+ $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
+ STATEPOINT 2882400015, 0, 11, undef renamable $sgpr4_sgpr5, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir b/llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir
new file mode 100644
index 0000000000000..00b1f6e33412f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s
+
+# Make sure the waitcnt pass doesn't crash on statepoint
+# pseudoinstructions, and handlest he wait for the callee operand
+# correctly.
+
+---
+name: test_wait_statepoint_callee
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 16
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ isCalleeSavedInfoValid: true
+stack:
+ - { id: 0, offset: 4, size: 4, alignment: 4 }
+ - { id: 1, type: spill-slot, size: 4, alignment: 4 }
+ - { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ spillPhysVGPRs:
+ - '$vgpr40'
+ wwmReservedRegs:
+ - '$vgpr40'
+ scavengeFI: '%stack.0'
+body: |
+ bb.0:
+ liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13
+
+ ; CHECK-LABEL: name: test_wait_statepoint_callee
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33
+ ; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32
+ ; CHECK-NEXT: $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr18_sgpr19
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
+ ; CHECK-NEXT: $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ ; CHECK-NEXT: S_WAITCNT 49279
+ ; CHECK-NEXT: STATEPOINT 2882400015, 0, 11, renamable $sgpr14_sgpr15, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
+ $sgpr16 = S_MOV_B32 $sgpr33
+ $sgpr33 = S_MOV_B32 $sgpr32
+ $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ $exec = S_MOV_B64 killed $sgpr18_sgpr19
+ $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
+ $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
+ $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
+ $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
+ $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
+ STATEPOINT 2882400015, 0, 11, renamable $sgpr14_sgpr15, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
+
+...
|
6d54816 to
ce03c68
Compare
ecf5f9d to
5c91afe
Compare
ro-i
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I currently don't really know anything specific about stackmap stuff.
Your ISel choices match those in X86TargetLowering tho, so seems kinda proven
5c91afe to
764e922
Compare
At the moment the MIR tests are somewhat redundant. The waitcnt one is needed to ensure we actually have a load, given we are currently just emitting an error on ExternalSymbol. The asm printer one is more redundant for the moment, since it's stressed by the IR test. However I am planning to change the error path for the IR test, so it will soon not be redundant.
764e922 to
42a3179
Compare

At the moment the MIR tests are somewhat redundant. The waitcnt
one is needed to ensure we actually have a load, given we are
currently just emitting an error on ExternalSymbol. The asm printer
one is more redundant for the moment, since it's stressed by the IR
test. However I am planning to change the error path for the IR test,
so it will soon not be redundant.