Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -2350,7 +2350,18 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {

/// Returns the callee operand from the given \p MI.
///
/// As written after this change, \p MI is asserted to be a call instruction.
/// For the STATEPOINT, STACKMAP and PATCHPOINT pseudo opcodes operand 3 is
/// returned; for every other opcode operand 0 is returned. Targets may
/// override this hook (it is virtual).
virtual const MachineOperand &getCalleeOperand(const MachineInstr &MI) const {
// NOTE(review): this listing is a rendered diff ("12 additions & 1
// deletion"); the unconditional return on the next line appears to be the
// removed pre-change body rather than live code. Taken literally it would
// make everything below unreachable -- confirm against the real header.
return MI.getOperand(0);
assert(MI.isCall());

switch (MI.getOpcode()) {
// The generic statepoint-like pseudos share one case and all yield operand 3.
case TargetOpcode::STATEPOINT:
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return MI.getOperand(3);
default:
return MI.getOperand(0);
}

// Every path through the switch above returns, so control never gets here.
llvm_unreachable("impossible call instruction");
}

/// Return the uniformity behavior of the given instruction.
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,17 @@ namespace llvm {
MachineBasicBlock *
TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
case TargetOpcode::STATEPOINT:
// As an implementation detail, STATEPOINT shares the STACKMAP format at
// this point in the process. We diverge later.
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, MBB);
default:
break;
}

#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,8 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
bool VarArgDisallowed, bool ForceVoidReturnTy) {
StatepointLoweringInfo SI(DAG);
SI.CLI.CB = Call;

unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,16 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}

unsigned Opc = MI->getOpcode();
if (LLVM_UNLIKELY(Opc == TargetOpcode::STATEPOINT ||
Opc == TargetOpcode::STACKMAP ||
Opc == TargetOpcode::PATCHPOINT)) {
LLVMContext &Ctx = MI->getMF()->getFunction().getContext();
Ctx.emitError("unhandled statepoint-like instruction");
OutStreamer->emitRawComment("unsupported statepoint/stackmap/patchpoint");
return;
}

if (isVerbose())
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,13 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
// Pseudo used just to encode the underlying global. Is there a better
// way to track this?

// TODO: Some of the generic call-like pseudos do not encode the callee,
// so we overly conservatively treat this as an indirect call.
const MachineOperand *CalleeOp =
TII->getNamedOperand(MI, AMDGPU::OpName::callee);

const Function *Callee = getCalleeFunction(*CalleeOp);
const Function *Callee =
CalleeOp ? getCalleeFunction(*CalleeOp) : nullptr;

auto isSameFunction = [](const MachineFunction &MF, const Function *F) {
return F == &MF.getFunction();
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand);

setOperationAction(ISD::GlobalAddress, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::ExternalSymbol, {MVT::i32, MVT::i64}, Custom);

setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
Expand Down Expand Up @@ -6840,6 +6841,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
return LowerGlobalAddress(MFI, Op, DAG);
}
case ISD::ExternalSymbol:
return LowerExternalSymbol(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
Expand Down Expand Up @@ -9019,6 +9022,15 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
MachineMemOperand::MOInvariant);
}

/// Custom lowering hook for external-symbol nodes.
///
/// Lowering is not implemented yet: an "unsupported external symbol"
/// diagnostic is reported against the current function at \p Op's debug
/// location, and a poison value of \p Op's type is returned in its place.
SDValue SITargetLowering::LowerExternalSymbol(SDValue Op,
                                              SelectionDAG &DAG) const {
  // TODO: Handle this. It should be mostly the same as LowerGlobalAddress.
  // The diagnostic object is deliberately built as a temporary inside the
  // diagnose() call, exactly as before.
  DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
      DAG.getMachineFunction().getFunction(), "unsupported external symbol",
      Op.getDebugLoc()));

  // Hand back a placeholder of the expected type.
  const EVT ResultVT = Op.getValueType();
  return DAG.getPOISON(ResultVT);
}

SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {

SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1951,7 +1951,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();

const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand &CallAddrOp = TII->getCalleeOperand(MI);
if (CallAddrOp.isReg()) {
RegInterval CallAddrOpInterval =
ScoreBrackets.getRegInterval(&MI, CallAddrOp);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10521,6 +10521,14 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return SchedModel.computeInstrLatency(&MI);
}

/// Returns the callee operand of \p MI.
///
/// If \p MI has a named `src0` operand, that operand is returned; otherwise
/// the lookup is delegated to TargetInstrInfo::getCalleeOperand.
const MachineOperand &
SIInstrInfo::getCalleeOperand(const MachineInstr &MI) const {
  const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);

  // No named src0 on this instruction: fall back to the generic handling.
  if (!Src0)
    return TargetInstrInfo::getCalleeOperand(MI);

  return *Src0;
}

InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1641,6 +1641,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;

const MachineOperand &getCalleeOperand(const MachineInstr &MI) const override;

InstructionUniformity
getInstructionUniformity(const MachineInstr &MI) const final;

Expand Down
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.deoptimize.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2> %t.err | FileCheck %s
; RUN: FileCheck -check-prefix=ERR %s < %t.err

; ERR: error: <unknown>:0:0: in function caller_0 i32 (): unsupported external symbol
; ERR: error: unhandled statepoint-like instruction

; CHECK: ;unsupported statepoint/stackmap/patchpoint
declare i32 @llvm.experimental.deoptimize.i32(...)
declare i8 @llvm.experimental.deoptimize.i8(...)

; Calls llvm.experimental.deoptimize.i32 with a "deopt" operand bundle and
; returns its result. The RUN line invokes `not llc`, so compiling this
; function is expected to fail with the diagnostics listed above.
define i32 @caller_0() {
entry:
%v = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ]
ret i32 %v
}

40 changes: 40 additions & 0 deletions llvm/test/CodeGen/AMDGPU/statepoint-asm-printer.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-asm-printer -o - %s 2> %t.err | FileCheck %s
# RUN: FileCheck -check-prefix=ERR %s < %t.err

# CHECK: ;unsupported statepoint/stackmap/patchpoint
# ERR: error: unhandled statepoint-like instruction

---
name: test_statepoint
tracksRegLiveness: true
frameInfo:
stackSize: 16
maxAlignment: 4
adjustsStack: true
hasCalls: true
maxCallFrameSize: 0
isCalleeSavedInfoValid: true
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
machineFunctionInfo:
hasSpilledSGPRs: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0.entry:
liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11

S_WAITCNT 0
$sgpr16 = S_MOV_B32 $sgpr33
$sgpr33 = S_MOV_B32 $sgpr32
$sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
$exec = S_MOV_B64 killed $sgpr18_sgpr19
$vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
$vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
$sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
$vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
STATEPOINT 2882400015, 0, 11, undef renamable $sgpr4_sgpr5, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
...

64 changes: 64 additions & 0 deletions llvm/test/CodeGen/AMDGPU/statepoint-insert-waitcnts.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-insert-waitcnts -o - %s | FileCheck %s

# Make sure the waitcnt pass doesn't crash on statepoint
# pseudoinstructions, and handles the wait for the callee operand
# correctly.

---
name: test_wait_statepoint_callee
tracksRegLiveness: true
frameInfo:
stackSize: 16
maxAlignment: 4
adjustsStack: true
hasCalls: true
maxCallFrameSize: 0
isCalleeSavedInfoValid: true
stack:
- { id: 0, offset: 4, size: 4, alignment: 4 }
- { id: 1, type: spill-slot, size: 4, alignment: 4 }
- { id: 2, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
hasSpilledSGPRs: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
spillPhysVGPRs:
- '$vgpr40'
wwmReservedRegs:
- '$vgpr40'
scavengeFI: '%stack.0'
body: |
bb.0:
liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13

; CHECK-LABEL: name: test_wait_statepoint_callee
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAITCNT 0
; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33
; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32
; CHECK-NEXT: $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr18_sgpr19
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
; CHECK-NEXT: $sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
; CHECK-NEXT: S_WAITCNT 49279
; CHECK-NEXT: STATEPOINT 2882400015, 0, 11, renamable $sgpr14_sgpr15, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu
$sgpr16 = S_MOV_B32 $sgpr33
$sgpr33 = S_MOV_B32 $sgpr32
$sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
$exec = S_MOV_B64 killed $sgpr18_sgpr19
$vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
$vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40
$sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 1024, implicit-def dead $scc
$vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40
$sgpr14_sgpr15 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
STATEPOINT 2882400015, 0, 11, renamable $sgpr14_sgpr15, 0, killed $sgpr4_sgpr5, killed $sgpr6_sgpr7, killed $sgpr8_sgpr9, killed $sgpr10_sgpr11, killed $sgpr12, killed $sgpr13, killed $sgpr14, killed $sgpr15, killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, 2, 0, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, csr_amdgpu

...
Loading