Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/include/llvm/CodeGen/MachineRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,13 @@ class MachineRegisterInfo {
/// multiple definitions or no definition, return null.
LLVM_ABI MachineInstr *getUniqueVRegDef(Register Reg) const;

/// getDomVRegDefInBasicBlock - Return an iterator to the closest instruction
/// preceding I in MBB that modifies the specified register, searching
/// backwards towards MBB.begin(). The instruction at I itself is not
/// examined (the search is exclusive of I).
/// Returns MBB.end() if no such instruction is found, which includes the
/// case where I is MBB.begin().
/// I may be the MBB.end() sentinel; the search then starts at the last
/// instruction of the block.
LLVM_ABI MachineBasicBlock::iterator getDomVRegDefInBasicBlock(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;

/// clearKillFlags - Iterate over all the uses of the given register and
/// clear the kill flag from the MachineOperand. This function is used by
/// optimization passes which extend register lifetimes and need only
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/CodeGen/MachineRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,20 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const {
return &*I;
}

/// getDomVRegDefInBasicBlock - Walk backwards through MBB, beginning with the
/// instruction immediately preceding I, and return an iterator to the first
/// instruction encountered that modifies Reg. The instruction at I is never
/// inspected, so passing MBB.end() starts the walk at the last instruction of
/// the block. Returns MBB.end() when no such instruction exists.
MachineBasicBlock::iterator MachineRegisterInfo::getDomVRegDefInBasicBlock(
    Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
  const auto *TRI = getTargetRegisterInfo();
  const MachineBasicBlock::iterator First = MBB.begin();
  // Step back before testing so that I itself is excluded from the search;
  // the loop is skipped entirely when I already sits at the block start.
  while (I != First) {
    --I;
    if (I->modifiesRegister(Reg, TRI))
      return I;
  }
  return MBB.end();
}

/// hasOneNonDBGUse - Return true when the range of non-debug use operands of
/// RegNo contains a single element.
bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const {
  auto NonDbgUses = use_nodbg_operands(RegNo);
  return hasSingleElement(NonDbgUses);
}
Expand Down
48 changes: 24 additions & 24 deletions llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1909,7 +1909,7 @@ void ControlFlowRewriter::rewrite() {
Opcode = AMDGPU::S_CBRANCH_SCC1;
} else {
Register CondReg = Info.OrigCondition;
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(CondReg, *Node->Block);
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(CondReg, *Node->Block, Node->Block->end());
LLVM_DEBUG(dbgs() << "isSubsetOfExec(" << printReg(CondReg, MRI.getTargetRegisterInfo(), 0, &MRI) << "," << Node->Block->name() << ") : " << isCondRegSubsetOfExec << "\n");

if (!isCondRegSubsetOfExec) {
Expand Down Expand Up @@ -1951,7 +1951,6 @@ void ControlFlowRewriter::rewrite() {
RegMap;
GCNLaneMaskUpdater Updater(Function);
Updater.setLaneMaskAnalysis(&LMA);
Updater.setAccumulating(true);

for (WaveNode *LaneTarget : NodeOrder) {
LLVM_DEBUG(dbgs() << "\nPROCESSING NODE:" << LaneTarget->printableName() << "\n\n");
Expand All @@ -1973,7 +1972,7 @@ void ControlFlowRewriter::rewrite() {
// Step 2.1: Add conditions branching to LaneTarget to the Lane mask
// Updater.
// FIXME: we are creating a register here only to initialize the updater
Updater.init(LMU.createLaneMaskReg());
Updater.init();
Updater.addReset(*LaneTarget->Block, GCNLaneMaskUpdater::ResetInMiddle);
LLVM_DEBUG(dbgs() << "\nMark ResetInMiddle(X): " << LaneTarget->printableName() << '\n');
for (const auto &NodeDivergentPair : LaneTargetInfo.OriginBranch) {
Expand Down Expand Up @@ -2023,7 +2022,7 @@ void ControlFlowRewriter::rewrite() {
}
} else {
CondReg = LaneOrigin.CondReg;
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block);
bool isCondRegSubsetOfExec = LMA.isSubsetOfExec(LaneOrigin.CondReg, *LaneOrigin.Node->Block, LaneOrigin.Node->Block->getFirstTerminator());
LLVM_DEBUG(dbgs() << "isSubsetOfExec(" << printReg(LaneOrigin.CondReg, MRI.getTargetRegisterInfo(), 0, &MRI) << "," << LaneOrigin.Node->Block->name() << ") : " << isCondRegSubsetOfExec << "\n");
if (!isCondRegSubsetOfExec) {
Register Prev = CondReg;
Expand Down Expand Up @@ -2120,7 +2119,7 @@ void ControlFlowRewriter::rewrite() {
LLVM_DEBUG(dbgs() << "\nRejoin @ " << Secondary->printableName() << '\n');
Secondary->dump();
// FIXME: we are creating a register here only to initialize the updater
Updater.init(LMU.createLaneMaskReg());
Updater.init();
Updater.addReset(*Secondary->Block, GCNLaneMaskUpdater::ResetInMiddle);
LLVM_DEBUG(dbgs() << "\nMark ResetInMiddle(X): " << Secondary->printableName() << '\n');

Expand All @@ -2132,32 +2131,32 @@ void ControlFlowRewriter::rewrite() {
Register PrimaryExec = PredInfo.PrimarySuccessorExec;
LLVM_DEBUG(dbgs() << "Pred:" << Pred->Block->name() << "\nPrimaryExec:" << printReg(PrimaryExec,MRI.getTargetRegisterInfo(), 0, &MRI) << "\n");

MachineInstr *PrimaryExecDef;
for (;;) {
PrimaryExecDef = MRI.getVRegDef(PrimaryExec);
if (PrimaryExecDef->getOpcode() != AMDGPU::COPY)
break;
PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
}
// MachineInstr *PrimaryExecDef;
// for (;;) {
// PrimaryExecDef = MRI.getVRegDef(PrimaryExec);
// if (PrimaryExecDef->getOpcode() != AMDGPU::COPY)
// break;
// PrimaryExec = PrimaryExecDef->getOperand(1).getReg();
// }

LLVM_DEBUG(dbgs() << "PrimaryExecDef:");
LLVM_DEBUG(PrimaryExecDef->dump());
LLVM_DEBUG(dbgs() << "\n");
// LLVM_DEBUG(dbgs() << "PrimaryExecDef:");
// LLVM_DEBUG(PrimaryExecDef->dump());
// LLVM_DEBUG(dbgs() << "\n");

// Rejoin = EXEC ^ PrimaryExec
//
// Fold immediately if PrimaryExec was obtained via XOR as well.
Register Rejoin;

if (PrimaryExecDef->getParent() == Pred->Block &&
PrimaryExecDef->getOpcode() == LMC.XorOpc &&
PrimaryExecDef->getOperand(1).isReg() &&
PrimaryExecDef->getOperand(2).isReg()) {
if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
Rejoin = PrimaryExecDef->getOperand(2).getReg();
else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
Rejoin = PrimaryExecDef->getOperand(1).getReg();
}
// if (PrimaryExecDef->getParent() == Pred->Block &&
// PrimaryExecDef->getOpcode() == LMC.XorOpc &&
// PrimaryExecDef->getOperand(1).isReg() &&
// PrimaryExecDef->getOperand(2).isReg()) {
// if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
// Rejoin = PrimaryExecDef->getOperand(2).getReg();
// else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
// Rejoin = PrimaryExecDef->getOperand(1).getReg();
// }

if (!Rejoin) {
// Try to find a previously generated XOR (or merely masked) value
Expand Down Expand Up @@ -2199,6 +2198,7 @@ void ControlFlowRewriter::rewrite() {

}

Updater.insertAccumulatorResets();
Updater.cleanup();

LLVM_DEBUG(dbgs() << "CFG_BEGIN:" << Function.getName().str() << "_clean\n");
Expand Down
Loading