Skip to content

Commit 1334f38

Browse files
committed
cleanup
1 parent 3fce7cf commit 1334f38

File tree

3 files changed

+53
-58
lines changed

3 files changed

+53
-58
lines changed

llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1951,7 +1951,6 @@ void ControlFlowRewriter::rewrite() {
19511951
RegMap;
19521952
GCNLaneMaskUpdater Updater(Function);
19531953
Updater.setLaneMaskAnalysis(&LMA);
1954-
Updater.setAccumulating(true);
19551954

19561955
for (WaveNode *LaneTarget : NodeOrder) {
19571956
LLVM_DEBUG(dbgs() << "\nPROCESSING NODE:" << LaneTarget->printableName() << "\n\n");

llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,7 @@ Register GCNLaneMaskUtils::createLaneMaskReg() const {
108108
/// DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC)
109109
///
110110
/// before \p I in basic block \p MBB. Some simplifications are applied on the
111-
/// fly based on constant inputs and analysis via \p LMA, and further
112-
/// simplifications can be requested in "accumulating" mode.
111+
/// fly based on constant inputs and analysis via \p LMA.
113112
///
114113
/// \param DstReg The virtual register into which the merged mask is written.
115114
/// \param PrevReg The virtual register with the "previous" lane mask value;
@@ -118,16 +117,11 @@ Register GCNLaneMaskUtils::createLaneMaskReg() const {
118117
/// be merged into "previous".
119118
/// \param LMA If non-null, used to test whether CurReg may already be a subset
120119
/// of EXEC.
121-
/// \param accumulating Indicates that we should assume PrevReg is already
122-
/// properly masked, i.e. use PrevReg directly instead of
123-
/// (PrevReg & ~EXEC), and don't add extra 1-bits to DstReg
124-
/// beyond (CurReg & EXEC).
125120
void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
126121
MachineBasicBlock::iterator I,
127122
const DebugLoc &DL, Register DstReg,
128123
Register PrevReg, Register CurReg,
129124
GCNLaneMaskAnalysis *LMA,
130-
bool accumulating,
131125
bool isPrevZeroReg) const {
132126
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
133127
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -153,7 +147,7 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
153147
else dbgs() << "end of block";
154148
dbgs() << "\n";
155149

156-
assert(PrevReg || !accumulating);
150+
assert(PrevReg);
157151

158152
if (PrevConstant && CurConstant) {// is wave wide constant?
159153
if (PrevVal == CurVal) {
@@ -178,17 +172,7 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
178172
Register PrevMaskedReg;
179173
Register CurMaskedReg;
180174
if (!PrevConstant) {
181-
if (accumulating || (CurConstant && CurVal)) {
182-
PrevMaskedReg = PrevReg;
183-
} else {
184-
PrevMaskedReg = createLaneMaskReg();
185-
dbgs() << "\t ";
186-
PrevMaskedBuilt =
187-
BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg)
188-
.addReg(PrevReg)
189-
.addReg(LMC.ExecReg);
190-
PrevMaskedBuilt->dump();
191-
}
175+
PrevMaskedReg = PrevReg;
192176
}
193177
if (!CurConstant) {
194178
bool isCurRegSubsetOfExec = LMA && LMA->isSubsetOfExec(CurReg, MBB, I);
@@ -254,14 +238,17 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
254238
for (;;) {
255239
if (!Register::isVirtualRegister(Reg)) {
256240
if (Reg == LMC.ExecReg &&
257-
(DefInstr == UseBlock.end() || DefInstr->getParent() == &UseBlock))
258-
return true;
241+
(DefInstr == UseBlock.end() || DefInstr->getParent() == &UseBlock)){
242+
dbgs() << "Reg is EXEC in same BB, return true\n";
243+
return true;}
244+
dbgs() << "Reg is not EXEC or is in other BB, return false\n";
259245
return false;
260246
}
261247

262248
DefInstr = MRI.getDomVRegDefInBasicBlock(Reg, UseBlock, I);
263-
if(DefInstr == UseBlock.end())
264-
return false;
249+
if(DefInstr == UseBlock.end()){
250+
dbgs() << "DefInstr == UseBlock.end(), return false\n";
251+
return false;}
265252
if (DefInstr->getOpcode() == AMDGPU::COPY) {
266253
Reg = DefInstr->getOperand(1).getReg();
267254
I = DefInstr;
@@ -270,57 +257,80 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
270257

271258
if (DefInstr->getOpcode() == LMC.MovOpc) {
272259
if (DefInstr->getOperand(1).isImm() &&
273-
DefInstr->getOperand(1).getImm() == 0)
274-
return true;
260+
DefInstr->getOperand(1).getImm() == 0){
261+
dbgs() << "MOV 0, return true\n";
262+
return true;}
263+
dbgs() << "MOV is not imm or not 0, return false\n";
275264
return false;
276265
}
277266

278267
break;
279268
}
280269

281-
if (DefInstr->getParent() != &UseBlock)
282-
return false;
270+
dbgs() << "DefInstr:";
271+
DefInstr->dump();
272+
dbgs() << "\n";
273+
274+
if (DefInstr->getParent() != &UseBlock){
275+
dbgs() << "DefInstr->getParent() != &UseBlock, return false\n";
276+
return false;}
283277

284278
auto CacheIt = SubsetOfExec.find(Reg);
285-
if (CacheIt != SubsetOfExec.end())
279+
if (CacheIt != SubsetOfExec.end()){
280+
dbgs() << "CacheIt != SubsetOfExec.end(), return CacheIt->second: " << CacheIt->second << " \n";
286281
return CacheIt->second;
282+
}
287283

288284
// V_CMP_xx always return a subset of EXEC.
289285
if (DefInstr->isCompare() &&
290286
(SIInstrInfo::isVOPC(*DefInstr) || SIInstrInfo::isVOP3(*DefInstr))) {
291287
SubsetOfExec[Reg] = true;
288+
dbgs() << "DefInstr is VOPC or VOP3, return true\n";
292289
return true;
293290
}
294291

295292
if (!RemainingDepth--)
296-
return false;
293+
{dbgs() << "RemainingDepth-- is 0, return false\n";
294+
return false;}
297295

298296
bool LikeOr = DefInstr->getOpcode() == LMC.OrOpc ||
299297
DefInstr->getOpcode() == LMC.XorOpc ||
300298
DefInstr->getOpcode() == LMC.CSelectOpc;
301299
bool IsAnd = DefInstr->getOpcode() == LMC.AndOpc;
302300
bool IsAndN2 = DefInstr->getOpcode() == LMC.AndN2Opc;
301+
dbgs() << "LikeOr: " << LikeOr << " IsAnd: " << IsAnd << " IsAndN2: " << IsAndN2 << "\n";
303302
if ((LikeOr || IsAnd || IsAndN2) &&
304303
(DefInstr->getOperand(1).isReg() && DefInstr->getOperand(2).isReg())) {
305304
bool FirstIsSubset = isSubsetOfExec(DefInstr->getOperand(1).getReg(),
306305
UseBlock, DefInstr, RemainingDepth);
307-
if (!FirstIsSubset && (LikeOr || IsAndN2))
308-
return SubsetOfExec.try_emplace(Reg, false).first->second;
306+
307+
dbgs() << "FirstIsSubset: " << FirstIsSubset << "\n";
308+
309+
if (!FirstIsSubset && (LikeOr || IsAndN2)){
310+
bool res = SubsetOfExec.try_emplace(Reg, false).first->second;
311+
dbgs() << "FirstIsSubset is false and (LikeOr || IsAndN2), return res: " << res << "\n";
312+
return res;}
309313

310314
if (FirstIsSubset && (IsAnd || IsAndN2)) {
311315
SubsetOfExec[Reg] = true;
316+
dbgs() << "FirstIsSubset is true and (IsAnd || IsAndN2), return true\n";
312317
return true;
313318
}
314319

315320
bool SecondIsSubset = isSubsetOfExec(DefInstr->getOperand(2).getReg(),
316321
UseBlock, DefInstr, RemainingDepth);
317-
if (!SecondIsSubset)
318-
return SubsetOfExec.try_emplace(Reg, false).first->second;
322+
dbgs() << "SecondIsSubset: " << SecondIsSubset << "\n";
323+
if (!SecondIsSubset){
324+
bool res = SubsetOfExec.try_emplace(Reg, false).first->second;
325+
dbgs() << "SecondIsSubset is false, return res: " << res << "\n";
326+
return res;}
319327

320328
SubsetOfExec[Reg] = true;
329+
dbgs() << "SecondIsSubset is true, return true\n";
321330
return true;
322331
}
323332

333+
dbgs() << "Enod of function ,return false\n";
324334
return false;
325335
}
326336

@@ -329,19 +339,19 @@ void GCNLaneMaskUpdater::init() {
329339
Processed = false;
330340
Blocks.clear();
331341
// SSAUpdater.Initialize(LMU.getLaneMaskConsts().LaneMaskRC);
332-
Accumulator = {};
342+
Accumulator = AMDGPU::NoRegister;
333343
}
334344

335345
/// Optional cleanup, may remove stray instructions.
336346
void GCNLaneMaskUpdater::cleanup() {
337347
Processed = false;
338348
Blocks.clear();
339-
Accumulator = {};
349+
Accumulator = AMDGPU::NoRegister;
340350
MachineRegisterInfo &MRI = LMU.function()->getRegInfo();
341351

342352
if (ZeroReg && MRI.use_empty(ZeroReg)) {
343353
MRI.getVRegDef(ZeroReg)->eraseFromParent();
344-
ZeroReg = {};
354+
ZeroReg = AMDGPU::NoRegister;
345355
}
346356

347357
for (MachineInstr *MI : PotentiallyDead) {
@@ -401,7 +411,7 @@ Register GCNLaneMaskUpdater::getValueInMiddleOfBlock(MachineBasicBlock &Block) {
401411
/// Return the value at the end of the given block, i.e. after any change that
402412
/// was registered via \ref addAvailable.
403413
///
404-
/// Note: If \p Block is the reset block in accumulating mode with ResetAtEnd
414+
/// Note: If \p Block is the reset block with ResetAtEnd
405415
/// reset mode, then this value will be 0. You likely want
406416
/// \ref getPreReset instead.
407417
Register GCNLaneMaskUpdater::getValueAtEndOfBlock(MachineBasicBlock &Block) {
@@ -418,7 +428,7 @@ Register GCNLaneMaskUpdater::getValueAfterMerge(MachineBasicBlock &Block) {
418428
dbgs() << "GCNLaneMaskUpdater::getValueAfterMerge(" << Block.name() << ")\n";
419429
if (!Processed)
420430
process();
421-
Register reg = {};
431+
Register reg = AMDGPU::NoRegister;
422432
auto BlockIt = findBlockInfo(Block);
423433
if (BlockIt != Blocks.end()) {
424434
if (BlockIt->Value){
@@ -515,8 +525,7 @@ void GCNLaneMaskUpdater::process() {
515525
LMU.function()->getSubtarget<GCNSubtarget>().getInstrInfo();
516526
MachineBasicBlock &Entry = LMU.function()->front();
517527

518-
// Prepare an all-zero value for the default and reset in accumulating mode.
519-
if (Accumulating && !ZeroReg) {
528+
if (!ZeroReg) {
520529
ZeroReg = LMU.createLaneMaskReg();
521530
BuildMI(Entry, Entry.getFirstTerminator(), {},
522531
TII->get(LMU.getLaneMaskConsts().MovOpc), ZeroReg)
@@ -537,7 +546,6 @@ void GCNLaneMaskUpdater::process() {
537546
// Add available values.
538547
for (BlockInfo &Info : Blocks) {
539548
dbgs() << "\tAdd avail value for BlockInfo:" << Info.Block->name() << "\n\t";
540-
assert(Accumulating || !Info.Flags);
541549
assert(Info.Flags || Info.Value);
542550
Info.dump(MRI);
543551
if(!Info.Value || (Info.Flags & ResetAtEnd)){
@@ -561,27 +569,19 @@ void GCNLaneMaskUpdater::process() {
561569
if (Info.Block != &LMU.function()->front() &&
562570
!(Info.Flags & ResetInMiddle)) {
563571
Previous = Accumulator;
564-
if (!Accumulating) {
565-
MachineInstr *PrevInstr = MRI.getVRegDef(Previous);
566-
if (PrevInstr && PrevInstr->getOpcode() == AMDGPU::IMPLICIT_DEF) {
567-
PotentiallyDead.insert(PrevInstr);
568-
Previous = {};
569-
}
570-
}
571572
} else {
572573
dbgs() << "\tEither one of the following 2 conds are true:\n";
573574
dbgs() << "\tInfo.Block == &LMU.function()->front():" << (Info.Block == &LMU.function()->front()) << "\n";
574575
dbgs() << "\tInfo.Flags & ResetInMiddle:" << (Info.Flags & ResetInMiddle) << "\n";
575-
if (Accumulating){
576-
Previous = ZeroReg;
577-
dbgs() << "\tBlock:" << Info.Block->name() << " Previous is ZeroReg:" << printReg(Previous , MRI.getTargetRegisterInfo(), 0, &MRI) << "\n";
578-
}
576+
Previous = ZeroReg;
577+
dbgs() << "\tBlock:" << Info.Block->name() << " Previous is ZeroReg:" << printReg(Previous , MRI.getTargetRegisterInfo(), 0, &MRI) << "\n";
578+
579579
}
580580

581581
// Insert merge logic.
582582
MachineBasicBlock::iterator insertPt = getSaluInsertionAtEnd(*Info.Block);
583583
LMU.buildMergeLaneMasks(*Info.Block, insertPt, {}, Accumulator, Previous,
584-
Info.Value, LMA, Accumulating, (Previous == ZeroReg));
584+
Info.Value, LMA, (Previous == ZeroReg));
585585

586586

587587
/*if (Info.Flags & ResetAtEnd) {

llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ class GCNLaneMaskUtils {
5151
MachineBasicBlock::iterator I, const DebugLoc &DL,
5252
Register DstReg, Register PrevReg, Register CurReg,
5353
GCNLaneMaskAnalysis *LMA = nullptr,
54-
bool Accumulating = false,
5554
bool isPrevZeroReg = false) const;
5655
};
5756

@@ -109,7 +108,6 @@ class GCNLaneMaskUpdater {
109108
GCNLaneMaskUtils LMU;
110109
GCNLaneMaskAnalysis *LMA = nullptr;
111110
MachineRegisterInfo &MRI;
112-
bool Accumulating = false;
113111

114112
bool Processed = false;
115113

@@ -146,8 +144,6 @@ class GCNLaneMaskUpdater {
146144
void init();
147145
void cleanup();
148146

149-
void setAccumulating(bool Val) { Accumulating = Val; }
150-
151147
void addReset(MachineBasicBlock &Block, ResetFlags Flags);
152148
void addAvailable(MachineBasicBlock &Block, Register Value);
153149

0 commit comments

Comments
 (0)