@@ -108,8 +108,7 @@ Register GCNLaneMaskUtils::createLaneMaskReg() const {
108108// / DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC)
109109// /
110110// / before \p I in basic block \p MBB. Some simplifications are applied on the
111- // / fly based on constant inputs and analysis via \p LMA, and further
112- // / simplifications can be requested in "accumulating" mode.
111+ // / fly based on constant inputs and analysis via \p LMA.
113112// /
114113// / \param DstReg The virtual register into which the merged mask is written.
115114// / \param PrevReg The virtual register with the "previous" lane mask value;
@@ -118,16 +117,11 @@ Register GCNLaneMaskUtils::createLaneMaskReg() const {
118117// / be merged into "previous".
119118// / \param LMA If non-null, used to test whether CurReg may already be a subset
120119// / of EXEC.
121- // / \param accumulating Indicates that we should assume PrevReg is already
122- // / properly masked, i.e. use PrevReg directly instead of
123- // / (PrevReg & ~EXEC), and don't add extra 1-bits to DstReg
124- // / beyond (CurReg & EXEC).
125120void GCNLaneMaskUtils::buildMergeLaneMasks (MachineBasicBlock &MBB,
126121 MachineBasicBlock::iterator I,
127122 const DebugLoc &DL, Register DstReg,
128123 Register PrevReg, Register CurReg,
129124 GCNLaneMaskAnalysis *LMA,
130- bool accumulating,
131125 bool isPrevZeroReg) const {
132126 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
133127 const SIInstrInfo *TII = ST.getInstrInfo ();
@@ -153,7 +147,7 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
153147 else dbgs () << " end of block" ;
154148 dbgs () << " \n " ;
155149
156- assert (PrevReg || !accumulating );
150+ assert (PrevReg);
157151
158152 if (PrevConstant && CurConstant) {// is wave wide constant?
159153 if (PrevVal == CurVal) {
@@ -178,17 +172,7 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
178172 Register PrevMaskedReg;
179173 Register CurMaskedReg;
180174 if (!PrevConstant) {
181- if (accumulating || (CurConstant && CurVal)) {
182- PrevMaskedReg = PrevReg;
183- } else {
184- PrevMaskedReg = createLaneMaskReg ();
185- dbgs () << " \t " ;
186- PrevMaskedBuilt =
187- BuildMI (MBB, I, DL, TII->get (LMC.AndN2Opc ), PrevMaskedReg)
188- .addReg (PrevReg)
189- .addReg (LMC.ExecReg );
190- PrevMaskedBuilt->dump ();
191- }
175+ PrevMaskedReg = PrevReg;
192176 }
193177 if (!CurConstant) {
194178 bool isCurRegSubsetOfExec = LMA && LMA->isSubsetOfExec (CurReg, MBB, I);
@@ -254,14 +238,17 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
254238 for (;;) {
255239 if (!Register::isVirtualRegister (Reg)) {
256240 if (Reg == LMC.ExecReg &&
257- (DefInstr == UseBlock.end () || DefInstr->getParent () == &UseBlock))
258- return true ;
241+ (DefInstr == UseBlock.end () || DefInstr->getParent () == &UseBlock)){
242+ dbgs () << " Reg is EXEC in same BB, return true\n " ;
243+ return true ;}
244+ dbgs () << " Reg is not EXEC or is in other BB, return false\n " ;
259245 return false ;
260246 }
261247
262248 DefInstr = MRI.getDomVRegDefInBasicBlock (Reg, UseBlock, I);
263- if (DefInstr == UseBlock.end ())
264- return false ;
249+ if (DefInstr == UseBlock.end ()){
250+ dbgs () << " DefInstr == UseBlock.end(), return false\n " ;
251+ return false ;}
265252 if (DefInstr->getOpcode () == AMDGPU::COPY) {
266253 Reg = DefInstr->getOperand (1 ).getReg ();
267254 I = DefInstr;
@@ -270,57 +257,80 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
270257
271258 if (DefInstr->getOpcode () == LMC.MovOpc ) {
272259 if (DefInstr->getOperand (1 ).isImm () &&
273- DefInstr->getOperand (1 ).getImm () == 0 )
274- return true ;
260+ DefInstr->getOperand (1 ).getImm () == 0 ){
261+ dbgs () << " MOV 0, return true\n " ;
262+ return true ;}
263+ dbgs () << " MOV is not imm or not 0, return false\n " ;
275264 return false ;
276265 }
277266
278267 break ;
279268 }
280269
281- if (DefInstr->getParent () != &UseBlock)
282- return false ;
270+ dbgs () << " DefInstr:" ;
271+ DefInstr->dump ();
272+ dbgs () << " \n " ;
273+
274+ if (DefInstr->getParent () != &UseBlock){
275+ dbgs () << " DefInstr->getParent() != &UseBlock, return false\n " ;
276+ return false ;}
283277
284278 auto CacheIt = SubsetOfExec.find (Reg);
285- if (CacheIt != SubsetOfExec.end ())
279+ if (CacheIt != SubsetOfExec.end ()){
280+ dbgs () << " CacheIt != SubsetOfExec.end(), return CacheIt->second: " << CacheIt->second << " \n " ;
286281 return CacheIt->second ;
282+ }
287283
288284 // V_CMP_xx always return a subset of EXEC.
289285 if (DefInstr->isCompare () &&
290286 (SIInstrInfo::isVOPC (*DefInstr) || SIInstrInfo::isVOP3 (*DefInstr))) {
291287 SubsetOfExec[Reg] = true ;
288+ dbgs () << " DefInstr is VOPC or VOP3, return true\n " ;
292289 return true ;
293290 }
294291
295292 if (!RemainingDepth--)
296- return false ;
293+ {dbgs () << " RemainingDepth-- is 0, return false\n " ;
294+ return false ;}
297295
298296 bool LikeOr = DefInstr->getOpcode () == LMC.OrOpc ||
299297 DefInstr->getOpcode () == LMC.XorOpc ||
300298 DefInstr->getOpcode () == LMC.CSelectOpc ;
301299 bool IsAnd = DefInstr->getOpcode () == LMC.AndOpc ;
302300 bool IsAndN2 = DefInstr->getOpcode () == LMC.AndN2Opc ;
301+ dbgs () << " LikeOr: " << LikeOr << " IsAnd: " << IsAnd << " IsAndN2: " << IsAndN2 << " \n " ;
303302 if ((LikeOr || IsAnd || IsAndN2) &&
304303 (DefInstr->getOperand (1 ).isReg () && DefInstr->getOperand (2 ).isReg ())) {
305304 bool FirstIsSubset = isSubsetOfExec (DefInstr->getOperand (1 ).getReg (),
306305 UseBlock, DefInstr, RemainingDepth);
307- if (!FirstIsSubset && (LikeOr || IsAndN2))
308- return SubsetOfExec.try_emplace (Reg, false ).first ->second ;
306+
307+ dbgs () << " FirstIsSubset: " << FirstIsSubset << " \n " ;
308+
309+ if (!FirstIsSubset && (LikeOr || IsAndN2)){
310+ bool res = SubsetOfExec.try_emplace (Reg, false ).first ->second ;
311+ dbgs () << " FirstIsSubset is false and (LikeOr || IsAndN2), return res: " << res << " \n " ;
312+ return res;}
309313
310314 if (FirstIsSubset && (IsAnd || IsAndN2)) {
311315 SubsetOfExec[Reg] = true ;
316+ dbgs () << " FirstIsSubset is true and (IsAnd || IsAndN2), return true\n " ;
312317 return true ;
313318 }
314319
315320 bool SecondIsSubset = isSubsetOfExec (DefInstr->getOperand (2 ).getReg (),
316321 UseBlock, DefInstr, RemainingDepth);
317- if (!SecondIsSubset)
318- return SubsetOfExec.try_emplace (Reg, false ).first ->second ;
322+ dbgs () << " SecondIsSubset: " << SecondIsSubset << " \n " ;
323+ if (!SecondIsSubset){
324+ bool res = SubsetOfExec.try_emplace (Reg, false ).first ->second ;
325+ dbgs () << " SecondIsSubset is false, return res: " << res << " \n " ;
326+ return res;}
319327
320328 SubsetOfExec[Reg] = true ;
329+ dbgs () << " SecondIsSubset is true, return true\n " ;
321330 return true ;
322331 }
323332
333+ dbgs () << " Enod of function ,return false\n " ;
324334 return false ;
325335}
326336
@@ -329,19 +339,19 @@ void GCNLaneMaskUpdater::init() {
329339 Processed = false ;
330340 Blocks.clear ();
331341 // SSAUpdater.Initialize(LMU.getLaneMaskConsts().LaneMaskRC);
332- Accumulator = {} ;
342+ Accumulator = AMDGPU::NoRegister ;
333343}
334344
335345// / Optional cleanup, may remove stray instructions.
336346void GCNLaneMaskUpdater::cleanup () {
337347 Processed = false ;
338348 Blocks.clear ();
339- Accumulator = {} ;
349+ Accumulator = AMDGPU::NoRegister ;
340350 MachineRegisterInfo &MRI = LMU.function ()->getRegInfo ();
341351
342352 if (ZeroReg && MRI.use_empty (ZeroReg)) {
343353 MRI.getVRegDef (ZeroReg)->eraseFromParent ();
344- ZeroReg = {} ;
354+ ZeroReg = AMDGPU::NoRegister ;
345355 }
346356
347357 for (MachineInstr *MI : PotentiallyDead) {
@@ -401,7 +411,7 @@ Register GCNLaneMaskUpdater::getValueInMiddleOfBlock(MachineBasicBlock &Block) {
401411// / Return the value at the end of the given block, i.e. after any change that
402412// / was registered via \ref addAvailable.
403413// /
404- // / Note: If \p Block is the reset block in accumulating mode with ResetAtEnd
414+ // / Note: If \p Block is the reset block with ResetAtEnd
405415// / reset mode, then this value will be 0. You likely want
406416// / \ref getPreReset instead.
407417Register GCNLaneMaskUpdater::getValueAtEndOfBlock (MachineBasicBlock &Block) {
@@ -418,7 +428,7 @@ Register GCNLaneMaskUpdater::getValueAfterMerge(MachineBasicBlock &Block) {
418428 dbgs () << " GCNLaneMaskUpdater::getValueAfterMerge(" << Block.name () << " )\n " ;
419429 if (!Processed)
420430 process ();
421- Register reg = {} ;
431+ Register reg = AMDGPU::NoRegister ;
422432 auto BlockIt = findBlockInfo (Block);
423433 if (BlockIt != Blocks.end ()) {
424434 if (BlockIt->Value ){
@@ -515,8 +525,7 @@ void GCNLaneMaskUpdater::process() {
515525 LMU.function ()->getSubtarget <GCNSubtarget>().getInstrInfo ();
516526 MachineBasicBlock &Entry = LMU.function ()->front ();
517527
518- // Prepare an all-zero value for the default and reset in accumulating mode.
519- if (Accumulating && !ZeroReg) {
528+ if (!ZeroReg) {
520529 ZeroReg = LMU.createLaneMaskReg ();
521530 BuildMI (Entry, Entry.getFirstTerminator (), {},
522531 TII->get (LMU.getLaneMaskConsts ().MovOpc ), ZeroReg)
@@ -537,7 +546,6 @@ void GCNLaneMaskUpdater::process() {
537546 // Add available values.
538547 for (BlockInfo &Info : Blocks) {
539548 dbgs () << " \t Add avail value for BlockInfo:" << Info.Block ->name () << " \n\t " ;
540- assert (Accumulating || !Info.Flags );
541549 assert (Info.Flags || Info.Value );
542550 Info.dump (MRI);
543551 if (!Info.Value || (Info.Flags & ResetAtEnd)){
@@ -561,27 +569,19 @@ void GCNLaneMaskUpdater::process() {
561569 if (Info.Block != &LMU.function ()->front () &&
562570 !(Info.Flags & ResetInMiddle)) {
563571 Previous = Accumulator;
564- if (!Accumulating) {
565- MachineInstr *PrevInstr = MRI.getVRegDef (Previous);
566- if (PrevInstr && PrevInstr->getOpcode () == AMDGPU::IMPLICIT_DEF) {
567- PotentiallyDead.insert (PrevInstr);
568- Previous = {};
569- }
570- }
571572 } else {
572573 dbgs () << " \t Either one of the following 2 conds are true:\n " ;
573574 dbgs () << " \t Info.Block == &LMU.function()->front():" << (Info.Block == &LMU.function ()->front ()) << " \n " ;
574575 dbgs () << " \t Info.Flags & ResetInMiddle:" << (Info.Flags & ResetInMiddle) << " \n " ;
575- if (Accumulating){
576- Previous = ZeroReg;
577- dbgs () << " \t Block:" << Info.Block ->name () << " Previous is ZeroReg:" << printReg (Previous , MRI.getTargetRegisterInfo (), 0 , &MRI) << " \n " ;
578- }
576+ Previous = ZeroReg;
577+ dbgs () << " \t Block:" << Info.Block ->name () << " Previous is ZeroReg:" << printReg (Previous , MRI.getTargetRegisterInfo (), 0 , &MRI) << " \n " ;
578+
579579 }
580580
581581 // Insert merge logic.
582582 MachineBasicBlock::iterator insertPt = getSaluInsertionAtEnd (*Info.Block );
583583 LMU.buildMergeLaneMasks (*Info.Block , insertPt, {}, Accumulator, Previous,
584- Info.Value , LMA, Accumulating, (Previous == ZeroReg));
584+ Info.Value , LMA, (Previous == ZeroReg));
585585
586586
587587 /* if (Info.Flags & ResetAtEnd) {
0 commit comments