@@ -502,27 +502,14 @@ static Value *promoteAllocaUserToVector(
502502 Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
503503 unsigned VecStoreSize, unsigned ElementSize,
504504 DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
505- std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
506- SmallVectorImpl<LoadInst *> &DeferredLoads ) {
505+ std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
506+ function_ref<Value *()> GetCurVal ) {
507507 // Note: we use InstSimplifyFolder because it can leverage the DataLayout
508508 // to do more folding, especially in the case of vector splats.
509509 IRBuilder<InstSimplifyFolder> Builder (Inst->getContext (),
510510 InstSimplifyFolder (DL));
511511 Builder.SetInsertPoint (Inst);
512512
513- const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
514- if (CurVal)
515- return CurVal;
516-
517- // If the current value is not known, insert a dummy load and lower it on
518- // the second pass.
519- LoadInst *Dummy =
520- Builder.CreateLoad (VectorTy, PoisonValue::get (Builder.getPtrTy ()),
521- " promotealloca.dummyload" );
522- DeferredLoads.push_back (Dummy);
523- return Dummy;
524- };
525-
526513 const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
527514 Type *PtrTy) -> Value * {
528515 assert (DL.getTypeStoreSize (Val->getType ()) == DL.getTypeStoreSize (PtrTy));
@@ -542,12 +529,7 @@ static Value *promoteAllocaUserToVector(
542529
543530 switch (Inst->getOpcode ()) {
544531 case Instruction::Load: {
545- // Loads can only be lowered if the value is known.
546- if (!CurVal) {
547- DeferredLoads.push_back (cast<LoadInst>(Inst));
548- return nullptr ;
549- }
550-
532+ Value *CurVal = GetCurVal ();
551533 Value *Index = calculateVectorIndex (
552534 cast<LoadInst>(Inst)->getPointerOperand (), GEPVectorIdx);
553535
@@ -637,7 +619,7 @@ static Value *promoteAllocaUserToVector(
637619
638620 Val = Builder.CreateBitOrPointerCast (Val, SubVecTy);
639621
640- Value *CurVec = GetOrLoadCurrentVectorValue ();
622+ Value *CurVec = GetCurVal ();
641623 for (unsigned K = 0 , NumElts = std::min (NumWrittenElts, NumVecElts);
642624 K < NumElts; ++K) {
643625 Value *CurIdx =
@@ -650,8 +632,7 @@ static Value *promoteAllocaUserToVector(
650632
651633 if (Val->getType () != VecEltTy)
652634 Val = Builder.CreateBitOrPointerCast (Val, VecEltTy);
653- return Builder.CreateInsertElement (GetOrLoadCurrentVectorValue (), Val,
654- Index);
635+ return Builder.CreateInsertElement (GetCurVal (), Val, Index);
655636 }
656637 case Instruction::Call: {
657638 if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
@@ -673,7 +654,7 @@ static Value *promoteAllocaUserToVector(
673654 }
674655 }
675656
676- return Builder.CreateShuffleVector (GetOrLoadCurrentVectorValue (), Mask);
657+ return Builder.CreateShuffleVector (GetCurVal (), Mask);
677658 }
678659
679660 if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
@@ -1038,37 +1019,46 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
10381019
10391020 Updater.AddAvailableValue (EntryBB, AllocaInitValue);
10401021
1041- // First handle the initial worklist.
1042- SmallVector<LoadInst *, 4 > DeferredLoads;
1022+ // First handle the initial worklist, in basic block order.
1023+ //
1024+ // Insert a placeholder whenever we need the vector value at the top of a
1025+ // basic block.
1026+ SmallVector<Instruction *> Placeholders;
10431027 forEachWorkListItem (WorkList, [&](Instruction *I) {
10441028 BasicBlock *BB = I->getParent ();
1045- // On the first pass, we only take values that are trivially known, i.e.
1046- // where AddAvailableValue was already called in this block.
1047- Value *Result = promoteAllocaUserToVector (
1048- I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1049- Updater.FindValueForBlock (BB), DeferredLoads);
1029+ auto GetCurVal = [&]() -> Value * {
1030+ if (Value *CurVal = Updater.FindValueForBlock (BB))
1031+ return CurVal;
1032+
1033+ if (!Placeholders.empty () && Placeholders.back ()->getParent () == BB)
1034+ return Placeholders.back ();
1035+
1036+ // If the current value in the basic block is not yet known, insert a
1037+ // placeholder that we will replace later.
1038+ IRBuilder<> Builder (I);
1039+ auto *Placeholder = cast<Instruction>(Builder.CreateFreeze (
1040+ PoisonValue::get (VectorTy), " promotealloca.placeholder" ));
1041+ Placeholders.push_back (Placeholder);
1042+ return Placeholders.back ();
1043+ };
1044+
1045+ Value *Result =
1046+ promoteAllocaUserToVector (I, *DL, VectorTy, VecStoreSize, ElementSize,
1047+ TransferInfo, GEPVectorIdx, GetCurVal);
10501048 if (Result)
10511049 Updater.AddAvailableValue (BB, Result);
10521050 });
10531051
1054- // Then handle deferred loads.
1055- forEachWorkListItem (DeferredLoads, [&](Instruction *I) {
1056- SmallVector<LoadInst *, 0 > NewDLs;
1057- BasicBlock *BB = I->getParent ();
1058- // On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
1059- // get a value, inserting PHIs as needed.
1060- Value *Result = promoteAllocaUserToVector (
1061- I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1062- Updater.GetValueInMiddleOfBlock (I->getParent ()), NewDLs);
1063- if (Result)
1064- Updater.AddAvailableValue (BB, Result);
1065- assert (NewDLs.empty () && " No more deferred loads should be queued!" );
1066- });
1052+ // Now fixup the placeholders.
1053+ for (Instruction *Placeholder : Placeholders) {
1054+ Placeholder->replaceAllUsesWith (
1055+ Updater.GetValueInMiddleOfBlock (Placeholder->getParent ()));
1056+ Placeholder->eraseFromParent ();
1057+ }
10671058
10681059 // Delete all instructions. On the first pass, new dummy loads may have been
10691060 // added so we need to collect them too.
10701061 DenseSet<Instruction *> InstsToDelete (WorkList.begin (), WorkList.end ());
1071- InstsToDelete.insert_range (DeferredLoads);
10721062 for (Instruction *I : InstsToDelete) {
10731063 assert (I->use_empty ());
10741064 I->eraseFromParent ();
0 commit comments