@@ -13874,6 +13874,9 @@ static bool addresses16Bits(int Mask) {
1387413874 int Low8 = Mask & 0xff;
1387513875 int Hi8 = (Mask & 0xff00) >> 8;
1387613876
13877+ if (Hi8 == 0x0c || Low8 == 0x0c)
13878+ return false;
13879+
1387713880 assert(Low8 < 8 && Hi8 < 8);
1387813881 // Are the bytes contiguous in the order of increasing addresses.
1387913882 bool IsConsecutive = (Hi8 - Low8 == 1);
@@ -13968,58 +13971,70 @@ static SDValue getDWordFromOffset(SelectionDAG &DAG, SDLoc SL, SDValue Src,
1396813971
1396913972static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1397013973 SelectionDAG &DAG = DCI.DAG;
13974+ assert(!DAG.getDataLayout().isBigEndian());
13975+
1397113976 [[maybe_unused]] EVT VT = N->getValueType(0);
13972- SmallVector<ByteProvider<SDValue>, 8 > PermNodes;
13977+ SmallVector<ByteProvider<SDValue>, 4 > PermNodes;
1397313978
1397413979 // VT is known to be MVT::i32, so we need to provide 4 bytes.
1397513980 assert(VT == MVT::i32);
1397613981 for (int i = 0; i < 4; i++) {
1397713982 // Find the ByteProvider that provides the ith byte of the result of OR
1397813983 std::optional<ByteProvider<SDValue>> P =
1397913984 calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
13980- // TODO support constantZero
13981- if (!P || P->isConstantZero())
13985+ if (!P)
1398213986 return SDValue();
1398313987
1398413988 PermNodes.push_back(*P);
1398513989 }
13986- if (PermNodes.size() != 4)
13987- return SDValue();
1398813990
13989- std::pair<unsigned, unsigned> FirstSrc(0, PermNodes[0].SrcOffset / 4);
13990- std::optional<std::pair<unsigned, unsigned>> SecondSrc;
13991+ static auto isSameSrc = [](SDValue SrcA, unsigned DWordA, SDValue SrcB,
13992+ unsigned DWordB) {
13993+ // If the Src uses a byte from a different DWORD, then it corresponds
13994+ // with a different source
13995+ return SrcA == SrcB && DWordA == DWordB;
13996+ };
13997+
13998+ SDValue Src0, Src1;
13999+ unsigned DWord0, DWord1;
1399114000 uint64_t PermMask = 0x00000000;
1399214001 for (size_t i = 0; i < PermNodes.size(); i++) {
13993- auto PermOp = PermNodes[i];
13994- // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
13995- // by sizeof(Src2) = 4
13996- int SrcByteAdjust = 4;
14002+ ByteProvider<SDValue> PermOp = PermNodes[i];
14003+ if (PermOp.isConstantZero()) {
14004+ PermMask |= 0x0c << (i * 8);
14005+ continue;
14006+ }
1399714007
13998- // If the Src uses a byte from a different DWORD, then it corresponds
13999- // with a difference source
14000- if (!PermOp.hasSameSrc(PermNodes[FirstSrc.first]) ||
14001- ((PermOp.SrcOffset / 4) != FirstSrc.second)) {
14002- if (SecondSrc)
14003- if (!PermOp.hasSameSrc(PermNodes[SecondSrc->first]) ||
14004- ((PermOp.SrcOffset / 4) != SecondSrc->second))
14005- return SDValue();
14008+ const SDValue SrcI = PermOp.Src.value();
14009+ const unsigned DWordI = PermOp.SrcOffset / 4;
14010+ const unsigned ByteI = PermOp.SrcOffset % 4;
14011+ if (!Src0) {
14012+ Src0 = SrcI;
14013+ DWord0 = DWordI;
14014+ }
1400614015
14007- // Set the index of the second distinct Src node
14008- SecondSrc = {i, PermNodes[i].SrcOffset / 4};
14009- assert(!(PermNodes[SecondSrc->first].Src->getValueSizeInBits() % 8));
14010- SrcByteAdjust = 0;
14016+ if (!isSameSrc(Src0, DWord0, SrcI, DWordI)) {
14017+ if (!Src1) {
14018+ Src1 = SrcI;
14019+ DWord1 = DWordI;
14020+ } else if (!isSameSrc(Src1, DWord1, SrcI, DWordI))
14021+ return SDValue();
1401114022 }
14012- assert((PermOp.SrcOffset % 4) + SrcByteAdjust < 8);
14013- assert(!DAG.getDataLayout().isBigEndian());
14014- PermMask |= ((PermOp.SrcOffset % 4) + SrcByteAdjust) << (i * 8);
14023+
14024+ // Since the mask is applied to Src0:Src1, Src0 bytes must be offset
14025+ // by sizeof(Src1) = 4
14026+ const int SrcByteAdjust = SrcI == Src0 ? 4 : 0;
14027+ assert(ByteI + SrcByteAdjust < 8);
14028+ PermMask |= (ByteI + SrcByteAdjust) << (i * 8);
1401514029 }
14030+
1401614031 SDLoc DL(N);
14017- SDValue Op = *PermNodes[FirstSrc.first].Src ;
14018- Op = getDWordFromOffset(DAG, DL, Op, FirstSrc.second );
14032+ SDValue Op = Src0 ;
14033+ Op = getDWordFromOffset(DAG, DL, Op, DWord0 );
1401914034 assert(Op.getValueSizeInBits() == 32);
1402014035
1402114036 // Check that we are not just extracting the bytes in order from an op
14022- if (!SecondSrc ) {
14037+ if (!Src1 ) {
1402314038 int Low16 = PermMask & 0xffff;
1402414039 int Hi16 = (PermMask & 0xffff0000) >> 16;
1402514040
@@ -14031,12 +14046,12 @@ static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1403114046 return DAG.getBitcast(MVT::getIntegerVT(32), Op);
1403214047 }
1403314048
14034- SDValue OtherOp = SecondSrc ? *PermNodes[SecondSrc->first].Src : Op;
14035-
14036- if (SecondSrc) {
14037- OtherOp = getDWordFromOffset(DAG, DL, OtherOp, SecondSrc->second);
14049+ SDValue OtherOp;
14050+ if (Src1) {
14051+ OtherOp = getDWordFromOffset(DAG, DL, Src1, DWord1);
1403814052 assert(OtherOp.getValueSizeInBits() == 32);
14039- }
14053+ } else
14054+ OtherOp = Op;
1404014055
1404114056 if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
1404214057
0 commit comments