Skip to content

Commit 2ed09c1

Browse files
committed
[DRAFT] AMDGPU: teach matchPERM to handle constant zero
1 parent 8050157 commit 2ed09c1

File tree

1 file changed

+49
-34
lines changed

1 file changed

+49
-34
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 49 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13874,6 +13874,9 @@ static bool addresses16Bits(int Mask) {
1387413874
int Low8 = Mask & 0xff;
1387513875
int Hi8 = (Mask & 0xff00) >> 8;
1387613876

13877+
if (Hi8 == 0x0c || Low8 == 0x0c)
13878+
return false;
13879+
1387713880
assert(Low8 < 8 && Hi8 < 8);
1387813881
// Are the bytes contiguous in the order of increasing addresses.
1387913882
bool IsConsecutive = (Hi8 - Low8 == 1);
@@ -13968,58 +13971,70 @@ static SDValue getDWordFromOffset(SelectionDAG &DAG, SDLoc SL, SDValue Src,
1396813971

1396913972
static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1397013973
SelectionDAG &DAG = DCI.DAG;
13974+
assert(!DAG.getDataLayout().isBigEndian());
13975+
1397113976
[[maybe_unused]] EVT VT = N->getValueType(0);
13972-
SmallVector<ByteProvider<SDValue>, 8> PermNodes;
13977+
SmallVector<ByteProvider<SDValue>, 4> PermNodes;
1397313978

1397413979
// VT is known to be MVT::i32, so we need to provide 4 bytes.
1397513980
assert(VT == MVT::i32);
1397613981
for (int i = 0; i < 4; i++) {
1397713982
// Find the ByteProvider that provides the ith byte of the result of OR
1397813983
std::optional<ByteProvider<SDValue>> P =
1397913984
calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
13980-
// TODO support constantZero
13981-
if (!P || P->isConstantZero())
13985+
if (!P)
1398213986
return SDValue();
1398313987

1398413988
PermNodes.push_back(*P);
1398513989
}
13986-
if (PermNodes.size() != 4)
13987-
return SDValue();
1398813990

13989-
std::pair<unsigned, unsigned> FirstSrc(0, PermNodes[0].SrcOffset / 4);
13990-
std::optional<std::pair<unsigned, unsigned>> SecondSrc;
13991+
static auto isSameSrc = [](SDValue SrcA, unsigned DWordA, SDValue SrcB,
13992+
unsigned DWordB) {
13993+
// If the Src uses a byte from a different DWORD, then it corresponds
13994+
// with a different source
13995+
return SrcA == SrcB && DWordA == DWordB;
13996+
};
13997+
13998+
SDValue Src0, Src1;
13999+
unsigned DWord0, DWord1;
1399114000
uint64_t PermMask = 0x00000000;
1399214001
for (size_t i = 0; i < PermNodes.size(); i++) {
13993-
auto PermOp = PermNodes[i];
13994-
// Since the mask is applied to Src1:Src2, Src1 bytes must be offset
13995-
// by sizeof(Src2) = 4
13996-
int SrcByteAdjust = 4;
14002+
ByteProvider<SDValue> PermOp = PermNodes[i];
14003+
if (PermOp.isConstantZero()) {
14004+
PermMask |= 0x0c << (i * 8);
14005+
continue;
14006+
}
1399714007

13998-
// If the Src uses a byte from a different DWORD, then it corresponds
13999-
// with a different source
14000-
if (!PermOp.hasSameSrc(PermNodes[FirstSrc.first]) ||
14001-
((PermOp.SrcOffset / 4) != FirstSrc.second)) {
14002-
if (SecondSrc)
14003-
if (!PermOp.hasSameSrc(PermNodes[SecondSrc->first]) ||
14004-
((PermOp.SrcOffset / 4) != SecondSrc->second))
14005-
return SDValue();
14008+
const SDValue SrcI = PermOp.Src.value();
14009+
const unsigned DWordI = PermOp.SrcOffset / 4;
14010+
const unsigned ByteI = PermOp.SrcOffset % 4;
14011+
if (!Src0) {
14012+
Src0 = SrcI;
14013+
DWord0 = DWordI;
14014+
}
1400614015

14007-
// Set the index of the second distinct Src node
14008-
SecondSrc = {i, PermNodes[i].SrcOffset / 4};
14009-
assert(!(PermNodes[SecondSrc->first].Src->getValueSizeInBits() % 8));
14010-
SrcByteAdjust = 0;
14016+
if (!isSameSrc(Src0, DWord0, SrcI, DWordI)) {
14017+
if (!Src1) {
14018+
Src1 = SrcI;
14019+
DWord1 = DWordI;
14020+
} else if (!isSameSrc(Src1, DWord1, SrcI, DWordI))
14021+
return SDValue();
1401114022
}
14012-
assert((PermOp.SrcOffset % 4) + SrcByteAdjust < 8);
14013-
assert(!DAG.getDataLayout().isBigEndian());
14014-
PermMask |= ((PermOp.SrcOffset % 4) + SrcByteAdjust) << (i * 8);
14023+
14024+
// Since the mask is applied to Src0:Src1, Src0 bytes must be offset
14025+
// by sizeof(Src1) = 4
14026+
const int SrcByteAdjust = SrcI == Src0 ? 4 : 0;
14027+
assert(ByteI + SrcByteAdjust < 8);
14028+
PermMask |= (ByteI + SrcByteAdjust) << (i * 8);
1401514029
}
14030+
1401614031
SDLoc DL(N);
14017-
SDValue Op = *PermNodes[FirstSrc.first].Src;
14018-
Op = getDWordFromOffset(DAG, DL, Op, FirstSrc.second);
14032+
SDValue Op = Src0;
14033+
Op = getDWordFromOffset(DAG, DL, Op, DWord0);
1401914034
assert(Op.getValueSizeInBits() == 32);
1402014035

1402114036
// Check that we are not just extracting the bytes in order from an op
14022-
if (!SecondSrc) {
14037+
if (!Src1) {
1402314038
int Low16 = PermMask & 0xffff;
1402414039
int Hi16 = (PermMask & 0xffff0000) >> 16;
1402514040

@@ -14031,12 +14046,12 @@ static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1403114046
return DAG.getBitcast(MVT::getIntegerVT(32), Op);
1403214047
}
1403314048

14034-
SDValue OtherOp = SecondSrc ? *PermNodes[SecondSrc->first].Src : Op;
14035-
14036-
if (SecondSrc) {
14037-
OtherOp = getDWordFromOffset(DAG, DL, OtherOp, SecondSrc->second);
14049+
SDValue OtherOp;
14050+
if (Src1) {
14051+
OtherOp = getDWordFromOffset(DAG, DL, Src1, DWord1);
1403814052
assert(OtherOp.getValueSizeInBits() == 32);
14039-
}
14053+
} else
14054+
OtherOp = Op;
1404014055

1404114056
if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
1404214057

0 commit comments

Comments
 (0)