Skip to content

Commit 3f76497

Browse files
committed
[X86] Move combineIncDecVector logic from Select to PreprocessISelDAG.
This allows the transform to work properly with masked inc/dec for AVX-512. Those cases have a vselect as the root node, so they never got a chance to call combineIncDecVector. Doing the transform in PreprocessISelDAG also simplifies the logic, because we no longer have to manage the topological ordering of the newly created nodes.
1 parent f8045b2 commit 3f76497

File tree

2 files changed

+58
-51
lines changed

2 files changed

+58
-51
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 32 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,6 @@ namespace {
513513
bool shrinkAndImmediate(SDNode *N);
514514
bool isMaskZeroExtended(SDNode *N) const;
515515
bool tryShiftAmountMod(SDNode *N);
516-
bool combineIncDecVector(SDNode *Node);
517516
bool tryShrinkShlLogicImm(SDNode *N);
518517
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
519518
bool tryMatchBitSelect(SDNode *N);
@@ -809,6 +808,38 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
809808
continue;
810809
}
811810

811+
/// Convert vector increment or decrement to sub/add with an all-ones
812+
/// constant:
813+
/// add X, <1, 1...> --> sub X, <-1, -1...>
814+
/// sub X, <1, 1...> --> add X, <-1, -1...>
815+
/// The all-ones vector constant can be materialized using a pcmpeq
816+
/// instruction that is commonly recognized as an idiom (has no register
817+
/// dependency), so that's better/smaller than loading a splat 1 constant.
818+
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
819+
N->getSimpleValueType(0).isVector()) {
820+
821+
APInt SplatVal;
822+
if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
823+
SplatVal.isOneValue()) {
824+
SDLoc DL(N);
825+
826+
MVT VT = N->getSimpleValueType(0);
827+
unsigned NumElts = VT.getSizeInBits() / 32;
828+
SDValue AllOnes =
829+
CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
830+
AllOnes = CurDAG->getBitcast(VT, AllOnes);
831+
832+
unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
833+
SDValue Res =
834+
CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
835+
--I;
836+
CurDAG->ReplaceAllUsesWith(N, Res.getNode());
837+
++I;
838+
CurDAG->DeleteNode(N);
839+
continue;
840+
}
841+
}
842+
812843
switch (N->getOpcode()) {
813844
case ISD::FP_ROUND:
814845
case ISD::STRICT_FP_ROUND:
@@ -3899,52 +3930,6 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
38993930
return true;
39003931
}
39013932

3902-
/// Convert vector increment or decrement to sub/add with an all-ones constant:
3903-
/// add X, <1, 1...> --> sub X, <-1, -1...>
3904-
/// sub X, <1, 1...> --> add X, <-1, -1...>
3905-
/// The all-ones vector constant can be materialized using a pcmpeq instruction
3906-
/// that is commonly recognized as an idiom (has no register dependency), so
3907-
/// that's better/smaller than loading a splat 1 constant.
3908-
bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
3909-
assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
3910-
"Unexpected opcode for increment/decrement transform");
3911-
3912-
EVT VT = Node->getValueType(0);
3913-
assert(VT.isVector() && "Should only be called for vectors.");
3914-
3915-
SDValue X = Node->getOperand(0);
3916-
SDValue OneVec = Node->getOperand(1);
3917-
3918-
APInt SplatVal;
3919-
if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
3920-
return false;
3921-
3922-
SDLoc DL(Node);
3923-
SDValue OneConstant, AllOnesVec;
3924-
3925-
APInt Ones = APInt::getAllOnesValue(32);
3926-
assert(VT.getSizeInBits() % 32 == 0 &&
3927-
"Expected bit count to be a multiple of 32");
3928-
OneConstant = CurDAG->getConstant(Ones, DL, MVT::i32);
3929-
insertDAGNode(*CurDAG, X, OneConstant);
3930-
3931-
unsigned NumElts = VT.getSizeInBits() / 32;
3932-
assert(NumElts > 0 && "Expected to get non-empty vector.");
3933-
AllOnesVec = CurDAG->getSplatBuildVector(MVT::getVectorVT(MVT::i32, NumElts),
3934-
DL, OneConstant);
3935-
insertDAGNode(*CurDAG, X, AllOnesVec);
3936-
3937-
AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
3938-
insertDAGNode(*CurDAG, X, AllOnesVec);
3939-
3940-
unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
3941-
SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);
3942-
3943-
ReplaceNode(Node, NewNode.getNode());
3944-
SelectCode(NewNode.getNode());
3945-
return true;
3946-
}
3947-
39483933
/// If the high bits of an 'and' operand are known zero, try setting the
39493934
/// high bits of an 'and' constant operand to produce a smaller encoding by
39503935
/// creating a small, sign-extended negative immediate rather than a large
@@ -4579,10 +4564,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
45794564
LLVM_FALLTHROUGH;
45804565
case ISD::ADD:
45814566
case ISD::SUB: {
4582-
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() &&
4583-
combineIncDecVector(Node))
4584-
return;
4585-
45864567
// Try to avoid folding immediates with multiple uses for optsize.
45874568
// This code tries to select to register form directly to avoid going
45884569
// through the isel table which might fold the immediate. We can't change

llvm/test/CodeGen/X86/avx512-arith.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,3 +1153,29 @@ define <16 x float> @fabs_v16f32(<16 x float> %p)
11531153
ret <16 x float> %t
11541154
}
11551155
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
1156+
1157+
define <16 x i32> @masked_inc_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
1158+
; CHECK-LABEL: masked_inc_test:
1159+
; CHECK: # %bb.0:
1160+
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
1161+
; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
1162+
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1}
1163+
; CHECK-NEXT: retq
1164+
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
1165+
%x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1166+
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
1167+
ret <16 x i32> %r
1168+
}
1169+
1170+
define <16 x i32> @masked_dec_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
1171+
; CHECK-LABEL: masked_dec_test:
1172+
; CHECK: # %bb.0:
1173+
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
1174+
; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
1175+
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
1176+
; CHECK-NEXT: retq
1177+
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
1178+
%x = sub <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1179+
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
1180+
ret <16 x i32> %r
1181+
}

0 commit comments

Comments
 (0)