Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 83e97cb

Browse files
committed
[X86] Don't emit *_extend_vector_inreg nodes when both the input and output types are legal with AVX1
We already have custom lowering for the AVX case in LegalizeVectorOps, so it's better to keep the regular extend op around as long as possible. I had to qualify one place in DAG combine that created illegal vector extending load operations. This change by itself had no effect on any tests, which is why it's included here. I've made a few cleanups to the custom lowering. The sign extend code no longer creates an identity shuffle with undef elements. The zero extend code now emits a zero_extend_vector_inreg instead of an unpckl with a zero vector. For the high half of the custom lowering of zero_extend/any_extend, we're now using an unpckh with a zero vector or undef. Previously we used a pshufd to move the upper 64 bits to the lower 64 bits and then used a zero_extend_vector_inreg. I think the zero vector should require fewer execution resources and result in smaller code size. Differential Revision: https://reviews.llvm.org/D54024 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346043 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e29a02a commit 83e97cb

File tree

12 files changed

+315
-348
lines changed

12 files changed

+315
-348
lines changed

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8391,7 +8391,7 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
83918391

83928392
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
83938393
EVT MemVT = LN0->getMemoryVT();
8394-
if ((LegalOperations || LN0->isVolatile()) &&
8394+
if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
83958395
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
83968396
return {};
83978397

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17446,27 +17446,26 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
1744617446
// Optimize vectors in AVX mode:
1744717447
//
1744817448
// v8i16 -> v8i32
17449-
// Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
17449+
// Use vpmovzwd for 4 lower elements v8i16 -> v4i32.
1745017450
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
1745117451
// Concat upper and lower parts.
1745217452
//
1745317453
// v4i32 -> v4i64
17454-
// Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
17454+
// Use vpmovzdq for 4 lower elements v4i32 -> v2i64.
1745517455
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
1745617456
// Concat upper and lower parts.
1745717457
//
1745817458

17459-
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
17459+
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
17460+
VT.getVectorNumElements() / 2);
17461+
17462+
SDValue OpLo = DAG.getZeroExtendVectorInReg(In, dl, HalfVT);
17463+
17464+
SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
1746017465
SDValue Undef = DAG.getUNDEF(InVT);
1746117466
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
17462-
SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
1746317467
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
17464-
17465-
MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
17466-
VT.getVectorNumElements()/2);
17467-
17468-
OpLo = DAG.getBitcast(HVT, OpLo);
17469-
OpHi = DAG.getBitcast(HVT, OpHi);
17468+
OpHi = DAG.getBitcast(HalfVT, OpHi);
1747017469

1747117470
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
1747217471
}
@@ -19878,29 +19877,21 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
1987819877
// v4i32 to v4i64
1987919878
//
1988019879
// Divide input vector into two parts
19881-
// for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
19880+
// for v4i32 the high shuffle mask will be {2, 3, -1, -1}
1988219881
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
1988319882
// concat the vectors to original VT
1988419883

19885-
unsigned NumElems = InVT.getVectorNumElements();
19886-
SDValue Undef = DAG.getUNDEF(InVT);
19887-
19888-
SmallVector<int,8> ShufMask1(NumElems, -1);
19889-
for (unsigned i = 0; i != NumElems/2; ++i)
19890-
ShufMask1[i] = i;
19884+
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
19885+
VT.getVectorNumElements() / 2);
1989119886

19892-
SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask1);
19887+
SDValue OpLo = DAG.getSignExtendVectorInReg(In, dl, HalfVT);
1989319888

19894-
SmallVector<int,8> ShufMask2(NumElems, -1);
19889+
unsigned NumElems = InVT.getVectorNumElements();
19890+
SmallVector<int,8> ShufMask(NumElems, -1);
1989519891
for (unsigned i = 0; i != NumElems/2; ++i)
19896-
ShufMask2[i] = i + NumElems/2;
19897-
19898-
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
19899-
19900-
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
19901-
VT.getVectorNumElements() / 2);
19892+
ShufMask[i] = i + NumElems/2;
1990219893

19903-
OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
19894+
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
1990419895
OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
1990519896

1990619897
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
@@ -38323,7 +38314,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
3832338314

3832438315
// On AVX2+ targets, if the input/output types are both legal then we will be
3832538316
// able to use SIGN_EXTEND/ZERO_EXTEND directly.
38326-
if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
38317+
if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
3832738318
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
3832838319
return SDValue();
3832938320

0 commit comments

Comments
 (0)