diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 4896b8ed2595b..3a49f95d3f117 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1859,13 +1859,19 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { if (!FrontU) return false; + // Helper to peek through bitcasts to the same value. + auto IsEquiv = [&](Value *X, Value *Y) { + return X->getType() == Y->getType() && + peekThroughBitcasts(X) == peekThroughBitcasts(Y); + }; + // Look for an identity value. if (FrontLane == 0 && cast(FrontU->get()->getType())->getNumElements() == Ty->getNumElements() && - all_of(drop_begin(enumerate(Item)), [Item](const auto &E) { + all_of(drop_begin(enumerate(Item)), [IsEquiv, Item](const auto &E) { Value *FrontV = Item.front().first->get(); - return !E.value().first || (E.value().first->get() == FrontV && + return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) && E.value().second == (int)E.index()); })) { IdentityLeafs.insert(FrontU); diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll index 011d51600b51f..c2ed7b9c84523 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll @@ -88,20 +88,10 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64> ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP5]], <4 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP11]], <4 x i32> [[TMP9]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP13]] to <4 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]] +; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> @@ -129,20 +119,10 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64 ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP5]], <8 x i16> [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <16 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <16 x i16> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i16> [[TMP11]], <8 x i16> [[TMP9]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP13]] to <4 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]] +; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] ; %a.bc = bitcast <4 x i64> %a to <32 x i8> @@ -276,20 +256,10 @@ define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64 ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <16 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <16 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP11]], <8 x i32> [[TMP9]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP13]] to <8 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]] +; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64> ; CHECK-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> @@ -317,20 +287,10 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6 ; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16> ; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]] -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP5]], <16 x i16> [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <32 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <32 x i16> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i16> [[TMP10]], <32 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i16> [[TMP11]], <16 x i16> [[TMP9]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[TMP12]], <32 x i32> -; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP13]] to <8 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]] +; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64> ; CHECK-NEXT: ret <8 x i64> [[RES]] ; %a.bc = bitcast <8 x i64> %a to <64 x i8> diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 1775c9b7a3a9f..af04fb0ab4621 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -993,25 +993,15 @@ define void @maximal_legal_fpmath(ptr %addr1, ptr %addr2, ptr %result, float %va ret void } -; TODO: Peek through (repeated) bitcasts to find a common source value. +; Peek through (repeated) bitcasts to find a common source value. define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: @bitcast_smax_v8i32_v4i32( ; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> ; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]] -; CHECK-NEXT: [[CMP_LO:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[CMP_HI:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> ; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> ; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> -; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC1]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC1]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[LO:%.*]] = select <4 x i1> [[CMP_LO]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]] -; CHECK-NEXT: [[A_BC2:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> -; CHECK-NEXT: [[B_BC2:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> -; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <8 x i32> [[A_BC2]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <8 x i32> [[B_BC2]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[HI:%.*]] = select <4 x i1> [[CMP_HI]], <4 x i32> [[B_HI]], <4 x i32> [[A_HI]] -; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> +; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]] ; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] ;