-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[DAG] combineTruncationShuffle - ensure the *_EXTEND_VECTOR_INREG node didn't come from a smaller type #164160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…e didn't come from a smaller type. The *_EXTEND_VECTOR_INREG source vector must be the same size as the destination. We already have a similar TODO to handle more types. Fixes llvm#164107
|
@llvm/pr-subscribers-llvm-selectiondag Author: Simon Pilgrim (RKSimon) Changes: The *_EXTEND_VECTOR_INREG source vector must be the same size as the destination. We already have a similar TODO to handle more types. Fixes #164107. Full diff: https://github.com/llvm/llvm-project/pull/164160.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d64d455..6bf9008c3d677 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26876,6 +26876,8 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// TODO: handle more extension/truncation cases as cases arise.
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
+ if (VT.getSizeInBits() != N00.getValueSizeInBits())
+ return SDValue();
// We can remove *extend_vector_inreg only if the truncation happens at
// the same scale as the extension.
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 3279a50a1265b..7a08f3ef116bd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -624,6 +624,52 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n
ret void
}
+define i32 @PR164107(<16 x i1> %0) {
+; AVX1-LABEL: PR164107:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX2-LABEL: PR164107:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: PR164107:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
+; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
+; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: ret{{[l|q]}}
+ %cmp = shufflevector <16 x i1> %0, <16 x i1> zeroinitializer, <16 x i32> zeroinitializer
+ %sext = sext <16 x i1> %cmp to <16 x i64>
+ %bc.1 = bitcast <16 x i64> %sext to <64 x i16>
+ %vecinit15.i = shufflevector <64 x i16> %bc.1, <64 x i16> zeroinitializer, <16 x i32> <i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
+ %conv16.i = sext <16 x i16> %vecinit15.i to <16 x i64>
+ %bc.2 = bitcast <16 x i64> %conv16.i to <32 x i32>
+ %conv22.i = extractelement <32 x i32> %bc.2, i64 4
+ ret i32 %conv22.i
+}
+
define <4 x i64> @concat_self_v4i64(<2 x i64> %x) {
; AVX1-LABEL: concat_self_v4i64:
; AVX1: # %bb.0:
|
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: The *_EXTEND_VECTOR_INREG source vector must be the same size as the destination. We already have a similar TODO to handle more types. Fixes #164107. Full diff: https://github.com/llvm/llvm-project/pull/164160.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d64d455..6bf9008c3d677 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26876,6 +26876,8 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// TODO: handle more extension/truncation cases as cases arise.
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
+ if (VT.getSizeInBits() != N00.getValueSizeInBits())
+ return SDValue();
// We can remove *extend_vector_inreg only if the truncation happens at
// the same scale as the extension.
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 3279a50a1265b..7a08f3ef116bd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -624,6 +624,52 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n
ret void
}
+define i32 @PR164107(<16 x i1> %0) {
+; AVX1-LABEL: PR164107:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX2-LABEL: PR164107:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: PR164107:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
+; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
+; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: ret{{[l|q]}}
+ %cmp = shufflevector <16 x i1> %0, <16 x i1> zeroinitializer, <16 x i32> zeroinitializer
+ %sext = sext <16 x i1> %cmp to <16 x i64>
+ %bc.1 = bitcast <16 x i64> %sext to <64 x i16>
+ %vecinit15.i = shufflevector <64 x i16> %bc.1, <64 x i16> zeroinitializer, <16 x i32> <i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
+ %conv16.i = sext <16 x i16> %vecinit15.i to <16 x i64>
+ %bc.2 = bitcast <16 x i64> %conv16.i to <32 x i32>
+ %conv22.i = extractelement <32 x i32> %bc.2, i64 4
+ ret i32 %conv22.i
+}
+
define <4 x i64> @concat_self_v4i64(<2 x i64> %x) {
; AVX1-LABEL: concat_self_v4i64:
; AVX1: # %bb.0:
|
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/19885 Here is the relevant piece of the build log for reference: |
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/22068 Here is the relevant piece of the build log for reference: |
…e didn't come from a smaller type (llvm#164160). The *_EXTEND_VECTOR_INREG source vector must be the same size as the destination. We already have a similar TODO to handle more types. Fixes llvm#164107
…e didn't come from a smaller type (llvm#164160). The *_EXTEND_VECTOR_INREG source vector must be the same size as the destination. We already have a similar TODO to handle more types. Fixes llvm#164107
The *_EXTEND_VECTOR_INREG source vector must be the same size as the destination.
We already have a similar TODO to handle more types.
Fixes #164107