[X86] Prevent shuffle combining from creating an identical X86ISD::SHUF128.

topperc · topperc · commit 35b35a373d01 · 2020-09-04T14:12:49.000-07:00
This can cause an infinite loop if SimplifiedDemandedElts asks for the node to replace itself. A similar protection exists in other places in shuffle combining. Fixes ISPC ispc/ispc#1864
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34909,6 +34909,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
         (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2));
 
     if (!isAnyZero(Mask) && !PreferPERMQ) {
+      if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
+        return SDValue(); // Nothing to do!
       if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG))
         return DAG.getBitcast(RootVT, V);
     }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -764,3 +764,47 @@ define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_
   %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <16 x float> %res
 }
+
+%struct.foo = type { [4 x double], [3 x [4 x double]], [4 x double] }
+
+; This test previously hung in shuffle combining. https://github.com/ispc/ispc/issues/1864
+define void @ispc_1864(<16 x float>* %arg) {
+; ALL-LABEL: ispc_1864:
+; ALL:       # %bb.0: # %bb
+; ALL-NEXT:    pushq %rbp
+; ALL-NEXT:    .cfi_def_cfa_offset 16
+; ALL-NEXT:    .cfi_offset %rbp, -16
+; ALL-NEXT:    movq %rsp, %rbp
+; ALL-NEXT:    .cfi_def_cfa_register %rbp
+; ALL-NEXT:    andq $-64, %rsp
+; ALL-NEXT:    subq $4864, %rsp # imm = 0x1300
+; ALL-NEXT:    vbroadcastss {{.*#+}} ymm0 = [-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0]
+; ALL-NEXT:    vmulps 32(%rdi), %ymm0, %ymm0
+; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
+; ALL-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,0,1,0,1]
+; ALL-NEXT:    vmovapd %ymm0, {{[0-9]+}}(%rsp)
+; ALL-NEXT:    movq %rbp, %rsp
+; ALL-NEXT:    popq %rbp
+; ALL-NEXT:    .cfi_def_cfa %rsp, 8
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
+bb:
+  %tmp = alloca [30 x %struct.foo], align 64
+  %tmp1 = load <16 x float>, <16 x float>* %arg, align 4
+  %tmp2 = fmul <16 x float> %tmp1, <float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00>
+  %tmp3 = fpext <16 x float> %tmp2 to <16 x double>
+  %tmp4 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 0
+  %tmp5 = extractelement <16 x double> %tmp3, i32 10
+  store double %tmp5, double* %tmp4, align 32
+  %tmp6 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 1
+  %tmp7 = extractelement <16 x double> %tmp3, i32 11
+  store double %tmp7, double* %tmp6, align 8
+  %tmp8 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 2
+  %tmp9 = extractelement <16 x double> %tmp3, i32 12
+  store double %tmp9, double* %tmp8, align 16
+  %tmp10 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 3
+  %tmp11 = extractelement <16 x double> %tmp3, i32 13
+  store double %tmp11, double* %tmp10, align 8
+  ret void
+}
+

Original file line number	Diff line number	Diff line change
`@@ -34909,6 +34909,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,`
`34909`	`34909`	`(Mask[1] < 0 \|\| Mask[3] < 0 \|\| Mask[1] == (Mask[3] % 2));`
`34910`	`34910`
`34911`	`34911`	`if (!isAnyZero(Mask) && !PreferPERMQ) {`
	`34912`	`+ if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)`
	`34913`	`+ return SDValue(); // Nothing to do!`
`34912`	`34914`	`if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG))`
`34913`	`34915`	`return DAG.getBitcast(RootVT, V);`
`34914`	`34916`	`}`