@@ -764,3 +764,47 @@ define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_
764764 %res = shufflevector <4 x float > %a , <4 x float > undef , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 0 , i32 1 , i32 2 , i32 3 , i32 0 , i32 1 , i32 2 , i32 3 , i32 0 , i32 1 , i32 2 , i32 3 >
765765 ret <16 x float > %res
766766}
767+
768+ %struct.foo = type { [4 x double ], [3 x [4 x double ]], [4 x double ] } ; 20 doubles in total - field 0 is 4 doubles, field 1 is a 3x4 block, field 2 is 4 doubles; element type of the alloca in @ispc_1864
769+
770+ ; This test previously hung in shuffle combining. https://github.com/ispc/ispc/issues/1864
; The function loads 16 floats from %arg, multiplies every lane by -5.0, widens
; the product to double, and stores lanes 10-13 into the four doubles of field 2
; of element 3 of a local [30 x %struct.foo] array. It returns nothing.
; The assertion lines inside the function pin the expected x86 output - only the
; upper 32 bytes of the input are read (lanes 8-15), they are converted to
; doubles, a single shuffle moves lanes 10-13 into the low 256 bits, and one
; 32-byte store writes them to the stack.
; NOTE(review) the assertion lines look script-generated (presumably by
; utils/update_llc_test_checks.py) - confirm against the file header before
; editing them by hand.
771+ define void @ispc_1864 (<16 x float >* %arg ) {
772+ ; ALL-LABEL: ispc_1864:
773+ ; ALL: # %bb.0: # %bb
774+ ; ALL-NEXT: pushq %rbp
775+ ; ALL-NEXT: .cfi_def_cfa_offset 16
776+ ; ALL-NEXT: .cfi_offset %rbp, -16
777+ ; ALL-NEXT: movq %rsp, %rbp
778+ ; ALL-NEXT: .cfi_def_cfa_register %rbp
779+ ; ALL-NEXT: andq $-64, %rsp
780+ ; ALL-NEXT: subq $4864, %rsp # imm = 0x1300
781+ ; ALL-NEXT: vbroadcastss {{.*#+}} ymm0 = [-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0]
782+ ; ALL-NEXT: vmulps 32(%rdi), %ymm0, %ymm0
783+ ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
784+ ; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,0,1,0,1]
785+ ; ALL-NEXT: vmovapd %ymm0, {{[0-9]+}}(%rsp)
786+ ; ALL-NEXT: movq %rbp, %rsp
787+ ; ALL-NEXT: popq %rbp
788+ ; ALL-NEXT: .cfi_def_cfa %rsp, 8
789+ ; ALL-NEXT: vzeroupper
790+ ; ALL-NEXT: retq
791+ bb:
792+ %tmp = alloca [30 x %struct.foo ], align 64 ; local array of 30 %struct.foo, 64-byte aligned (matches the andq $-64 stack realignment asserted above)
793+ %tmp1 = load <16 x float >, <16 x float >* %arg , align 4 ; read all 16 input floats; only 4-byte alignment is promised
794+ %tmp2 = fmul <16 x float > %tmp1 , <float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 , float -5 .000000e+00 > ; multiply every lane by the constant -5.0
795+ %tmp3 = fpext <16 x float > %tmp2 to <16 x double > ; widen each lane from float to double
796+ %tmp4 = getelementptr inbounds [30 x %struct.foo ], [30 x %struct.foo ]* %tmp , i64 0 , i64 3 , i32 2 , i64 0 ; address of array element 3, struct field 2, double 0
797+ %tmp5 = extractelement <16 x double > %tmp3 , i32 10 ; lane 10 of the widened vector
798+ store double %tmp5 , double * %tmp4 , align 32 ; first of four consecutive doubles written to field 2
799+ %tmp6 = getelementptr inbounds [30 x %struct.foo ], [30 x %struct.foo ]* %tmp , i64 0 , i64 3 , i32 2 , i64 1 ; same field, double 1
800+ %tmp7 = extractelement <16 x double > %tmp3 , i32 11 ; lane 11
801+ store double %tmp7 , double * %tmp6 , align 8
802+ %tmp8 = getelementptr inbounds [30 x %struct.foo ], [30 x %struct.foo ]* %tmp , i64 0 , i64 3 , i32 2 , i64 2 ; same field, double 2
803+ %tmp9 = extractelement <16 x double > %tmp3 , i32 12 ; lane 12
804+ store double %tmp9 , double * %tmp8 , align 16
805+ %tmp10 = getelementptr inbounds [30 x %struct.foo ], [30 x %struct.foo ]* %tmp , i64 0 , i64 3 , i32 2 , i64 3 ; same field, double 3
806+ %tmp11 = extractelement <16 x double > %tmp3 , i32 13 ; lane 13
807+ store double %tmp11 , double * %tmp10 , align 8 ; the four scalar stores cover lanes 10-13; the asserted output merges them into one 256-bit store
808+ ret void
809+ }
810+
0 commit comments