@@ -2680,9 +2680,10 @@ define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
26802680
26812681define <8 x i16 > @sse2_psra_w_128_masked (<8 x i16 > %v , <8 x i16 > %a ) {
26822682; CHECK-LABEL: @sse2_psra_w_128_masked(
2683- ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2684- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
2685- ; CHECK-NEXT: ret <8 x i16> [[TMP2]]
2683+ ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
2684+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> zeroinitializer
2685+ ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[TMP2]]
2686+ ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
26862687;
26872688 %1 = and <8 x i16 > %a , <i16 15 , i16 0 , i16 0 , i16 0 , i16 undef , i16 undef , i16 undef , i16 undef >
26882689 %2 = tail call <8 x i16 > @llvm.x86.sse2.psra.w (<8 x i16 > %v , <8 x i16 > %1 )
@@ -2691,9 +2692,10 @@ define <8 x i16> @sse2_psra_w_128_masked(<8 x i16> %v, <8 x i16> %a) {
26912692
26922693define <8 x i32 > @avx2_psra_d_256_masked (<8 x i32 > %v , <4 x i32 > %a ) {
26932694; CHECK-LABEL: @avx2_psra_d_256_masked(
2694- ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 0, i32 undef, i32 undef>
2695- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
2696- ; CHECK-NEXT: ret <8 x i32> [[TMP2]]
2695+ ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 undef, i32 undef, i32 undef>
2696+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> zeroinitializer
2697+ ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP2]]
2698+ ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
26972699;
26982700 %1 = and <4 x i32 > %a , <i32 31 , i32 0 , i32 undef , i32 undef >
26992701 %2 = tail call <8 x i32 > @llvm.x86.avx2.psra.d (<8 x i32 > %v , <4 x i32 > %1 )
@@ -2703,8 +2705,9 @@ define <8 x i32> @avx2_psra_d_256_masked(<8 x i32> %v, <4 x i32> %a) {
27032705define <8 x i64 > @avx512_psra_q_512_masked (<8 x i64 > %v , <2 x i64 > %a ) {
27042706; CHECK-LABEL: @avx512_psra_q_512_masked(
27052707; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 undef>
2706- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[TMP1]])
2707- ; CHECK-NEXT: ret <8 x i64> [[TMP2]]
2708+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> undef, <8 x i32> zeroinitializer
2709+ ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[TMP2]]
2710+ ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
27082711;
27092712 %1 = and <2 x i64 > %a , <i64 63 , i64 undef >
27102713 %2 = tail call <8 x i64 > @llvm.x86.avx512.psra.q.512 (<8 x i64 > %v , <2 x i64 > %1 )
@@ -2713,9 +2716,10 @@ define <8 x i64> @avx512_psra_q_512_masked(<8 x i64> %v, <2 x i64> %a) {
27132716
27142717define <4 x i32 > @sse2_psrl_d_128_masked (<4 x i32 > %v , <4 x i32 > %a ) {
27152718; CHECK-LABEL: @sse2_psrl_d_128_masked(
2716- ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 0, i32 undef, i32 undef>
2717- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
2718- ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
2719+ ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 undef, i32 undef, i32 undef>
2720+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
2721+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[V:%.*]], [[TMP2]]
2722+ ; CHECK-NEXT: ret <4 x i32> [[TMP3]]
27192723;
27202724 %1 = and <4 x i32 > %a , <i32 31 , i32 0 , i32 undef , i32 undef >
27212725 %2 = tail call <4 x i32 > @llvm.x86.sse2.psrl.d (<4 x i32 > %v , <4 x i32 > %1 )
@@ -2725,8 +2729,9 @@ define <4 x i32> @sse2_psrl_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
27252729define <4 x i64 > @avx2_psrl_q_256_masked (<4 x i64 > %v , <2 x i64 > %a ) {
27262730; CHECK-LABEL: @avx2_psrl_q_256_masked(
27272731; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 undef>
2728- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[TMP1]])
2729- ; CHECK-NEXT: ret <4 x i64> [[TMP2]]
2732+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> undef, <4 x i32> zeroinitializer
2733+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[TMP2]]
2734+ ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
27302735;
27312736 %1 = and <2 x i64 > %a , <i64 63 , i64 undef >
27322737 %2 = tail call <4 x i64 > @llvm.x86.avx2.psrl.q (<4 x i64 > %v , <2 x i64 > %1 )
@@ -2735,9 +2740,10 @@ define <4 x i64> @avx2_psrl_q_256_masked(<4 x i64> %v, <2 x i64> %a) {
27352740
27362741define <32 x i16 > @avx512_psrl_w_512_masked (<32 x i16 > %v , <8 x i16 > %a ) {
27372742; CHECK-LABEL: @avx512_psrl_w_512_masked(
2738- ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2739- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
2740- ; CHECK-NEXT: ret <32 x i16> [[TMP2]]
2743+ ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
2744+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <32 x i32> zeroinitializer
2745+ ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[TMP2]]
2746+ ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
27412747;
27422748 %1 = and <8 x i16 > %a , <i16 15 , i16 0 , i16 0 , i16 0 , i16 undef , i16 undef , i16 undef , i16 undef >
27432749 %2 = tail call <32 x i16 > @llvm.x86.avx512.psrl.w.512 (<32 x i16 > %v , <8 x i16 > %1 )
@@ -2747,8 +2753,9 @@ define <32 x i16> @avx512_psrl_w_512_masked(<32 x i16> %v, <8 x i16> %a) {
27472753define <2 x i64 > @sse2_psll_q_128_masked (<2 x i64 > %v , <2 x i64 > %a ) {
27482754; CHECK-LABEL: @sse2_psll_q_128_masked(
27492755; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 undef>
2750- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[TMP1]])
2751- ; CHECK-NEXT: ret <2 x i64> [[TMP2]]
2756+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> undef, <2 x i32> zeroinitializer
2757+ ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP2]]
2758+ ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
27522759;
27532760 %1 = and <2 x i64 > %a , <i64 63 , i64 undef >
27542761 %2 = tail call <2 x i64 > @llvm.x86.sse2.psll.q (<2 x i64 > %v , <2 x i64 > %1 )
@@ -2757,9 +2764,10 @@ define <2 x i64> @sse2_psll_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
27572764
27582765define <16 x i16 > @avx2_psll_w_256_masked (<16 x i16 > %v , <8 x i16 > %a ) {
27592766; CHECK-LABEL: @avx2_psll_w_256_masked(
2760- ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2761- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
2762- ; CHECK-NEXT: ret <16 x i16> [[TMP2]]
2767+ ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
2768+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <16 x i32> zeroinitializer
2769+ ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[TMP2]]
2770+ ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
27632771;
27642772 %1 = and <8 x i16 > %a , <i16 15 , i16 0 , i16 0 , i16 0 , i16 undef , i16 undef , i16 undef , i16 undef >
27652773 %2 = tail call <16 x i16 > @llvm.x86.avx2.psll.w (<16 x i16 > %v , <8 x i16 > %1 )
@@ -2768,9 +2776,10 @@ define <16 x i16> @avx2_psll_w_256_masked(<16 x i16> %v, <8 x i16> %a) {
27682776
27692777define <16 x i32 > @avx512_psll_d_512_masked (<16 x i32 > %v , <4 x i32 > %a ) {
27702778; CHECK-LABEL: @avx512_psll_d_512_masked(
2771- ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 0, i32 undef, i32 undef>
2772- ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
2773- ; CHECK-NEXT: ret <16 x i32> [[TMP2]]
2779+ ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 undef, i32 undef, i32 undef>
2780+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <16 x i32> zeroinitializer
2781+ ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i32> [[V:%.*]], [[TMP2]]
2782+ ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
27742783;
27752784 %1 = and <4 x i32 > %a , <i32 31 , i32 0 , i32 undef , i32 undef >
27762785 %2 = tail call <16 x i32 > @llvm.x86.avx512.psll.d.512 (<16 x i32 > %v , <4 x i32 > %1 )
0 commit comments