@@ -755,3 +755,64 @@ define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
755755 %r = udiv <4 x i1 > %x , %y
756756 ret <4 x i1 > %r
757757}
758+
; udiv by 7 of a value masked to 8 bits: with the leading zeros known, the
; divide is lowered to a multiply by the magic constant 613566757
; (= ceil(2^32 / 7), 0x24924925) taking the high 32 bits of the product,
; with no post-shift and no fixup add — since %a < 256 the high half of the
; 64-bit product is already %a / 7 exactly.
define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
; SSE2-LABEL: vector_div_leading_zeros:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: vector_div_leading_zeros:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT:    pmuludq %xmm2, %xmm1
; SSE41-NEXT:    pmuludq %xmm2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vector_div_leading_zeros:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vector_div_leading_zeros:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
;
; XOP-LABEL: vector_div_leading_zeros:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; XOP-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; XOP-NEXT:    vpmuludq %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; XOP-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %b
}
0 commit comments