@@ -755,3 +755,85 @@ define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
   %r = udiv <4 x i1> %x, %y
   ret <4 x i1> %r
 }
+
+define <4 x i32> @vector_div_leading_zeros(<4 x i32> %x) {
+; SSE2-LABEL: vector_div_leading_zeros:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pmuludq %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: psubd %xmm2, %xmm0
+; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: psrld $2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: vector_div_leading_zeros:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT: pmuludq %xmm2, %xmm1
+; SSE41-NEXT: pmuludq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: psubd %xmm2, %xmm0
+; SSE41-NEXT: psrld $1, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm0
+; SSE41-NEXT: psrld $2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: vector_div_leading_zeros:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: vector_div_leading_zeros:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: vector_div_leading_zeros:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; XOP-NEXT: vbroadcastss {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; XOP-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; XOP-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpsrld $1, %xmm0, %xmm0
+; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpsrld $2, %xmm0, %xmm0
+; XOP-NEXT: retq
+  %a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
+  %b = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %b
+}
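
For context, every prefix above checks the same multiply-and-shift lowering of udiv by 7: 613566757 is 0x24924925, the low 32 bits of the 33-bit magic multiplier ceil(2^35 / 7), and since the full constant does not fit in 32 bits, the psubd / psrld $1 / paddd / psrld $2 tail folds the implicit top bit back in (the "magicu" fixup from Hacker's Delight). The pmuludq/pshufd/blend dance merely assembles the high 32 bits of each 32x32 lane product. A minimal scalar C model of what one vector lane computes, where the function name and the self-test loop are illustrative and not part of the test file:

#include <assert.h>
#include <stdint.h>

/* One lane of the checked sequence:
   q = high 32 bits of x * 0x24924925   (pmuludq + shuffles/blend),
   then ((x - q) >> 1 + q) >> 2         (psubd, psrld $1, paddd, psrld $2). */
static uint32_t udiv7_magic(uint32_t x) {
  uint32_t q = (uint32_t)(((uint64_t)x * 613566757u) >> 32);
  uint32_t t = (x - q) >> 1;
  return (t + q) >> 2;
}

int main(void) {
  /* The test masks each lane to [0, 255] first; the identity holds there
     (and in fact for all uint32_t values). */
  for (uint32_t x = 0; x <= 255; ++x)
    assert(udiv7_magic(x) == x / 7);
  return 0;
}

The and with 255 is what gives the test its name: every lane has at least 24 known leading zero bits, and the CHECK lines pin down that the backend still emits the full 32-bit magic sequence for this narrowed value range.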