@@ -350,10 +350,8 @@ define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
350350; RV32I-LABEL: ctpop_i32_eq_one:
351351; RV32I: # %bb.0:
352352; RV32I-NEXT: addi a1, a0, -1
353- ; RV32I-NEXT: and a1, a0, a1
354- ; RV32I-NEXT: seqz a1, a1
355- ; RV32I-NEXT: snez a0, a0
356- ; RV32I-NEXT: and a0, a0, a1
353+ ; RV32I-NEXT: xor a0, a0, a1
354+ ; RV32I-NEXT: sltu a0, a1, a0
357355; RV32I-NEXT: ret
358356;
359357; RV32ZBB-LABEL: ctpop_i32_eq_one:
@@ -371,10 +369,9 @@ define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
371369; RV32I-LABEL: ctpop_i32_ne_one:
372370; RV32I: # %bb.0:
373371; RV32I-NEXT: addi a1, a0, -1
374- ; RV32I-NEXT: and a1, a0, a1
375- ; RV32I-NEXT: snez a1, a1
376- ; RV32I-NEXT: seqz a0, a0
377- ; RV32I-NEXT: or a0, a0, a1
372+ ; RV32I-NEXT: xor a0, a0, a1
373+ ; RV32I-NEXT: sltu a0, a1, a0
374+ ; RV32I-NEXT: xori a0, a0, 1
378375; RV32I-NEXT: ret
379376;
380377; RV32ZBB-LABEL: ctpop_i32_ne_one:
@@ -508,15 +505,11 @@ define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
508505; RV32I-LABEL: ctpop_v2i32_eq_one:
509506; RV32I: # %bb.0:
510507; RV32I-NEXT: addi a2, a0, -1
511- ; RV32I-NEXT: and a2, a0, a2
512- ; RV32I-NEXT: seqz a2, a2
513- ; RV32I-NEXT: snez a0, a0
514- ; RV32I-NEXT: and a0, a0, a2
508+ ; RV32I-NEXT: xor a0, a0, a2
509+ ; RV32I-NEXT: sltu a0, a2, a0
515510; RV32I-NEXT: addi a2, a1, -1
516- ; RV32I-NEXT: and a2, a1, a2
517- ; RV32I-NEXT: seqz a2, a2
518- ; RV32I-NEXT: snez a1, a1
519- ; RV32I-NEXT: and a1, a1, a2
511+ ; RV32I-NEXT: xor a1, a1, a2
512+ ; RV32I-NEXT: sltu a1, a2, a1
520513; RV32I-NEXT: ret
521514;
522515; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
@@ -537,15 +530,13 @@ define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
537530; RV32I-LABEL: ctpop_v2i32_ne_one:
538531; RV32I: # %bb.0:
539532; RV32I-NEXT: addi a2, a0, -1
540- ; RV32I-NEXT: and a2, a0, a2
541- ; RV32I-NEXT: snez a2, a2
542- ; RV32I-NEXT: seqz a0, a0
543- ; RV32I-NEXT: or a0, a0, a2
533+ ; RV32I-NEXT: xor a0, a0, a2
534+ ; RV32I-NEXT: sltu a0, a2, a0
535+ ; RV32I-NEXT: xori a0, a0, 1
544536; RV32I-NEXT: addi a2, a1, -1
545- ; RV32I-NEXT: and a2, a1, a2
546- ; RV32I-NEXT: snez a2, a2
547- ; RV32I-NEXT: seqz a1, a1
548- ; RV32I-NEXT: or a1, a1, a2
537+ ; RV32I-NEXT: xor a1, a1, a2
538+ ; RV32I-NEXT: sltu a1, a2, a1
539+ ; RV32I-NEXT: xori a1, a1, 1
549540; RV32I-NEXT: ret
550541;
551542; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
@@ -685,16 +676,17 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
685676define i1 @ctpop_i64_eq_one (i64 %a ) nounwind {
686677; RV32I-LABEL: ctpop_i64_eq_one:
687678; RV32I: # %bb.0:
688- ; RV32I-NEXT: addi a2, a0, -1
689- ; RV32I-NEXT: and a2, a0, a2
690- ; RV32I-NEXT: seqz a3, a0
691- ; RV32I-NEXT: sub a3, a1, a3
692- ; RV32I-NEXT: and a3, a1, a3
693- ; RV32I-NEXT: or a2, a2, a3
694- ; RV32I-NEXT: seqz a2, a2
695- ; RV32I-NEXT: or a0, a0, a1
696- ; RV32I-NEXT: snez a0, a0
697- ; RV32I-NEXT: and a0, a0, a2
679+ ; RV32I-NEXT: beqz a1, .LBB17_2
680+ ; RV32I-NEXT: # %bb.1:
681+ ; RV32I-NEXT: seqz a0, a0
682+ ; RV32I-NEXT: sub a0, a1, a0
683+ ; RV32I-NEXT: xor a1, a1, a0
684+ ; RV32I-NEXT: sltu a0, a0, a1
685+ ; RV32I-NEXT: ret
686+ ; RV32I-NEXT: .LBB17_2:
687+ ; RV32I-NEXT: addi a1, a0, -1
688+ ; RV32I-NEXT: xor a0, a0, a1
689+ ; RV32I-NEXT: sltu a0, a1, a0
698690; RV32I-NEXT: ret
699691;
700692; RV32ZBB-LABEL: ctpop_i64_eq_one:
@@ -713,16 +705,19 @@ define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
713705define i1 @ctpop_i64_ne_one (i64 %a ) nounwind {
714706; RV32I-LABEL: ctpop_i64_ne_one:
715707; RV32I: # %bb.0:
716- ; RV32I-NEXT: addi a2, a0, -1
717- ; RV32I-NEXT: and a2, a0, a2
718- ; RV32I-NEXT: seqz a3, a0
719- ; RV32I-NEXT: sub a3, a1, a3
720- ; RV32I-NEXT: and a3, a1, a3
721- ; RV32I-NEXT: or a2, a2, a3
722- ; RV32I-NEXT: snez a2, a2
723- ; RV32I-NEXT: or a0, a0, a1
708+ ; RV32I-NEXT: beqz a1, .LBB18_2
709+ ; RV32I-NEXT: # %bb.1:
724710; RV32I-NEXT: seqz a0, a0
725- ; RV32I-NEXT: or a0, a0, a2
711+ ; RV32I-NEXT: sub a0, a1, a0
712+ ; RV32I-NEXT: xor a1, a1, a0
713+ ; RV32I-NEXT: sltu a0, a0, a1
714+ ; RV32I-NEXT: xori a0, a0, 1
715+ ; RV32I-NEXT: ret
716+ ; RV32I-NEXT: .LBB18_2:
717+ ; RV32I-NEXT: addi a1, a0, -1
718+ ; RV32I-NEXT: xor a0, a0, a1
719+ ; RV32I-NEXT: sltu a0, a1, a0
720+ ; RV32I-NEXT: xori a0, a0, 1
726721; RV32I-NEXT: ret
727722;
728723; RV32ZBB-LABEL: ctpop_i64_ne_one:
@@ -950,30 +945,34 @@ define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
950945define <2 x i1 > @ctpop_v2i64_eq_one (<2 x i64 > %a ) nounwind {
951946; RV32I-LABEL: ctpop_v2i64_eq_one:
952947; RV32I: # %bb.0:
953- ; RV32I-NEXT: lw a1, 0(a0)
948+ ; RV32I-NEXT: mv a1, a0
954949; RV32I-NEXT: lw a2, 12(a0)
955- ; RV32I-NEXT: lw a3, 8(a0)
956950; RV32I-NEXT: lw a0, 4(a0)
957- ; RV32I-NEXT: addi a4, a1, -1
958- ; RV32I-NEXT: and a4, a1, a4
959- ; RV32I-NEXT: seqz a5, a1
960- ; RV32I-NEXT: sub a5, a0, a5
961- ; RV32I-NEXT: and a5, a0, a5
962- ; RV32I-NEXT: or a4, a4, a5
963- ; RV32I-NEXT: seqz a4, a4
964- ; RV32I-NEXT: or a0, a1, a0
965- ; RV32I-NEXT: snez a0, a0
966- ; RV32I-NEXT: and a0, a0, a4
967- ; RV32I-NEXT: addi a1, a3, -1
968- ; RV32I-NEXT: and a1, a3, a1
969- ; RV32I-NEXT: seqz a4, a3
970- ; RV32I-NEXT: sub a4, a2, a4
971- ; RV32I-NEXT: and a4, a2, a4
972- ; RV32I-NEXT: or a1, a1, a4
951+ ; RV32I-NEXT: lw a3, 0(a1)
952+ ; RV32I-NEXT: beqz a0, .LBB22_3
953+ ; RV32I-NEXT: # %bb.1:
954+ ; RV32I-NEXT: seqz a3, a3
955+ ; RV32I-NEXT: sub a3, a0, a3
956+ ; RV32I-NEXT: xor a0, a0, a3
957+ ; RV32I-NEXT: sltu a0, a3, a0
958+ ; RV32I-NEXT: lw a1, 8(a1)
959+ ; RV32I-NEXT: bnez a2, .LBB22_4
960+ ; RV32I-NEXT: .LBB22_2:
961+ ; RV32I-NEXT: addi a2, a1, -1
962+ ; RV32I-NEXT: xor a1, a1, a2
963+ ; RV32I-NEXT: sltu a1, a2, a1
964+ ; RV32I-NEXT: ret
965+ ; RV32I-NEXT: .LBB22_3:
966+ ; RV32I-NEXT: addi a0, a3, -1
967+ ; RV32I-NEXT: xor a3, a3, a0
968+ ; RV32I-NEXT: sltu a0, a0, a3
969+ ; RV32I-NEXT: lw a1, 8(a1)
970+ ; RV32I-NEXT: beqz a2, .LBB22_2
971+ ; RV32I-NEXT: .LBB22_4:
973972; RV32I-NEXT: seqz a1, a1
974- ; RV32I-NEXT: or a2, a3, a2
975- ; RV32I-NEXT: snez a2, a2
976- ; RV32I-NEXT: and a1, a2, a1
973+ ; RV32I-NEXT: sub a1, a2, a1
974+ ; RV32I-NEXT: xor a2, a2, a1
975+ ; RV32I-NEXT: sltu a1, a1, a2
977976; RV32I-NEXT: ret
978977;
979978; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
@@ -1001,30 +1000,36 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
10011000define <2 x i1 > @ctpop_v2i64_ne_one (<2 x i64 > %a ) nounwind {
10021001; RV32I-LABEL: ctpop_v2i64_ne_one:
10031002; RV32I: # %bb.0:
1004- ; RV32I-NEXT: lw a1, 0(a0)
1005- ; RV32I-NEXT: lw a2, 12(a0)
1003+ ; RV32I-NEXT: lw a1, 12(a0)
1004+ ; RV32I-NEXT: lw a2, 4(a0)
1005+ ; RV32I-NEXT: lw a3, 0(a0)
1006+ ; RV32I-NEXT: beqz a2, .LBB23_2
1007+ ; RV32I-NEXT: # %bb.1:
1008+ ; RV32I-NEXT: seqz a3, a3
1009+ ; RV32I-NEXT: sub a3, a2, a3
1010+ ; RV32I-NEXT: xor a2, a2, a3
1011+ ; RV32I-NEXT: sltu a2, a3, a2
1012+ ; RV32I-NEXT: j .LBB23_3
1013+ ; RV32I-NEXT: .LBB23_2:
1014+ ; RV32I-NEXT: addi a2, a3, -1
1015+ ; RV32I-NEXT: xor a3, a3, a2
1016+ ; RV32I-NEXT: sltu a2, a2, a3
1017+ ; RV32I-NEXT: .LBB23_3:
10061018; RV32I-NEXT: lw a3, 8(a0)
1007- ; RV32I-NEXT: lw a0, 4(a0)
1008- ; RV32I-NEXT: addi a4, a1, -1
1009- ; RV32I-NEXT: and a4, a1, a4
1010- ; RV32I-NEXT: seqz a5, a1
1011- ; RV32I-NEXT: sub a5, a0, a5
1012- ; RV32I-NEXT: and a5, a0, a5
1013- ; RV32I-NEXT: or a4, a4, a5
1014- ; RV32I-NEXT: snez a4, a4
1015- ; RV32I-NEXT: or a0, a1, a0
1016- ; RV32I-NEXT: seqz a0, a0
1017- ; RV32I-NEXT: or a0, a0, a4
1019+ ; RV32I-NEXT: xori a0, a2, 1
1020+ ; RV32I-NEXT: beqz a1, .LBB23_5
1021+ ; RV32I-NEXT: # %bb.4:
1022+ ; RV32I-NEXT: seqz a2, a3
1023+ ; RV32I-NEXT: sub a2, a1, a2
1024+ ; RV32I-NEXT: xor a1, a1, a2
1025+ ; RV32I-NEXT: sltu a1, a2, a1
1026+ ; RV32I-NEXT: xori a1, a1, 1
1027+ ; RV32I-NEXT: ret
1028+ ; RV32I-NEXT: .LBB23_5:
10181029; RV32I-NEXT: addi a1, a3, -1
1019- ; RV32I-NEXT: and a1, a3, a1
1020- ; RV32I-NEXT: seqz a4, a3
1021- ; RV32I-NEXT: sub a4, a2, a4
1022- ; RV32I-NEXT: and a4, a2, a4
1023- ; RV32I-NEXT: or a1, a1, a4
1024- ; RV32I-NEXT: snez a1, a1
1025- ; RV32I-NEXT: or a2, a3, a2
1026- ; RV32I-NEXT: seqz a2, a2
1027- ; RV32I-NEXT: or a1, a2, a1
1030+ ; RV32I-NEXT: xor a3, a3, a1
1031+ ; RV32I-NEXT: sltu a1, a1, a3
1032+ ; RV32I-NEXT: xori a1, a1, 1
10281033; RV32I-NEXT: ret
10291034;
10301035; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
0 commit comments