Skip to content

Commit 806028a

Browse files
authored
[GlobalISel] Allow expanding of sdiv -> mul by constant (#146504)
Allows the sdiv -> mul by constant combine to expand the general case. Previously, this expansion only occurred in the exact case. This is part of the resolution to issue #118090.
1 parent df38766 commit 806028a

File tree

10 files changed

+2406
-295
lines changed

10 files changed

+2406
-295
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,10 @@ class CombinerHelper {
143143
/// Query is legal on the target.
144144
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const;
145145

146+
/// \return true if \p Query is legal on the target, or if \p Query will
147+
/// perform WidenScalar action on the target.
148+
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const;
149+
146150
/// \return true if the combine is running prior to legalization, or if \p Ty
147151
/// is a legal integer constant type on the target.
148152
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,13 +1131,13 @@ def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
11311131

11321132
def udiv_by_const : GICombineRule<
11331133
(defs root:$root),
1134-
(match (wip_match_opcode G_UDIV):$root,
1134+
(match (G_UDIV $dst, $x, $y):$root,
11351135
[{ return Helper.matchUDivorURemByConst(*${root}); }]),
11361136
(apply [{ Helper.applyUDivorURemByConst(*${root}); }])>;
11371137

11381138
def sdiv_by_const : GICombineRule<
11391139
(defs root:$root),
1140-
(match (wip_match_opcode G_SDIV):$root,
1140+
(match (G_SDIV $dst, $x, $y):$root,
11411141
[{ return Helper.matchSDivByConst(*${root}); }]),
11421142
(apply [{ Helper.applySDivByConst(*${root}); }])>;
11431143

@@ -1153,8 +1153,8 @@ def udiv_by_pow2 : GICombineRule<
11531153
[{ return Helper.matchDivByPow2(*${root}, /*IsSigned=*/false); }]),
11541154
(apply [{ Helper.applyUDivByPow2(*${root}); }])>;
11551155

1156-
def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const,
1157-
sdiv_by_pow2, udiv_by_pow2]>;
1156+
def intdiv_combines : GICombineGroup<[udiv_by_pow2, sdiv_by_pow2,
1157+
udiv_by_const, sdiv_by_const,]>;
11581158

11591159
def urem_by_const : GICombineRule<
11601160
(defs root:$root),
@@ -2054,9 +2054,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
20542054
div_rem_to_divrem, funnel_shift_combines, bitreverse_shift, commute_shift,
20552055
form_bitfield_extract, constant_fold_binops, constant_fold_fma,
20562056
constant_fold_cast_op, fabs_fneg_fold,
2057-
intdiv_combines, mulh_combines, redundant_neg_operands,
2057+
mulh_combines, redundant_neg_operands,
20582058
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
2059-
intrem_combines, sub_add_reg, select_to_minmax,
2059+
intrem_combines, intdiv_combines, sub_add_reg, select_to_minmax,
20602060
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
20612061
simplify_neg_minmax, combine_concat_vector,
20622062
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 114 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,11 @@ bool CombinerHelper::isLegalOrBeforeLegalizer(
162162
return isPreLegalize() || isLegal(Query);
163163
}
164164

165+
bool CombinerHelper::isLegalOrHasWidenScalar(const LegalityQuery &Query) const {
166+
return isLegal(Query) ||
167+
LI->getAction(Query).Action == LegalizeActions::WidenScalar;
168+
}
169+
165170
bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
166171
if (!Ty.isVector())
167172
return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
@@ -5522,6 +5527,8 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const {
55225527
Register Dst = MI.getOperand(0).getReg();
55235528
Register RHS = MI.getOperand(2).getReg();
55245529
LLT DstTy = MRI.getType(Dst);
5530+
auto SizeInBits = DstTy.getScalarSizeInBits();
5531+
LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
55255532

55265533
auto &MF = *MI.getMF();
55275534
AttributeList Attr = MF.getFunction().getAttributes();
@@ -5541,8 +5548,21 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const {
55415548
MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
55425549
}
55435550

5544-
// Don't support the general case for now.
5545-
return false;
5551+
auto *RHSDef = MRI.getVRegDef(RHS);
5552+
if (!isConstantOrConstantVector(*RHSDef, MRI))
5553+
return false;
5554+
5555+
// Don't do this if the types are not going to be legal.
5556+
if (LI) {
5557+
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5558+
return false;
5559+
if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5560+
!isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5561+
return false;
5562+
}
5563+
5564+
return matchUnaryPredicate(
5565+
MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
55465566
}
55475567

55485568
void CombinerHelper::applySDivByConst(MachineInstr &MI) const {
@@ -5558,21 +5578,22 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) const {
55585578
Register RHS = SDiv.getReg(2);
55595579
LLT Ty = MRI.getType(Dst);
55605580
LLT ScalarTy = Ty.getScalarType();
5581+
const unsigned EltBits = ScalarTy.getScalarSizeInBits();
55615582
LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
55625583
LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
55635584
auto &MIB = Builder;
55645585

55655586
bool UseSRA = false;
5566-
SmallVector<Register, 16> Shifts, Factors;
5587+
SmallVector<Register, 16> ExactShifts, ExactFactors;
55675588

5568-
auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5569-
bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5589+
auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5590+
bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
55705591

5571-
auto BuildSDIVPattern = [&](const Constant *C) {
5592+
auto BuildExactSDIVPattern = [&](const Constant *C) {
55725593
// Don't recompute inverses for each splat element.
5573-
if (IsSplat && !Factors.empty()) {
5574-
Shifts.push_back(Shifts[0]);
5575-
Factors.push_back(Factors[0]);
5594+
if (IsSplat && !ExactFactors.empty()) {
5595+
ExactShifts.push_back(ExactShifts[0]);
5596+
ExactFactors.push_back(ExactFactors[0]);
55765597
return true;
55775598
}
55785599

@@ -5587,31 +5608,104 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) const {
55875608
// Calculate the multiplicative inverse modulo BW.
55885609
// 2^W requires W + 1 bits, so we have to extend and then truncate.
55895610
APInt Factor = Divisor.multiplicativeInverse();
5590-
Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5591-
Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5611+
ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5612+
ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
55925613
return true;
55935614
};
55945615

5595-
// Collect all magic values from the build vector.
5616+
if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5617+
// Collect all magic values from the build vector.
5618+
bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5619+
(void)Matched;
5620+
assert(Matched && "Expected unary predicate match to succeed");
5621+
5622+
Register Shift, Factor;
5623+
if (Ty.isVector()) {
5624+
Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5625+
Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5626+
} else {
5627+
Shift = ExactShifts[0];
5628+
Factor = ExactFactors[0];
5629+
}
5630+
5631+
Register Res = LHS;
5632+
5633+
if (UseSRA)
5634+
Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5635+
5636+
return MIB.buildMul(Ty, Res, Factor);
5637+
}
5638+
5639+
SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5640+
5641+
auto BuildSDIVPattern = [&](const Constant *C) {
5642+
auto *CI = cast<ConstantInt>(C);
5643+
const APInt &Divisor = CI->getValue();
5644+
5645+
SignedDivisionByConstantInfo Magics =
5646+
SignedDivisionByConstantInfo::get(Divisor);
5647+
int NumeratorFactor = 0;
5648+
int ShiftMask = -1;
5649+
5650+
if (Divisor.isOne() || Divisor.isAllOnes()) {
5651+
// If d is +1/-1, we just multiply the numerator by +1/-1.
5652+
NumeratorFactor = Divisor.getSExtValue();
5653+
Magics.Magic = 0;
5654+
Magics.ShiftAmount = 0;
5655+
ShiftMask = 0;
5656+
} else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5657+
// If d > 0 and m < 0, add the numerator.
5658+
NumeratorFactor = 1;
5659+
} else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5660+
// If d < 0 and m > 0, subtract the numerator.
5661+
NumeratorFactor = -1;
5662+
}
5663+
5664+
MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5665+
Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5666+
Shifts.push_back(
5667+
MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5668+
ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5669+
5670+
return true;
5671+
};
5672+
5673+
// Collect the shifts/magic values from each element.
55965674
bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
55975675
(void)Matched;
55985676
assert(Matched && "Expected unary predicate match to succeed");
55995677

5600-
Register Shift, Factor;
5601-
if (Ty.isVector()) {
5602-
Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5678+
Register MagicFactor, Factor, Shift, ShiftMask;
5679+
auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5680+
if (RHSDef) {
5681+
MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
56035682
Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5683+
Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5684+
ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
56045685
} else {
5605-
Shift = Shifts[0];
5686+
assert(MRI.getType(RHS).isScalar() &&
5687+
"Non-build_vector operation should have been a scalar");
5688+
MagicFactor = MagicFactors[0];
56065689
Factor = Factors[0];
5690+
Shift = Shifts[0];
5691+
ShiftMask = ShiftMasks[0];
56075692
}
56085693

5609-
Register Res = LHS;
5694+
Register Q = LHS;
5695+
Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5696+
5697+
// (Optionally) Add/subtract the numerator using Factor.
5698+
Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5699+
Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
56105700

5611-
if (UseSRA)
5612-
Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5701+
// Shift right algebraic by shift value.
5702+
Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
56135703

5614-
return MIB.buildMul(Ty, Res, Factor);
5704+
// Extract the sign bit, mask it and add it to the quotient.
5705+
auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5706+
auto T = MIB.buildLShr(Ty, Q, SignShift);
5707+
T = MIB.buildAnd(Ty, T, ShiftMask);
5708+
return MIB.buildAdd(Ty, Q, T);
56155709
}
56165710

56175711
bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {

llvm/test/CodeGen/AArch64/GlobalISel/combine-sdiv.mir

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,14 @@ body: |
4545
; CHECK: liveins: $w0
4646
; CHECK-NEXT: {{ $}}
4747
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
48-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
49-
; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[C]]
50-
; CHECK-NEXT: $w0 = COPY [[SDIV]](s32)
48+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1321528399
49+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
50+
; CHECK-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[C]]
51+
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SMULH]], [[C1]](s32)
52+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
53+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ASHR]], [[C2]](s32)
54+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[LSHR]]
55+
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
5156
; CHECK-NEXT: RET_ReallyLR implicit $w0
5257
%0:_(s32) = COPY $w0
5358
%1:_(s32) = G_CONSTANT i32 104

llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll

Lines changed: 24 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -15,56 +15,13 @@ define <16 x i8> @div16xi8(<16 x i8> %x) {
1515
;
1616
; CHECK-GI-LABEL: div16xi8:
1717
; CHECK-GI: // %bb.0:
18-
; CHECK-GI-NEXT: smov w9, v0.b[0]
19-
; CHECK-GI-NEXT: mov w8, #25 // =0x19
20-
; CHECK-GI-NEXT: smov w10, v0.b[1]
21-
; CHECK-GI-NEXT: smov w11, v0.b[2]
22-
; CHECK-GI-NEXT: smov w12, v0.b[3]
23-
; CHECK-GI-NEXT: smov w13, v0.b[4]
24-
; CHECK-GI-NEXT: smov w14, v0.b[5]
25-
; CHECK-GI-NEXT: smov w15, v0.b[6]
26-
; CHECK-GI-NEXT: smov w16, v0.b[7]
27-
; CHECK-GI-NEXT: smov w17, v0.b[8]
28-
; CHECK-GI-NEXT: smov w18, v0.b[9]
29-
; CHECK-GI-NEXT: sdiv w9, w9, w8
30-
; CHECK-GI-NEXT: sdiv w10, w10, w8
31-
; CHECK-GI-NEXT: fmov s1, w9
32-
; CHECK-GI-NEXT: sdiv w11, w11, w8
33-
; CHECK-GI-NEXT: mov v1.b[1], w10
34-
; CHECK-GI-NEXT: smov w10, v0.b[10]
35-
; CHECK-GI-NEXT: sdiv w12, w12, w8
36-
; CHECK-GI-NEXT: mov v1.b[2], w11
37-
; CHECK-GI-NEXT: smov w11, v0.b[11]
38-
; CHECK-GI-NEXT: sdiv w13, w13, w8
39-
; CHECK-GI-NEXT: mov v1.b[3], w12
40-
; CHECK-GI-NEXT: smov w12, v0.b[12]
41-
; CHECK-GI-NEXT: sdiv w14, w14, w8
42-
; CHECK-GI-NEXT: mov v1.b[4], w13
43-
; CHECK-GI-NEXT: smov w13, v0.b[13]
44-
; CHECK-GI-NEXT: sdiv w15, w15, w8
45-
; CHECK-GI-NEXT: mov v1.b[5], w14
46-
; CHECK-GI-NEXT: sdiv w16, w16, w8
47-
; CHECK-GI-NEXT: mov v1.b[6], w15
48-
; CHECK-GI-NEXT: sdiv w17, w17, w8
49-
; CHECK-GI-NEXT: mov v1.b[7], w16
50-
; CHECK-GI-NEXT: sdiv w9, w18, w8
51-
; CHECK-GI-NEXT: mov v1.b[8], w17
52-
; CHECK-GI-NEXT: sdiv w10, w10, w8
53-
; CHECK-GI-NEXT: mov v1.b[9], w9
54-
; CHECK-GI-NEXT: smov w9, v0.b[14]
55-
; CHECK-GI-NEXT: sdiv w11, w11, w8
56-
; CHECK-GI-NEXT: mov v1.b[10], w10
57-
; CHECK-GI-NEXT: smov w10, v0.b[15]
58-
; CHECK-GI-NEXT: sdiv w12, w12, w8
59-
; CHECK-GI-NEXT: mov v1.b[11], w11
60-
; CHECK-GI-NEXT: sdiv w13, w13, w8
61-
; CHECK-GI-NEXT: mov v1.b[12], w12
62-
; CHECK-GI-NEXT: sdiv w9, w9, w8
63-
; CHECK-GI-NEXT: mov v1.b[13], w13
64-
; CHECK-GI-NEXT: sdiv w8, w10, w8
65-
; CHECK-GI-NEXT: mov v1.b[14], w9
66-
; CHECK-GI-NEXT: mov v1.b[15], w8
67-
; CHECK-GI-NEXT: mov v0.16b, v1.16b
18+
; CHECK-GI-NEXT: movi v1.16b, #41
19+
; CHECK-GI-NEXT: smull2 v2.8h, v0.16b, v1.16b
20+
; CHECK-GI-NEXT: smull v0.8h, v0.8b, v1.8b
21+
; CHECK-GI-NEXT: uzp2 v1.16b, v0.16b, v2.16b
22+
; CHECK-GI-NEXT: sshr v0.16b, v1.16b, #2
23+
; CHECK-GI-NEXT: ushr v0.16b, v0.16b, #7
24+
; CHECK-GI-NEXT: ssra v0.16b, v1.16b, #2
6825
; CHECK-GI-NEXT: ret
6926
%div = sdiv <16 x i8> %x, <i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25>
7027
ret <16 x i8> %div
@@ -85,32 +42,15 @@ define <8 x i16> @div8xi16(<8 x i16> %x) {
8542
;
8643
; CHECK-GI-LABEL: div8xi16:
8744
; CHECK-GI: // %bb.0:
88-
; CHECK-GI-NEXT: smov w9, v0.h[0]
89-
; CHECK-GI-NEXT: mov w8, #6577 // =0x19b1
90-
; CHECK-GI-NEXT: smov w10, v0.h[1]
91-
; CHECK-GI-NEXT: smov w11, v0.h[2]
92-
; CHECK-GI-NEXT: smov w12, v0.h[3]
93-
; CHECK-GI-NEXT: smov w13, v0.h[4]
94-
; CHECK-GI-NEXT: smov w14, v0.h[5]
95-
; CHECK-GI-NEXT: sdiv w9, w9, w8
96-
; CHECK-GI-NEXT: sdiv w10, w10, w8
97-
; CHECK-GI-NEXT: fmov s1, w9
98-
; CHECK-GI-NEXT: sdiv w11, w11, w8
99-
; CHECK-GI-NEXT: mov v1.h[1], w10
100-
; CHECK-GI-NEXT: smov w10, v0.h[6]
101-
; CHECK-GI-NEXT: sdiv w12, w12, w8
102-
; CHECK-GI-NEXT: mov v1.h[2], w11
103-
; CHECK-GI-NEXT: smov w11, v0.h[7]
104-
; CHECK-GI-NEXT: sdiv w13, w13, w8
105-
; CHECK-GI-NEXT: mov v1.h[3], w12
106-
; CHECK-GI-NEXT: sdiv w9, w14, w8
107-
; CHECK-GI-NEXT: mov v1.h[4], w13
108-
; CHECK-GI-NEXT: sdiv w10, w10, w8
109-
; CHECK-GI-NEXT: mov v1.h[5], w9
110-
; CHECK-GI-NEXT: sdiv w8, w11, w8
111-
; CHECK-GI-NEXT: mov v1.h[6], w10
112-
; CHECK-GI-NEXT: mov v1.h[7], w8
113-
; CHECK-GI-NEXT: mov v0.16b, v1.16b
45+
; CHECK-GI-NEXT: adrp x8, .LCPI1_0
46+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
47+
; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h
48+
; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
49+
; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h
50+
; CHECK-GI-NEXT: add v1.8h, v1.8h, v0.8h
51+
; CHECK-GI-NEXT: sshr v0.8h, v1.8h, #12
52+
; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #15
53+
; CHECK-GI-NEXT: ssra v0.8h, v1.8h, #12
11454
; CHECK-GI-NEXT: ret
11555
%div = sdiv <8 x i16> %x, <i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577>
11656
ret <8 x i16> %div
@@ -131,20 +71,14 @@ define <4 x i32> @div32xi4(<4 x i32> %x) {
13171
;
13272
; CHECK-GI-LABEL: div32xi4:
13373
; CHECK-GI: // %bb.0:
134-
; CHECK-GI-NEXT: fmov w9, s0
135-
; CHECK-GI-NEXT: mov w8, #39957 // =0x9c15
136-
; CHECK-GI-NEXT: mov w10, v0.s[1]
137-
; CHECK-GI-NEXT: movk w8, #145, lsl #16
138-
; CHECK-GI-NEXT: mov w11, v0.s[2]
139-
; CHECK-GI-NEXT: mov w12, v0.s[3]
140-
; CHECK-GI-NEXT: sdiv w9, w9, w8
141-
; CHECK-GI-NEXT: sdiv w10, w10, w8
142-
; CHECK-GI-NEXT: fmov s0, w9
143-
; CHECK-GI-NEXT: sdiv w11, w11, w8
144-
; CHECK-GI-NEXT: mov v0.s[1], w10
145-
; CHECK-GI-NEXT: sdiv w8, w12, w8
146-
; CHECK-GI-NEXT: mov v0.s[2], w11
147-
; CHECK-GI-NEXT: mov v0.s[3], w8
74+
; CHECK-GI-NEXT: adrp x8, .LCPI2_0
75+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
76+
; CHECK-GI-NEXT: smull2 v2.2d, v0.4s, v1.4s
77+
; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
78+
; CHECK-GI-NEXT: uzp2 v1.4s, v0.4s, v2.4s
79+
; CHECK-GI-NEXT: sshr v0.4s, v1.4s, #22
80+
; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #31
81+
; CHECK-GI-NEXT: ssra v0.4s, v1.4s, #22
14882
; CHECK-GI-NEXT: ret
14983
%div = sdiv <4 x i32> %x, <i32 9542677, i32 9542677, i32 9542677, i32 9542677>
15084
ret <4 x i32> %div

0 commit comments

Comments
 (0)