Skip to content

Commit e745f7c

Browse files
committed
[LegalizeTypes] Improve ExpandIntRes_XMULO codegen.
The code previously used two BUILD_PAIRs to concatenate the two UMULO results with 0s in the lower bits to match the original VT. Then it created an ADD and a UADDO with the original bit width. Each of those operations needs to be expanded since they have illegal types. Since we put 0s in the lower bits before the ADD, the lower half of the ADD result will be 0. So the lower half of the UADDO result is solely determined by the other operand. Since the UADDO needs to be split in half, we don't really need an operation for the lower bits. Unfortunately, we don't see that in type legalization and end up creating something more complicated, and DAG combine or lowering aren't always able to recover it. This patch directly generates the narrower ADD and UADDO to avoid needing to legalize them. Now only the MUL is done on the original type. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D97440
1 parent 5a2141e commit e745f7c

File tree

6 files changed

+262
-318
lines changed

6 files changed

+262
-318
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3939,33 +3939,32 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
39393939
// %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
39403940
// %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
39413941
// %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
3942-
// %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
3943-
// %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
3942+
// %4 = add iNh %1.0, %2.0
3943+
// %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HIGH)
39443944
//
3945-
// %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
3945+
// %lo = %3.LO
3946+
// %hi = %5.0
3947+
// %ovf = %0 || %1.1 || %2.1 || %5.1
39463948
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
39473949
SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
39483950
GetExpandedInteger(LHS, LHSLow, LHSHigh);
39493951
GetExpandedInteger(RHS, RHSLow, RHSHigh);
39503952
EVT HalfVT = LHSLow.getValueType();
39513953
EVT BitVT = N->getValueType(1);
3952-
SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
3953-
SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
3954+
SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT);
39543955

39553956
SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
39563957
SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
39573958
DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
39583959
DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
39593960

3960-
SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
3961+
SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow);
39613962
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
3962-
SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
3963-
One.getValue(0));
39643963

3965-
SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
3964+
SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, RHSHigh, LHSLow);
39663965
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
3967-
SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
3968-
Two.getValue(0));
3966+
3967+
SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two);
39693968

39703969
// Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
39713970
// know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this
@@ -3976,10 +3975,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
39763975
SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
39773976
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
39783977
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
3979-
SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
3980-
SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
3981-
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
3982-
SplitInteger(Five, Lo, Hi);
3978+
SplitInteger(Three, Lo, Hi);
3979+
3980+
Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum);
3981+
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1));
39833982
ReplaceValueWith(SDValue(N, 1), Overflow);
39843983
return;
39853984
}

llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,27 @@
44
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
55
; AARCH-LABEL: muloti_test:
66
; AARCH: // %bb.0: // %start
7-
; AARCH-NEXT: mul x8, x3, x0
8-
; AARCH-NEXT: umulh x9, x0, x2
9-
; AARCH-NEXT: madd x11, x1, x2, x8
10-
; AARCH-NEXT: add x8, x9, x11
11-
; AARCH-NEXT: cmp x8, x9
12-
; AARCH-NEXT: cset w9, lo
13-
; AARCH-NEXT: cmp x11, #0 // =0
14-
; AARCH-NEXT: csel w9, wzr, w9, eq
157
; AARCH-NEXT: cmp x3, #0 // =0
16-
; AARCH-NEXT: umulh x10, x1, x2
17-
; AARCH-NEXT: cset w12, ne
8+
; AARCH-NEXT: umulh x8, x1, x2
9+
; AARCH-NEXT: cset w10, ne
1810
; AARCH-NEXT: cmp x1, #0 // =0
19-
; AARCH-NEXT: umulh x11, x3, x0
20-
; AARCH-NEXT: cset w13, ne
21-
; AARCH-NEXT: cmp xzr, x10
22-
; AARCH-NEXT: and w10, w13, w12
23-
; AARCH-NEXT: cset w12, ne
24-
; AARCH-NEXT: cmp xzr, x11
25-
; AARCH-NEXT: orr w10, w10, w12
11+
; AARCH-NEXT: mul x9, x3, x0
2612
; AARCH-NEXT: cset w11, ne
13+
; AARCH-NEXT: cmp xzr, x8
14+
; AARCH-NEXT: umulh x8, x3, x0
15+
; AARCH-NEXT: madd x9, x1, x2, x9
16+
; AARCH-NEXT: and w10, w11, w10
17+
; AARCH-NEXT: cset w11, ne
18+
; AARCH-NEXT: cmp xzr, x8
19+
; AARCH-NEXT: umulh x8, x0, x2
2720
; AARCH-NEXT: orr w10, w10, w11
28-
; AARCH-NEXT: orr w9, w10, w9
21+
; AARCH-NEXT: cset w11, ne
22+
; AARCH-NEXT: adds x1, x8, x9
23+
; AARCH-NEXT: orr w8, w10, w11
24+
; AARCH-NEXT: cset w9, hs
25+
; AARCH-NEXT: orr w8, w8, w9
2926
; AARCH-NEXT: mul x0, x0, x2
30-
; AARCH-DAG: mov x1, x8
31-
; AARCH-DAG: mov w2, w9
27+
; AARCH-NEXT: mov w2, w8
3228
; AARCH-NEXT: ret
3329
start:
3430
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2

llvm/test/CodeGen/AArch64/vec_umulo.ll

Lines changed: 37 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -316,59 +316,53 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
316316
define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
317317
; CHECK-LABEL: umulo_v2i128:
318318
; CHECK: // %bb.0:
319-
; CHECK-NEXT: mul x9, x7, x2
320-
; CHECK-NEXT: umulh x10, x2, x6
321-
; CHECK-NEXT: madd x9, x3, x6, x9
322-
; CHECK-NEXT: add x15, x10, x9
323-
; CHECK-NEXT: cmp x15, x10
324-
; CHECK-NEXT: cset w10, lo
325-
; CHECK-NEXT: cmp x9, #0 // =0
326-
; CHECK-NEXT: csel w10, wzr, w10, eq
327319
; CHECK-NEXT: cmp x7, #0 // =0
328-
; CHECK-NEXT: umulh x11, x3, x6
329-
; CHECK-NEXT: mul x13, x5, x0
330-
; CHECK-NEXT: cset w17, ne
320+
; CHECK-NEXT: umulh x8, x3, x6
321+
; CHECK-NEXT: cset w13, ne
331322
; CHECK-NEXT: cmp x3, #0 // =0
332-
; CHECK-NEXT: umulh x12, x7, x2
333-
; CHECK-NEXT: umulh x9, x0, x4
334-
; CHECK-NEXT: madd x13, x1, x4, x13
335-
; CHECK-NEXT: cset w18, ne
336-
; CHECK-NEXT: cmp xzr, x11
323+
; CHECK-NEXT: umulh x9, x7, x2
324+
; CHECK-NEXT: mul x10, x7, x2
325+
; CHECK-NEXT: cset w14, ne
326+
; CHECK-NEXT: cmp xzr, x8
337327
; CHECK-NEXT: ldr x8, [sp]
338-
; CHECK-NEXT: add x11, x9, x13
339-
; CHECK-NEXT: and w17, w18, w17
340-
; CHECK-NEXT: cset w18, ne
341-
; CHECK-NEXT: cmp xzr, x12
342-
; CHECK-NEXT: orr w12, w17, w18
343-
; CHECK-NEXT: cset w17, ne
344-
; CHECK-NEXT: cmp x11, x9
345-
; CHECK-NEXT: orr w9, w12, w17
346-
; CHECK-NEXT: cset w12, lo
347-
; CHECK-NEXT: cmp x13, #0 // =0
348-
; CHECK-NEXT: mul x14, x2, x6
349-
; CHECK-NEXT: csel w12, wzr, w12, eq
350-
; CHECK-NEXT: cmp x5, #0 // =0
351-
; CHECK-NEXT: stp x14, x15, [x8, #16]
352-
; CHECK-NEXT: umulh x14, x1, x4
353-
; CHECK-NEXT: cset w13, ne
354-
; CHECK-NEXT: cmp x1, #0 // =0
355-
; CHECK-NEXT: umulh x16, x5, x0
356-
; CHECK-NEXT: cset w17, ne
357-
; CHECK-NEXT: cmp xzr, x14
358-
; CHECK-NEXT: and w13, w17, w13
328+
; CHECK-NEXT: umulh x11, x2, x6
329+
; CHECK-NEXT: madd x10, x3, x6, x10
330+
; CHECK-NEXT: and w13, w14, w13
359331
; CHECK-NEXT: cset w14, ne
360-
; CHECK-NEXT: cmp xzr, x16
332+
; CHECK-NEXT: cmp xzr, x9
361333
; CHECK-NEXT: orr w13, w13, w14
362334
; CHECK-NEXT: cset w14, ne
335+
; CHECK-NEXT: adds x10, x11, x10
336+
; CHECK-NEXT: mul x12, x2, x6
363337
; CHECK-NEXT: orr w13, w13, w14
364-
; CHECK-NEXT: orr w12, w13, w12
338+
; CHECK-NEXT: cset w14, hs
339+
; CHECK-NEXT: cmp x5, #0 // =0
340+
; CHECK-NEXT: umulh x17, x1, x4
341+
; CHECK-NEXT: stp x12, x10, [x8, #16]
342+
; CHECK-NEXT: cset w10, ne
343+
; CHECK-NEXT: cmp x1, #0 // =0
344+
; CHECK-NEXT: umulh x9, x5, x0
345+
; CHECK-NEXT: mul x11, x5, x0
346+
; CHECK-NEXT: cset w12, ne
347+
; CHECK-NEXT: cmp xzr, x17
348+
; CHECK-NEXT: umulh x15, x0, x4
349+
; CHECK-NEXT: madd x11, x1, x4, x11
350+
; CHECK-NEXT: and w10, w12, w10
351+
; CHECK-NEXT: cset w12, ne
352+
; CHECK-NEXT: cmp xzr, x9
353+
; CHECK-NEXT: orr w9, w10, w12
354+
; CHECK-NEXT: cset w10, ne
355+
; CHECK-NEXT: adds x11, x15, x11
356+
; CHECK-NEXT: orr w9, w9, w10
357+
; CHECK-NEXT: cset w10, hs
365358
; CHECK-NEXT: orr w9, w9, w10
366-
; CHECK-NEXT: fmov s0, w12
367-
; CHECK-NEXT: mov v0.s[1], w9
359+
; CHECK-NEXT: orr w10, w13, w14
360+
; CHECK-NEXT: fmov s0, w9
361+
; CHECK-NEXT: mov v0.s[1], w10
368362
; CHECK-NEXT: shl v0.2s, v0.2s, #31
369-
; CHECK-NEXT: mul x15, x0, x4
363+
; CHECK-NEXT: mul x16, x0, x4
370364
; CHECK-NEXT: sshr v0.2s, v0.2s, #31
371-
; CHECK-NEXT: stp x15, x11, [x8]
365+
; CHECK-NEXT: stp x16, x11, [x8]
372366
; CHECK-NEXT: ret
373367
%t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
374368
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

llvm/test/CodeGen/PowerPC/pr45448.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,14 @@ define hidden void @julia_tryparse_internal_45896() #0 {
2323
; CHECK-NEXT: cmpdi r3, 0
2424
; CHECK-NEXT: sradi r4, r3, 63
2525
; CHECK-NEXT: rldic r5, r5, 4, 32
26-
; CHECK-NEXT: crnot 4*cr5+gt, eq
26+
; CHECK-NEXT: crnot 4*cr5+lt, eq
2727
; CHECK-NEXT: mulhdu r3, r3, r5
2828
; CHECK-NEXT: maddld r6, r4, r5, r3
29-
; CHECK-NEXT: cmpld r6, r3
30-
; CHECK-NEXT: mulld r3, r4, r5
31-
; CHECK-NEXT: cmpldi cr1, r3, 0
32-
; CHECK-NEXT: crandc 4*cr5+lt, lt, 4*cr1+eq
29+
; CHECK-NEXT: cmpld cr1, r6, r3
3330
; CHECK-NEXT: mulhdu. r3, r4, r5
34-
; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10
31+
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10
3532
; CHECK-NEXT: # %bb.8: # %L670
36-
; CHECK-NEXT: crorc 4*cr5+lt, 4*cr5+lt, eq
33+
; CHECK-NEXT: crorc 4*cr5+lt, 4*cr1+lt, eq
3734
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10
3835
; CHECK-NEXT: # %bb.9: # %L917
3936
; CHECK-NEXT: .LBB0_10: # %L994

0 commit comments

Comments
 (0)