diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 13a8faf955a7e..2b0eb71f88082 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -49,6 +49,10 @@ struct CombinerInfo {
   bool EnableOptSize;
   /// Whether we're optimizing for minsize (-Oz).
   bool EnableMinSize;
+
+  /// The maximum number of times the Combiner will iterate over the
+  /// MachineFunction. Setting this to 0 enables fixed-point iteration.
+  unsigned MaxIterations = 0;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index d18e65a83484f..3310ce5455c97 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,6 +13,7 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -27,6 +28,11 @@
 
 using namespace llvm;
 
+STATISTIC(NumOneIteration, "Number of functions with one iteration");
+STATISTIC(NumTwoIterations, "Number of functions with two iterations");
+STATISTIC(NumThreeOrMoreIterations,
+          "Number of functions with three or more iterations");
+
 namespace llvm {
 cl::OptionCategory GICombinerOptionCategory(
     "GlobalISel Combiner",
@@ -135,7 +141,11 @@ bool Combiner::combineMachineInstrs() {
   bool MFChanged = false;
   bool Changed;
 
-  do {
+  unsigned Iteration = 0;
+  while (true) {
+    ++Iteration;
+    LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
+
     WorkList.clear();
 
     // Collect all instructions. Do a post order traversal for basic blocks and
@@ -166,7 +176,28 @@ bool Combiner::combineMachineInstrs() {
       WLObserver->reportFullyCreatedInstrs();
     }
     MFChanged |= Changed;
-  } while (Changed);
+
+    if (!Changed) {
+      LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #"
+                        << Iteration << '\n');
+      break;
+    }
+    // Iterate until a fixed-point is reached if MaxIterations == 0,
+    // otherwise limit the number of iterations.
+    if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) {
+      LLVM_DEBUG(
+          dbgs() << "\nCombiner reached iteration limit after iteration #"
+                 << Iteration << '\n');
+      break;
+    }
+  }
+
+  if (Iteration == 1)
+    ++NumOneIteration;
+  else if (Iteration == 2)
+    ++NumTwoIterations;
+  else
+    ++NumThreeOrMoreIterations;
 
 #ifndef NDEBUG
   if (CSEInfo) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 17dd8f2314a2b..0ba3a543d114a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -165,6 +165,10 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
   CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                      /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
                      F.hasOptSize(), F.hasMinSize());
+  // Disable fixed-point iteration in the Combiner. This improves compile-time
+  // at the cost of possibly missing optimizations. See PR#94291 for details.
+  CInfo.MaxIterations = 1;
+
   AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
                                          /*CSEInfo*/ nullptr, RuleConfig, ST);
   return Impl.combineMachineInstrs();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
index 5ab086ffd2c13..c4e07de265edd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
@@ -28,6 +28,7 @@ define i32 @foo() {
   ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
   ; CHECK-NEXT: G_BR %bb.2
   ; CHECK-NEXT: {{ $}}
@@ -35,19 +36,19 @@ define i32 @foo() {
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
-  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2)
-  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV3]](p0) :: (store (s32) into @var2)
+  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
   ; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
-  ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+  ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
   ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
-  ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3)
-  ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+  ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV5]](p0) :: (store (s32) into @var3)
+  ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
   ; CHECK-NEXT: G_BR %bb.3
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.3.if.end:
-  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: $w0 = COPY [[C6]](s32)
+  ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT: $w0 = COPY [[C7]](s32)
   ; CHECK-NEXT: RET_ReallyLR implicit $w0
 entry:
   %0 = load i32, ptr @var1, align 4
@@ -84,6 +85,7 @@ define i32 @darwin_tls() {
   ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]]
+  ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
   ; CHECK-NEXT: G_BR %bb.2
   ; CHECK-NEXT: {{ $}}
@@ -96,8 +98,8 @@ define i32 @darwin_tls() {
   ; CHECK-NEXT: G_BR %bb.3
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.3.if.end:
-  ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT: $w0 = COPY [[C3]](s32)
   ; CHECK-NEXT: RET_ReallyLR implicit $w0
 entry:
   %0 = load i32, ptr @var1, align 4
@@ -127,6 +129,7 @@ define i32 @imm_cost_too_large_cost_of_2() {
   ; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT: G_BR %bb.2
   ; CHECK-NEXT: {{ $}}
@@ -147,8 +150,8 @@ define i32 @imm_cost_too_large_cost_of_2() {
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
   ; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3)
-  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: $w0 = COPY [[C3]](s32)
+  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT: $w0 = COPY [[C4]](s32)
   ; CHECK-NEXT: RET_ReallyLR implicit $w0
 entry:
   %0 = load i32, ptr @var1, align 4
@@ -183,6 +186,7 @@ define i64 @imm_cost_too_large_cost_of_4() {
   ; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT: G_BR %bb.2
   ; CHECK-NEXT: {{ $}}
@@ -203,8 +207,8 @@ define i64 @imm_cost_too_large_cost_of_4() {
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
   ; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
-  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT: $x0 = COPY [[C3]](s64)
+  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT: $x0 = COPY [[C4]](s64)
   ; CHECK-NEXT: RET_ReallyLR implicit $x0
 entry:
   %0 = load i64, ptr @var1_64, align 4
@@ -239,6 +243,7 @@ define i64 @f64_imm_cost_too_high(double %a) {
   ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT: G_BR %bb.2
   ; CHECK-NEXT: {{ $}}
@@ -259,8 +264,8 @@ define i64 @f64_imm_cost_too_high(double %a) {
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
   ; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
-  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT: $x0 = COPY [[C3]](s64)
+  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT: $x0 = COPY [[C4]](s64)
   ; CHECK-NEXT: RET_ReallyLR implicit $x0
 entry:
   %0 = load i64, ptr @var1_64, align 4
@@ -294,6 +299,7 @@ define i64 @f64_imm_cheap(double %a) {
   ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
   ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT: G_BR %bb.2
   ; CHECK-NEXT: {{ $}}
@@ -301,24 +307,24 @@ define i64 @f64_imm_cheap(double %a) {
   ; CHECK-NEXT: successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
-  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
-  ; CHECK-NEXT: G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
+  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+  ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
   ; CHECK-NEXT: G_BR %bb.3
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.3.if.then2:
   ; CHECK-NEXT: successors: %bb.4(0x80000000)
   ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
   ; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
-  ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
+  ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
   ; CHECK-NEXT: G_BR %bb.4
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
-  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
-  ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
-  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT: $x0 = COPY [[C6]](s64)
+  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+  ; CHECK-NEXT: G_STORE [[C6]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
+  ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT: $x0 = COPY [[C7]](s64)
   ; CHECK-NEXT: RET_ReallyLR implicit $x0
 entry:
   %0 = load i64, ptr @var1_64, align 4
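
Note (illustration, not part of the patch): the new knob is consumed only through CombinerInfo, so any GlobalISel combiner pass can opt into a bounded iteration count the same way the AArch64 O0 pre-legalizer combiner does above. The sketch below is a minimal example of setting the field; the helper function name is hypothetical, while CombinerInfo, its constructor arguments, MaxIterations, and combineMachineInstrs() are the entities this diff touches.

// Illustrative sketch, assuming a combiner pass wants the same
// single-iteration behaviour as the AArch64 O0 pre-legalizer combiner.
// Only CombinerInfo and its new MaxIterations field come from this patch;
// the helper itself is hypothetical.
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"

static llvm::CombinerInfo makeSingleIterationCombinerInfo(bool OptSize,
                                                          bool MinSize) {
  llvm::CombinerInfo CInfo(/*AllowIllegalOps*/ true,
                           /*ShouldLegalizeIllegal*/ false,
                           /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
                           OptSize, MinSize);
  // MaxIterations == 0 (the default) keeps fixed-point iteration; any
  // non-zero value caps the number of passes over the MachineFunction.
  CInfo.MaxIterations = 1;
  return CInfo;
}

The resulting CombinerInfo is then passed to the target's combiner Impl, whose combineMachineInstrs() honours the cap via the loop added in Combiner.cpp above.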