From fad46451b7bb195947dac461db69f2ffc48c4f5c Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Wed, 22 Jan 2025 00:50:29 -0800 Subject: [PATCH 1/6] [LoopInterchange] Hoist isComputableLoopNest() in the control flow The profiling of the LLVM Test-suite reveals that a significant portion of loop nests, specifically 14,090 out of 139,323, were identified as non-viable candidates for transformation, leading to the transform exiting from isComputableLoopNest() without any action. More importantly, dependence information was computed for these loop nests before reaching the function isComputableLoopNest(), which does not require DI and relies solely on scalar evolution (SE). To enhance compile-time efficiency, this patch moves the call to isComputableLoopNest() earlier in the control-flow, thereby avoiding unnecessary dependence calculations. The impact of this change is evident on the compile-time-tracker, with the overall geometric mean improvement recorded at 0.11%, while the lencode benchmark gets a more substantial benefit of 0.44%. This improvement can be tracked in the isc-ln-exp-2 branch under my repo. 
--- .../lib/Transforms/Scalar/LoopInterchange.cpp | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index ca125d2c0c490..8c1f1428e37f7 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -276,6 +276,26 @@ static bool hasSupportedLoopDepth(SmallVectorImpl &LoopList, } return true; } + +static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef LoopList) { + for (Loop *L : LoopList) { + const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); + if (isa(ExitCountOuter)) { + LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n"); + return false; + } + if (L->getNumBackEdges() != 1) { + LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); + return false; + } + if (!L->getExitingBlock()) { + LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n"); + return false; + } + } + return true; +} + namespace { /// LoopInterchangeLegality checks if it is legal to interchange the loop. @@ -431,25 +451,6 @@ struct LoopInterchange { return processLoopList(LoopList); } - bool isComputableLoopNest(ArrayRef LoopList) { - for (Loop *L : LoopList) { - const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); - if (isa(ExitCountOuter)) { - LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n"); - return false; - } - if (L->getNumBackEdges() != 1) { - LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); - return false; - } - if (!L->getExitingBlock()) { - LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n"); - return false; - } - } - return true; - } - unsigned selectLoopForInterchange(ArrayRef LoopList) { // TODO: Add a better heuristic to select the loop to be interchanged based // on the dependence matrix. Currently we select the innermost loop. 
@@ -464,10 +465,6 @@ struct LoopInterchange { "Unsupported depth of loop nest."); unsigned LoopNestDepth = LoopList.size(); - if (!isComputableLoopNest(LoopList)) { - LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); - return false; - } LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n"); @@ -1761,10 +1758,17 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, // Ensure minimum depth of the loop nest to do the interchange. if (!hasSupportedLoopDepth(LoopList, ORE)) return PreservedAnalyses::all(); + + // Ensure computable loop nest. + if (!isComputableLoopNest(&AR.SE, LoopList)) { + LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); + return PreservedAnalyses::all(); + } + DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); std::unique_ptr CC = CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); - + if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) return PreservedAnalyses::all(); U.markLoopNestChanged(true); From 1d9e54ac9b217d1d22c440ed4b8df22d6f6faf1a Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Mon, 27 Jan 2025 22:08:55 -0800 Subject: [PATCH 2/6] Update tests to account for new remarks --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 9 +- .../loop-interchange-optimization-remarks.ll | 48 ++++++++ .../LoopInterchange/no-dependence-info.ll | 107 ++++++++++++++++++ .../pr43326-ideal-access-pattern.ll | 8 ++ .../Transforms/LoopInterchange/pr43326.ll | 8 ++ .../Transforms/LoopInterchange/pr48212.ll | 8 ++ .../reductions-across-inner-and-outer-loop.ll | 56 +++++++++ 7 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopInterchange/no-dependence-info.ll diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 8c1f1428e37f7..95985396580df 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -277,7 +277,8 @@ 
static bool hasSupportedLoopDepth(SmallVectorImpl &LoopList, return true; } -static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef LoopList) { +static bool isComputableLoopNest(ScalarEvolution *SE, + ArrayRef LoopList) { for (Loop *L : LoopList) { const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); if (isa(ExitCountOuter)) { @@ -1765,6 +1766,12 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, return PreservedAnalyses::all(); } + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Dependence", + LN.getOutermostLoop().getStartLoc(), LN.getOutermostLoop().getHeader()) + << "Computed dependence info, invoking the transform."; + }); + DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); std::unique_ptr CC = CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); diff --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll index 3c7828a49477c..ded5c344af7b7 100644 --- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll +++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll @@ -58,6 +58,14 @@ for.end19: ret void } +; CHECK: --- !Passed +; CHECK-NEXT: Pass: loop-interchange +; CHECK-NEXT: Name: Dependence +; CHECK-NEXT: Function: test01 +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: Computed dependence info, invoking the transform. +; CHECK-NEXT: ... + ; CHECK: --- !Missed ; CHECK-NEXT: Pass: loop-interchange ; CHECK-NEXT: Name: Dependence @@ -66,6 +74,14 @@ for.end19: ; CHECK-NEXT: - String: Cannot interchange loops due to dependences. ; CHECK-NEXT: ... +; DELIN: --- !Passed +; DELIN-NEXT: Pass: loop-interchange +; DELIN-NEXT: Name: Dependence +; DELIN-NEXT: Function: test01 +; DELIN-NEXT: Args: +; DELIN-NEXT: - String: Computed dependence info, invoking the transform. +; DELIN-NEXT: ... 
+ ; DELIN: --- !Missed ; DELIN-NEXT: Pass: loop-interchange ; DELIN-NEXT: Name: InterchangeNotProfitable @@ -118,6 +134,14 @@ define void @test02(i32 %k, i32 %N) { ret void } +; CHECK: --- !Passed +; CHECK-NEXT: Pass: loop-interchange +; CHECK-NEXT: Name: Dependence +; CHECK-NEXT: Function: test02 +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: Computed dependence info, invoking the transform. +; CHECK-NEXT: ... + ; CHECK: --- !Missed ; CHECK-NEXT: Pass: loop-interchange ; CHECK-NEXT: Name: Dependence @@ -126,6 +150,14 @@ define void @test02(i32 %k, i32 %N) { ; CHECK-NEXT: - String: Cannot interchange loops due to dependences. ; CHECK-NEXT: ... +; DELIN: --- !Passed +; DELIN-NEXT: Pass: loop-interchange +; DELIN-NEXT: Name: Dependence +; DELIN-NEXT: Function: test02 +; DELIN-NEXT: Args: +; DELIN-NEXT: - String: Computed dependence info, invoking the transform. +; DELIN-NEXT: ... + ; DELIN: --- !Passed ; DELIN-NEXT: Pass: loop-interchange ; DELIN-NEXT: Name: Interchanged @@ -174,6 +206,14 @@ for.body4: ; preds = %for.body4, %for.con br i1 %exitcond, label %for.body4, label %for.cond.loopexit } +; CHECK: --- !Passed +; CHECK-NEXT: Pass: loop-interchange +; CHECK-NEXT: Name: Dependence +; CHECK-NEXT: Function: test03 +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: Computed dependence info, invoking the transform. +; CHECK-NEXT: ... + ; CHECK: --- !Passed ; CHECK-NEXT: Pass: loop-interchange ; CHECK-NEXT: Name: Interchanged @@ -182,6 +222,14 @@ for.body4: ; preds = %for.body4, %for.con ; CHECK-NEXT: - String: Loop interchanged with enclosing loop. ; CHECK-NEXT: ... +; DELIN: --- !Passed +; DELIN-NEXT: Pass: loop-interchange +; DELIN-NEXT: Name: Dependence +; DELIN-NEXT: Function: test03 +; DELIN-NEXT: Args: +; DELIN-NEXT: - String: Computed dependence info, invoking the transform. +; DELIN-NEXT: ... 
+ ; DELIN: --- !Passed ; DELIN-NEXT: Pass: loop-interchange ; DELIN-NEXT: Name: Interchanged diff --git a/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll new file mode 100644 index 0000000000000..cde9deeccbb83 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll @@ -0,0 +1,107 @@ +; RUN: opt %s -passes='loop-interchange' -pass-remarks=loop-interchange -disable-output 2>&1 | FileCheck --allow-empty %s + +target triple = "aarch64-unknown-linux-gnu" + +; CHECK-NOT: Computed dependence info, invoking the transform. + +define dso_local void @_foo(ptr noundef %a, ptr noundef %neg, ptr noundef %pos) { +entry: + %a.addr = alloca ptr, align 8 + %neg.addr = alloca ptr, align 8 + %pos.addr = alloca ptr, align 8 + %p = alloca i32, align 4 + %q = alloca i32, align 4 + %i = alloca i32, align 4 + %cleanup.dest.slot = alloca i32, align 4 + %j = alloca i32, align 4 + store ptr %a, ptr %a.addr, align 8 + store ptr %neg, ptr %neg.addr, align 8 + store ptr %pos, ptr %pos.addr, align 8 + store i32 0, ptr %p, align 4 + store i32 0, ptr %q, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc16, %entry + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 32 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + store i32 2, ptr %cleanup.dest.slot, align 4 + br label %for.end18 + +for.body: ; preds = %for.cond + store i32 0, ptr %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, ptr %j, align 4 + %cmp2 = icmp slt i32 %1, 32 + br i1 %cmp2, label %for.body4, label %for.cond.cleanup3 + +for.cond.cleanup3: ; preds = %for.cond1 + store i32 5, ptr %cleanup.dest.slot, align 4 + br label %for.end + +for.body4: ; preds = %for.cond1 + %2 = load ptr, ptr %a.addr, align 8 + %3 = load i32, ptr %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds 
i32, ptr %2, i64 %idxprom + %4 = load i32, ptr %arrayidx, align 4 + %cmp5 = icmp slt i32 %4, 0 + br i1 %cmp5, label %if.then, label %if.else + +if.then: ; preds = %for.body4 + %5 = load ptr, ptr %a.addr, align 8 + %6 = load i32, ptr %i, align 4 + %idxprom6 = sext i32 %6 to i64 + %arrayidx7 = getelementptr inbounds i32, ptr %5, i64 %idxprom6 + %7 = load i32, ptr %arrayidx7, align 4 + %8 = load ptr, ptr %neg.addr, align 8 + %9 = load i32, ptr %p, align 4 + %inc = add nsw i32 %9, 1 + store i32 %inc, ptr %p, align 4 + %idxprom8 = sext i32 %9 to i64 + %arrayidx9 = getelementptr inbounds i32, ptr %8, i64 %idxprom8 + store i32 %7, ptr %arrayidx9, align 4 + br label %if.end + +if.else: ; preds = %for.body4 + %10 = load ptr, ptr %a.addr, align 8 + %11 = load i32, ptr %i, align 4 + %idxprom10 = sext i32 %11 to i64 + %arrayidx11 = getelementptr inbounds i32, ptr %10, i64 %idxprom10 + %12 = load i32, ptr %arrayidx11, align 4 + %13 = load ptr, ptr %pos.addr, align 8 + %14 = load i32, ptr %q, align 4 + %inc12 = add nsw i32 %14, 1 + store i32 %inc12, ptr %q, align 4 + %idxprom13 = sext i32 %14 to i64 + %arrayidx14 = getelementptr inbounds i32, ptr %13, i64 %idxprom13 + store i32 %12, ptr %arrayidx14, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %for.inc + +for.inc: ; preds = %if.end + %15 = load i32, ptr %j, align 4 + %inc15 = add nsw i32 %15, 1 + store i32 %inc15, ptr %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond.cleanup3 + br label %for.inc16 + +for.inc16: ; preds = %for.end + %16 = load i32, ptr %i, align 4 + %inc17 = add nsw i32 %16, 1 + store i32 %inc17, ptr %i, align 4 + br label %for.cond + +for.end18: ; preds = %for.cond.cleanup + ret void +} + diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll index def68ca3cd07e..644788b756222 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll +++ 
b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll @@ -14,6 +14,14 @@ ; } ; } +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: pr43326-triply-nested +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Passed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Interchanged diff --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll b/llvm/test/Transforms/LoopInterchange/pr43326.ll index 4dbb06780a898..2381f182ef3c9 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43326.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll @@ -8,6 +8,14 @@ @d = global i32 0 @e = global [1 x [1 x i32]] zeroinitializer +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: pr43326 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Passed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Interchanged diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll index b580794ab7d36..6bb7bfd8260b6 100644 --- a/llvm/test/Transforms/LoopInterchange/pr48212.ll +++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll @@ -2,6 +2,14 @@ ; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: pr48212 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... 
+ ; REMARKS: --- !Passed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Interchanged diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll index eea0c2635d595..68a84b17ca4f6 100644 --- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll +++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll @@ -5,6 +5,14 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: test1 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Passed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Interchanged @@ -77,6 +85,14 @@ for1.loopexit: ; preds = %for1.inc ; In this test case, the inner reduction PHI %inner does not involve the outer ; reduction PHI %sum.outer, do not interchange. +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: test2 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Missed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: UnsupportedPHIOuter @@ -114,6 +130,14 @@ for1.loopexit: ; preds = %for1.inc ; Check that we do not interchange if there is an additional instruction ; between the outer and inner reduction PHIs. +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: test3 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... 
+ ; REMARKS: --- !Missed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: UnsupportedPHIOuter @@ -151,6 +175,14 @@ for1.loopexit: ; preds = %for1.inc } ; Check that we do not interchange if reduction is stored in an invariant address inside inner loop +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: test4 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Missed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence @@ -190,6 +222,14 @@ for1.loopexit: ; preds = %for1.inc ; Check that we do not interchange or crash if the PHI in the outer loop gets a ; constant from the inner loop. +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: test_constant_inner_loop_res +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Missed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: UnsupportedPHIOuter @@ -229,6 +269,14 @@ for1.loopexit: ; preds = %for1.inc ; Floating point reductions are interchanged if all the fp instructions ; involved allow reassociation. +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: test5 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Passed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Interchanged @@ -269,6 +317,14 @@ for.exit: ; preds = %outer.inc ; Floating point reductions are not interchanged if not all the fp instructions ; involved allow reassociation. 
+; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Dependence +; REMARKS-NEXT: Function: test6 +; REMARKS-NEXT: Args: +; REMARKS-NEXT: - String: Computed dependence info, invoking the transform. +; REMARKS-NEXT: ... + ; REMARKS: --- !Missed ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: UnsupportedPHIOuter From 7e2710cc0f96e1ed4dade4a1708ac0494b977e83 Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Tue, 28 Jan 2025 00:11:20 -0800 Subject: [PATCH 3/6] Address formatting issues --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 95985396580df..d1921ed9a3d4c 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1759,16 +1759,16 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, // Ensure minimum depth of the loop nest to do the interchange. if (!hasSupportedLoopDepth(LoopList, ORE)) return PreservedAnalyses::all(); - // Ensure computable loop nest. 
if (!isComputableLoopNest(&AR.SE, LoopList)) { - LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); - return PreservedAnalyses::all(); + LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); + return PreservedAnalyses::all(); } ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Dependence", - LN.getOutermostLoop().getStartLoc(), LN.getOutermostLoop().getHeader()) + LN.getOutermostLoop().getStartLoc(), + LN.getOutermostLoop().getHeader()) << "Computed dependence info, invoking the transform."; }); From 31f9942d2b5721c8dd60f1dfc48c40e92120f266 Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Tue, 28 Jan 2025 23:34:16 -0800 Subject: [PATCH 4/6] Address review comments --- .../LoopInterchange/no-dependence-info.ll | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll index cde9deeccbb83..1e2066c6925c4 100644 --- a/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll +++ b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll @@ -2,6 +2,24 @@ target triple = "aarch64-unknown-linux-gnu" +; For the below test, backedge count cannot be computed. +; Computing backedge count requires only SCEV and should +; not require dependence info. +; +; void foo(int *a, int *neg, int *pos) { +; int p = 0, q = 0; +; for (unsigned int i = 0; i < 32; ++i) { +; for (unsigned int j = 0; j < 32; ++j) { +; if (a[i] < 0){ +; neg[p++] = a[i]; +; } +; else { +; pos[q++] = a[i]; +; } +; } +; } +;} + ; CHECK-NOT: Computed dependence info, invoking the transform. 
define dso_local void @_foo(ptr noundef %a, ptr noundef %neg, ptr noundef %pos) { From 1e95541048912d15d3ef39707866d0c34b0fad9f Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Wed, 29 Jan 2025 23:31:05 -0800 Subject: [PATCH 5/6] Change to RemarksAnalysis --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 6 +++--- .../loop-interchange-optimization-remarks.ll | 12 ++++++------ .../pr43326-ideal-access-pattern.ll | 2 +- llvm/test/Transforms/LoopInterchange/pr43326.ll | 2 +- llvm/test/Transforms/LoopInterchange/pr48212.ll | 2 +- .../reductions-across-inner-and-outer-loop.ll | 14 +++++++------- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index d1921ed9a3d4c..d88fdf41db7a8 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1766,9 +1766,9 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, } ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Dependence", - LN.getOutermostLoop().getStartLoc(), - LN.getOutermostLoop().getHeader()) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "Dependence", + LN.getOutermostLoop().getStartLoc(), + LN.getOutermostLoop().getHeader()) << "Computed dependence info, invoking the transform."; }); diff --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll index ded5c344af7b7..73a566a310157 100644 --- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll +++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll @@ -58,7 +58,7 @@ for.end19: ret void } -; CHECK: --- !Passed +; CHECK: --- !Analysis ; CHECK-NEXT: Pass: loop-interchange ; CHECK-NEXT: Name: Dependence ; CHECK-NEXT: Function: test01 @@ -74,7 +74,7 @@ for.end19: ; CHECK-NEXT: - String: Cannot interchange loops due to 
dependences. ; CHECK-NEXT: ... -; DELIN: --- !Passed +; DELIN: --- !Analysis ; DELIN-NEXT: Pass: loop-interchange ; DELIN-NEXT: Name: Dependence ; DELIN-NEXT: Function: test01 @@ -134,7 +134,7 @@ define void @test02(i32 %k, i32 %N) { ret void } -; CHECK: --- !Passed +; CHECK: --- !Analysis ; CHECK-NEXT: Pass: loop-interchange ; CHECK-NEXT: Name: Dependence ; CHECK-NEXT: Function: test02 @@ -150,7 +150,7 @@ define void @test02(i32 %k, i32 %N) { ; CHECK-NEXT: - String: Cannot interchange loops due to dependences. ; CHECK-NEXT: ... -; DELIN: --- !Passed +; DELIN: --- !Analysis ; DELIN-NEXT: Pass: loop-interchange ; DELIN-NEXT: Name: Dependence ; DELIN-NEXT: Function: test02 @@ -206,7 +206,7 @@ for.body4: ; preds = %for.body4, %for.con br i1 %exitcond, label %for.body4, label %for.cond.loopexit } -; CHECK: --- !Passed +; CHECK: --- !Analysis ; CHECK-NEXT: Pass: loop-interchange ; CHECK-NEXT: Name: Dependence ; CHECK-NEXT: Function: test03 @@ -222,7 +222,7 @@ for.body4: ; preds = %for.body4, %for.con ; CHECK-NEXT: - String: Loop interchanged with enclosing loop. ; CHECK-NEXT: ... 
-; DELIN: --- !Passed +; DELIN: --- !Analysis ; DELIN-NEXT: Pass: loop-interchange ; DELIN-NEXT: Name: Dependence ; DELIN-NEXT: Function: test03 diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll index 644788b756222..520e1ee3506da 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll @@ -14,7 +14,7 @@ ; } ; } -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: pr43326-triply-nested diff --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll b/llvm/test/Transforms/LoopInterchange/pr43326.ll index 2381f182ef3c9..c25c4fadd3042 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43326.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll @@ -8,7 +8,7 @@ @d = global i32 0 @e = global [1 x [1 x i32]] zeroinitializer -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: pr43326 diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll index 6bb7bfd8260b6..936c53e217540 100644 --- a/llvm/test/Transforms/LoopInterchange/pr48212.ll +++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll @@ -2,7 +2,7 @@ ; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: pr48212 diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll index 68a84b17ca4f6..27d99e05e84ee 100644 --- 
a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll +++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll @@ -5,7 +5,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: test1 @@ -85,7 +85,7 @@ for1.loopexit: ; preds = %for1.inc ; In this test case, the inner reduction PHI %inner does not involve the outer ; reduction PHI %sum.outer, do not interchange. -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: test2 @@ -130,7 +130,7 @@ for1.loopexit: ; preds = %for1.inc ; Check that we do not interchange if there is an additional instruction ; between the outer and inner reduction PHIs. -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: test3 @@ -175,7 +175,7 @@ for1.loopexit: ; preds = %for1.inc } ; Check that we do not interchange if reduction is stored in an invariant address inside inner loop -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: test4 @@ -222,7 +222,7 @@ for1.loopexit: ; preds = %for1.inc ; Check that we do not interchange or crash if the PHI in the outer loop gets a ; constant from the inner loop. -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: test_constant_inner_loop_res @@ -269,7 +269,7 @@ for1.loopexit: ; preds = %for1.inc ; Floating point reductions are interchanged if all the fp instructions ; involved allow reassociation. 
-; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: test5 @@ -317,7 +317,7 @@ for.exit: ; preds = %outer.inc ; Floating point reductions are not interchanged if not all the fp instructions ; involved allow reassociation. -; REMARKS: --- !Passed +; REMARKS: --- !Analysis ; REMARKS-NEXT: Pass: loop-interchange ; REMARKS-NEXT: Name: Dependence ; REMARKS-NEXT: Function: test6 From d351384a468ae84148c9573fc3c9df1a5326acf9 Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Tue, 4 Feb 2025 02:49:10 -0800 Subject: [PATCH 6/6] Add simplified test --- .../LoopInterchange/no-dependence-info.ll | 129 ++++-------------- 1 file changed, 28 insertions(+), 101 deletions(-) diff --git a/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll index 1e2066c6925c4..d37fb46fc5d68 100644 --- a/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll +++ b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll @@ -2,124 +2,51 @@ target triple = "aarch64-unknown-linux-gnu" +; CHECK-NOT: Computed dependence info, invoking the transform. + ; For the below test, backedge count cannot be computed. ; Computing backedge count requires only SCEV and should ; not require dependence info. ; -; void foo(int *a, int *neg, int *pos) { -; int p = 0, q = 0; -; for (unsigned int i = 0; i < 32; ++i) { -; for (unsigned int j = 0; j < 32; ++j) { -; if (a[i] < 0){ -; neg[p++] = a[i]; -; } -; else { -; pos[q++] = a[i]; -; } +; void bar(int m, int n) { +; for (unsigned int i = 0; i < m; ++i) { +; for (unsigned int j = 0; j < m; ++j) { +; // dummy code ; } ; } ;} -; CHECK-NOT: Computed dependence info, invoking the transform. 
- -define dso_local void @_foo(ptr noundef %a, ptr noundef %neg, ptr noundef %pos) { +define void @bar(i32 %m, i32 %n) +{ entry: - %a.addr = alloca ptr, align 8 - %neg.addr = alloca ptr, align 8 - %pos.addr = alloca ptr, align 8 - %p = alloca i32, align 4 - %q = alloca i32, align 4 - %i = alloca i32, align 4 - %cleanup.dest.slot = alloca i32, align 4 - %j = alloca i32, align 4 - store ptr %a, ptr %a.addr, align 8 - store ptr %neg, ptr %neg.addr, align 8 - store ptr %pos, ptr %pos.addr, align 8 - store i32 0, ptr %p, align 4 - store i32 0, ptr %q, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc16, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 32 - br i1 %cmp, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond - store i32 2, ptr %cleanup.dest.slot, align 4 - br label %for.end18 - -for.body: ; preds = %for.cond - store i32 0, ptr %j, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc, %for.body - %1 = load i32, ptr %j, align 4 - %cmp2 = icmp slt i32 %1, 32 - br i1 %cmp2, label %for.body4, label %for.cond.cleanup3 + br label %outer.header -for.cond.cleanup3: ; preds = %for.cond1 - store i32 5, ptr %cleanup.dest.slot, align 4 - br label %for.end +outer.header: + %m_temp1 = phi i32 [%m, %entry], [%m_temp, %outer.latch] + br label %inner.header -for.body4: ; preds = %for.cond1 - %2 = load ptr, ptr %a.addr, align 8 - %3 = load i32, ptr %i, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom - %4 = load i32, ptr %arrayidx, align 4 - %cmp5 = icmp slt i32 %4, 0 - br i1 %cmp5, label %if.then, label %if.else -if.then: ; preds = %for.body4 - %5 = load ptr, ptr %a.addr, align 8 - %6 = load i32, ptr %i, align 4 - %idxprom6 = sext i32 %6 to i64 - %arrayidx7 = getelementptr inbounds i32, ptr %5, i64 %idxprom6 - %7 = load i32, ptr %arrayidx7, align 4 - %8 = load ptr, ptr %neg.addr, align 8 - %9 = load i32, ptr %p, align 4 - %inc 
= add nsw i32 %9, 1 - store i32 %inc, ptr %p, align 4 - %idxprom8 = sext i32 %9 to i64 - %arrayidx9 = getelementptr inbounds i32, ptr %8, i64 %idxprom8 - store i32 %7, ptr %arrayidx9, align 4 - br label %if.end +inner.header: + %n_temp1 = phi i32 [%n, %outer.header], [%n_temp, %inner.latch] -if.else: ; preds = %for.body4 - %10 = load ptr, ptr %a.addr, align 8 - %11 = load i32, ptr %i, align 4 - %idxprom10 = sext i32 %11 to i64 - %arrayidx11 = getelementptr inbounds i32, ptr %10, i64 %idxprom10 - %12 = load i32, ptr %arrayidx11, align 4 - %13 = load ptr, ptr %pos.addr, align 8 - %14 = load i32, ptr %q, align 4 - %inc12 = add nsw i32 %14, 1 - store i32 %inc12, ptr %q, align 4 - %idxprom13 = sext i32 %14 to i64 - %arrayidx14 = getelementptr inbounds i32, ptr %13, i64 %idxprom13 - store i32 %12, ptr %arrayidx14, align 4 - br label %if.end + br label %body -if.end: ; preds = %if.else, %if.then - br label %for.inc +body: + ; dummy code -for.inc: ; preds = %if.end - %15 = load i32, ptr %j, align 4 - %inc15 = add nsw i32 %15, 1 - store i32 %inc15, ptr %j, align 4 - br label %for.cond1 +br label %inner.latch -for.end: ; preds = %for.cond.cleanup3 - br label %for.inc16 +inner.latch: +%n_temp = add i32 %n_temp1, 1 +%cmp2 = icmp eq i32 %n_temp, 1 +br i1 %cmp2, label %outer.latch, label %inner.header -for.inc16: ; preds = %for.end - %16 = load i32, ptr %i, align 4 - %inc17 = add nsw i32 %16, 1 - store i32 %inc17, ptr %i, align 4 - br label %for.cond +outer.latch: +%m_temp = add i32 %n, 1 +%cmp3 = icmp eq i32 %m_temp, 1 +br i1 %cmp3, label %exit, label %outer.header -for.end18: ; preds = %for.cond.cleanup - ret void +exit: +ret void }