Skip to content

Commit 677fb7b

Browse files
[ArgPromotion] Handle pointer arguments of recursive calls
Argument promotion currently does not promote pointer arguments that are passed on to recursive calls of the same function. This patch adds support for self-recursive functions, i.e. functions whose SCC has size 1. Because value tracking across recursive calls is complex when the SCC contains more than one function, we bail out in such cases.
1 parent f367eaa commit 677fb7b

File tree

2 files changed

+124
-3
lines changed

2 files changed

+124
-3
lines changed

llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ static bool allCallersPassValidPointerForArgument(Argument *Arg,
446446
/// parts it can be promoted into.
447447
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
448448
unsigned MaxElements, bool IsRecursive,
449+
bool IsSelfRecursive,
449450
SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
450451
// Quick exit for unused arguments
451452
if (Arg->use_empty())
@@ -610,13 +611,61 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
610611
// unknown users
611612
}
612613

614+
auto *CB = dyn_cast<CallBase>(V);
615+
Value *PtrArg = dyn_cast<Value>(U);
616+
if (IsSelfRecursive && CB && PtrArg) {
617+
Type *PtrTy = PtrArg->getType();
618+
Align PtrAlign = PtrArg->getPointerAlignment(DL);
619+
APInt Offset(DL.getIndexTypeSizeInBits(PtrArg->getType()), 0);
620+
PtrArg = PtrArg->stripAndAccumulateConstantOffsets(
621+
DL, Offset,
622+
/* AllowNonInbounds= */ true);
623+
if (PtrArg != Arg)
624+
return false;
625+
626+
if (Offset.getSignificantBits() >= 64)
627+
return false;
628+
629+
int64_t Off = Offset.getSExtValue();
630+
auto Pair = ArgParts.try_emplace(Off, ArgPart{PtrTy, PtrAlign, nullptr});
631+
ArgPart &Part = Pair.first->second;
632+
633+
// We limit promotion to only promoting up to a fixed number of elements
634+
// of the aggregate.
635+
if (MaxElements > 0 && ArgParts.size() > MaxElements) {
636+
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
637+
<< "more than " << MaxElements << " parts\n");
638+
return false;
639+
}
640+
641+
Part.Alignment = std::max(Part.Alignment, PtrAlign);
642+
continue;
643+
}
613644
// Unknown user.
614645
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
615646
<< "unknown user " << *V << "\n");
616647
return false;
617648
}
618649

619-
if (NeededDerefBytes || NeededAlign > 1) {
650+
// In case of functions with recursive calls, this check will fail when it
651+
// tries to look at the first caller of this function. The caller may or may
652+
// not have a load; in case it doesn't load the pointer being passed, this
653+
// check will fail. So, it's safe to skip the check in case we know that we
654+
// are dealing with a recursive call.
655+
//
656+
// def fun(ptr %a) {
657+
// ...
658+
// %loadres = load i32, ptr %a, align 4
659+
// %res = call i32 @fun(ptr %a)
660+
// ...
661+
// }
662+
//
663+
// def bar(ptr %x) {
664+
// ...
665+
// %resbar = call i32 @fun(ptr %x)
666+
// ...
667+
// }
668+
if (!IsRecursive && (NeededDerefBytes || NeededAlign > 1)) {
620669
// Try to prove a required deref / aligned requirement.
621670
if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
622671
NeededDerefBytes)) {
@@ -699,6 +748,10 @@ static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
699748
/// calls the DoPromotion method.
700749
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
701750
unsigned MaxElements, bool IsRecursive) {
751+
// Due to the complexity of handling cases where the SCC has more than one
752+
// component, we want to limit argument promotion of recursive calls to
753+
// just functions that directly call themselves.
754+
bool IsSelfRecursive = false;
702755
// Don't perform argument promotion for naked functions; otherwise we can end
703756
// up removing parameters that are seemingly 'not used' as they are referred
704757
// to in the assembly.
@@ -744,8 +797,10 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
744797
if (CB->isMustTailCall())
745798
return nullptr;
746799

747-
if (CB->getFunction() == F)
800+
if (CB->getFunction() == F) {
748801
IsRecursive = true;
802+
IsSelfRecursive = true;
803+
}
749804
}
750805

751806
// Can't change signature of musttail caller
@@ -779,7 +834,8 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
779834
// If we can promote the pointer to its value.
780835
SmallVector<OffsetAndArgPart, 4> ArgParts;
781836

782-
if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
837+
if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, IsSelfRecursive,
838+
ArgParts)) {
783839
SmallVector<Type *, 4> Types;
784840
for (const auto &Pair : ArgParts)
785841
Types.push_back(Pair.second.Ty);
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
3+
define internal i32 @foo(ptr %x, i32 %n, i32 %m) {
4+
; CHECK-LABEL: define internal i32 @foo(
5+
; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[N]], 0
8+
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
9+
; CHECK: [[COND_TRUE]]:
10+
; CHECK-NEXT: br label %[[RETURN:.*]]
11+
; CHECK: [[COND_FALSE]]:
12+
; CHECK-NEXT: [[SUBVAL:%.*]] = sub i32 [[N]], 1
13+
; CHECK-NEXT: [[CALLRET:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL]], i32 [[X_0_VAL]])
14+
; CHECK-NEXT: [[SUBVAL2:%.*]] = sub i32 [[N]], 2
15+
; CHECK-NEXT: [[CALLRET2:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
16+
; CHECK-NEXT: [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
17+
; CHECK-NEXT: br label %[[RETURN]]
18+
; CHECK: [[COND_NEXT:.*]]:
19+
; CHECK-NEXT: br label %[[RETURN]]
20+
; CHECK: [[RETURN]]:
21+
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ undef, %[[COND_NEXT]] ]
22+
; CHECK-NEXT: ret i32 [[RETVAL_0]]
23+
;
24+
entry:
25+
%cmp = icmp ne i32 %n, 0
26+
br i1 %cmp, label %cond_true, label %cond_false
27+
28+
cond_true: ; preds = %entry
29+
%val = load i32, ptr %x, align 4
30+
br label %return
31+
32+
cond_false: ; preds = %entry
33+
%val2 = load i32, ptr %x, align 4
34+
%subval = sub i32 %n, 1
35+
%callret = call i32 @foo(ptr %x, i32 %subval, i32 %val2)
36+
%subval2 = sub i32 %n, 2
37+
%callret2 = call i32 @foo(ptr %x, i32 %subval2, i32 %m)
38+
%cmp2 = add i32 %callret, %callret2
39+
br label %return
40+
41+
cond_next: ; No predecessors!
42+
br label %return
43+
44+
return: ; preds = %cond_next, %cond_false, %cond_true
45+
%retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ undef, %cond_next ]
46+
ret i32 %retval.0
47+
}
48+
49+
define i32 @bar(ptr %x, i32 %n, i32 %m) {
50+
; CHECK-LABEL: define i32 @bar(
51+
; CHECK-SAME: ptr [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
52+
; CHECK-NEXT: [[ENTRY:.*:]]
53+
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
54+
; CHECK-NEXT: [[CALLRET3:%.*]] = call i32 @foo(i32 [[X_VAL]], i32 [[N]], i32 [[M]])
55+
; CHECK-NEXT: br label %[[RETURN:.*]]
56+
; CHECK: [[RETURN]]:
57+
; CHECK-NEXT: ret i32 [[CALLRET3]]
58+
;
59+
entry:
60+
%callret3 = call i32 @foo(ptr %x, i32 %n, i32 %m)
61+
br label %return
62+
63+
return: ; preds = %entry
64+
ret i32 %callret3
65+
}

0 commit comments

Comments
 (0)