Skip to content

Commit 425333c

Browse files
omarahmed1111 authored and jdoerfert committed
[Attributor] Improve the alignment of the loads
This patch improves the alignment of the loads generated in createReplacementValues() by querying the AAAlign attribute for the best alignment of the base. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D76550
1 parent 6045a80 commit 425333c

File tree

14 files changed

+174
-53
lines changed

14 files changed

+174
-53
lines changed

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5053,6 +5053,11 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
50535053
if (!PrivatizableType.getValue())
50545054
return indicatePessimisticFixpoint();
50555055

5056+
// The dependence is optional so we don't give up once we give up on the
5057+
// alignment.
5058+
A.getAAFor<AAAlign>(*this, IRPosition::value(getAssociatedValue()),
5059+
/* TrackDependence */ true, DepClassTy::OPTIONAL);
5060+
50565061
// Avoid arguments with padding for now.
50575062
if (!getIRPosition().hasAttr(Attribute::ByVal) &&
50585063
!ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
@@ -5267,8 +5272,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
52675272

52685273
/// Extract values from \p Base according to the type \p PrivType at the
52695274
/// call position \p ACS. The values are appended to \p ReplacementValues.
5270-
void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
5271-
Value *Base,
5275+
void createReplacementValues(Align Alignment, Type *PrivType,
5276+
AbstractCallSite ACS, Value *Base,
52725277
SmallVectorImpl<Value *> &ReplacementValues) {
52735278
assert(Base && "Expected base value!");
52745279
assert(PrivType && "Expected privatizable type!");
@@ -5281,7 +5286,6 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
52815286
Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
52825287
"", ACS.getInstruction());
52835288

5284-
// TODO: Improve the alignment of the loads.
52855289
// Traverse the type, build GEPs and loads.
52865290
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
52875291
const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
@@ -5291,7 +5295,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
52915295
constructPointer(PointeeTy->getPointerTo(), Base,
52925296
PrivStructLayout->getElementOffset(u), IRB, DL);
52935297
LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
5294-
L->setAlignment(Align(1));
5298+
L->setAlignment(Alignment);
52955299
ReplacementValues.push_back(L);
52965300
}
52975301
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
@@ -5302,12 +5306,12 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
53025306
Value *Ptr =
53035307
constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
53045308
LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP);
5305-
L->setAlignment(Align(1));
5309+
L->setAlignment(Alignment);
53065310
ReplacementValues.push_back(L);
53075311
}
53085312
} else {
53095313
LoadInst *L = new LoadInst(PrivType, Base, "", IP);
5310-
L->setAlignment(Align(1));
5314+
L->setAlignment(Alignment);
53115315
ReplacementValues.push_back(L);
53125316
}
53135317
}
@@ -5333,6 +5337,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
53335337
return ChangeStatus::UNCHANGED;
53345338

53355339
Argument *Arg = getAssociatedArgument();
5340+
// Query AAAlign attribute for alignment of associated argument to
5341+
// determine the best alignment of loads.
5342+
const auto &AlignAA = A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg));
53365343

53375344
// Callback to repair the associated function. A new alloca is placed at the
53385345
// beginning and initialized with the values passed through arguments. The
@@ -5356,9 +5363,13 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
53565363
// of the privatizable type are loaded prior to the call and passed to the
53575364
// new function version.
53585365
Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
5359-
[=](const Attributor::ArgumentReplacementInfo &ARI,
5360-
AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
5366+
[=, &AlignAA](const Attributor::ArgumentReplacementInfo &ARI,
5367+
AbstractCallSite ACS,
5368+
SmallVectorImpl<Value *> &NewArgOperands) {
5369+
// When no alignment is specified for the load instruction,
5370+
// natural alignment is assumed.
53615371
createReplacementValues(
5372+
assumeAligned(AlignAA.getAssumedAlign()),
53625373
PrivatizableType.getValue(), ACS,
53635374
ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
53645375
NewArgOperands);

llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define i32 @f(i32 %x) {
4444
; IS__TUNIT_NPM-NEXT: entry:
4545
; IS__TUNIT_NPM-NEXT: [[X_ADDR:%.*]] = alloca i32
4646
; IS__TUNIT_NPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4
47-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 1
47+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
4848
; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]])
4949
; IS__TUNIT_NPM-NEXT: ret i32 [[TMP1]]
5050
;

llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ define void @promote(<4 x i64>* %arg) #0 {
132132
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
133133
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
134134
; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
135-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 1
135+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32
136136
; IS__TUNIT_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]])
137137
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
138138
; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2

llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
5454
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
5555
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
5656
; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
57-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
57+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
5858
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
5959
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
6060
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -141,7 +141,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
141141
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
142142
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
143143
; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
144-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
144+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
145145
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
146146
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
147147
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -228,7 +228,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
228228
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
229229
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
230230
; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
231-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
231+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
232232
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
233233
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
234234
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -315,7 +315,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
315315
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
316316
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
317317
; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
318-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
318+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
319319
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
320320
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
321321
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -570,7 +570,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
570570
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
571571
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
572572
; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
573-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
573+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
574574
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
575575
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
576576
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
@@ -657,7 +657,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
657657
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
658658
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
659659
; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
660-
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
660+
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
661661
; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
662662
; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
663663
; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2

llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
2-
; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
3-
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
2+
; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
3+
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
44
; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
55
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
66

@@ -51,3 +51,113 @@ define internal void @g(i32* %a) {
5151
}
5252

5353
declare void @z(i32)
54+
55+
; Test2
56+
; Privatizable arguments with different alignments
57+
define internal i32 @test(i32* %X, i64* %Y) {
58+
; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test
59+
; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]])
60+
; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4
61+
; IS__TUNIT_OPM-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8
62+
; IS__TUNIT_OPM-NEXT: [[C:%.*]] = add i32 [[A]], 1
63+
; IS__TUNIT_OPM-NEXT: [[D:%.*]] = add i64 [[B]], 1
64+
; IS__TUNIT_OPM-NEXT: [[COND:%.*]] = icmp sgt i64 [[D]], -1
65+
; IS__TUNIT_OPM-NEXT: br i1 [[COND]], label [[RETURN1:%.*]], label [[RETURN2:%.*]]
66+
; IS__TUNIT_OPM: Return1:
67+
; IS__TUNIT_OPM-NEXT: ret i32 [[C]]
68+
; IS__TUNIT_OPM: Return2:
69+
; IS__TUNIT_OPM-NEXT: ret i32 [[A]]
70+
;
71+
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test
72+
; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
73+
; IS__TUNIT_NPM-NEXT: [[Y_PRIV:%.*]] = alloca i64
74+
; IS__TUNIT_NPM-NEXT: store i64 [[TMP1]], i64* [[Y_PRIV]]
75+
; IS__TUNIT_NPM-NEXT: [[X_PRIV:%.*]] = alloca i32
76+
; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]]
77+
; IS__TUNIT_NPM-NEXT: [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4
78+
; IS__TUNIT_NPM-NEXT: [[B:%.*]] = load i64, i64* [[Y_PRIV]], align 8
79+
; IS__TUNIT_NPM-NEXT: [[C:%.*]] = add i32 [[A]], 1
80+
; IS__TUNIT_NPM-NEXT: [[D:%.*]] = add i64 [[B]], 1
81+
; IS__TUNIT_NPM-NEXT: [[COND:%.*]] = icmp sgt i64 [[D]], -1
82+
; IS__TUNIT_NPM-NEXT: br i1 [[COND]], label [[RETURN1:%.*]], label [[RETURN2:%.*]]
83+
; IS__TUNIT_NPM: Return1:
84+
; IS__TUNIT_NPM-NEXT: ret i32 [[C]]
85+
; IS__TUNIT_NPM: Return2:
86+
; IS__TUNIT_NPM-NEXT: ret i32 [[A]]
87+
;
88+
; IS__CGSCC____-LABEL: define {{[^@]+}}@test
89+
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]])
90+
; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4
91+
; IS__CGSCC____-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8
92+
; IS__CGSCC____-NEXT: [[C:%.*]] = add i32 [[A]], 1
93+
; IS__CGSCC____-NEXT: [[D:%.*]] = add i64 [[B]], 1
94+
; IS__CGSCC____-NEXT: [[COND:%.*]] = icmp sgt i64 [[D]], -1
95+
; IS__CGSCC____-NEXT: br i1 [[COND]], label [[RETURN1:%.*]], label [[RETURN2:%.*]]
96+
; IS__CGSCC____: Return1:
97+
; IS__CGSCC____-NEXT: ret i32 [[C]]
98+
; IS__CGSCC____: Return2:
99+
; IS__CGSCC____-NEXT: ret i32 [[A]]
100+
;
101+
%A = load i32, i32* %X
102+
%B = load i64, i64* %Y
103+
%C = add i32 %A, 1
104+
%D = add i64 %B, 1
105+
%cond = icmp sgt i64 %D, -1
106+
br i1 %cond, label %Return1, label %Return2
107+
Return1:
108+
ret i32 %C
109+
Return2:
110+
ret i32 %A
111+
}
112+
113+
define internal i32 @caller(i32* %A) {
114+
; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller
115+
; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]])
116+
; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i64
117+
; IS__TUNIT_OPM-NEXT: store i64 1, i64* [[B]], align 8
118+
; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]])
119+
; IS__TUNIT_OPM-NEXT: ret i32 [[C]]
120+
;
121+
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller
122+
; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]])
123+
; IS__TUNIT_NPM-NEXT: [[A_PRIV:%.*]] = alloca i32
124+
; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[A_PRIV]]
125+
; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i64
126+
; IS__TUNIT_NPM-NEXT: store i64 1, i64* [[B]], align 8
127+
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_PRIV]], align 4
128+
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[B]], align 8
129+
; IS__TUNIT_NPM-NEXT: [[C:%.*]] = call i32 @test(i32 [[TMP2]], i64 [[TMP3]])
130+
; IS__TUNIT_NPM-NEXT: ret i32 [[C]]
131+
;
132+
; IS__CGSCC____-LABEL: define {{[^@]+}}@caller
133+
; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]])
134+
; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i64
135+
; IS__CGSCC____-NEXT: store i64 1, i64* [[B]], align 8
136+
; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]])
137+
; IS__CGSCC____-NEXT: ret i32 [[C]]
138+
;
139+
%B = alloca i64
140+
store i64 1, i64* %B
141+
%C = call i32 @test(i32* %A, i64* %B)
142+
ret i32 %C
143+
}
144+
145+
define i32 @callercaller() {
146+
; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callercaller()
147+
; NOT_TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32
148+
; NOT_TUNIT_NPM-NEXT: store i32 2, i32* [[B]], align 4
149+
; NOT_TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]])
150+
; NOT_TUNIT_NPM-NEXT: ret i32 [[X]]
151+
;
152+
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callercaller()
153+
; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32
154+
; IS__TUNIT_NPM-NEXT: store i32 2, i32* [[B]], align 4
155+
; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
156+
; IS__TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32 [[TMP1]])
157+
; IS__TUNIT_NPM-NEXT: ret i32 [[X]]
158+
;
159+
%B = alloca i32
160+
store i32 2, i32* %B
161+
%X = call i32 @caller(i32* %B)
162+
ret i32 %X
163+
}

0 commit comments

Comments
 (0)