Skip to content

Commit 5ca5b93

Browse files
committed
MemCpyOpt: replace an AA query with an MSSA query
The AA query in processStoreOfLoad misses certain cases and has long been marked with a TODO. Replace it with an MSSA query, which makes MemCpyOpt more powerful and resolves that long-standing TODO.
1 parent d4f6ad5 commit 5ca5b93

File tree

2 files changed

+26
-30
lines changed

2 files changed

+26
-30
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -647,19 +647,18 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
647647
(EnableMemCpyOptWithoutLibcalls ||
648648
(TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
649649
MemoryLocation LoadLoc = MemoryLocation::get(LI);
650-
651-
// We use alias analysis to check if an instruction may store to
652-
// the memory we load from in between the load and the store. If
653-
// such an instruction is found, we try to promote there instead
654-
// of at the store position.
655-
// TODO: Can use MSSA for this.
656-
Instruction *P = SI;
657-
for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
658-
if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
659-
P = &I;
660-
break;
661-
}
662-
}
650+
MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
651+
*StoreAccess = MSSA->getMemoryAccess(SI);
652+
653+
// We use MSSA to check if an instruction may store to the memory we load
654+
// from in between the load and the store. If such an instruction is found,
655+
// we try to promote there instead of at the store position.
656+
BatchAAResults BAA(*AA);
657+
auto *Clobber =
658+
cast<MemoryUseOrDef>(MSSA->getWalker()->getClobberingMemoryAccess(
659+
StoreAccess, LoadLoc, BAA));
660+
Instruction *P =
661+
MSSA->dominates(LoadAccess, Clobber) ? Clobber->getMemoryInst() : SI;
663662

664663
// If we found an instruction that may write to the loaded memory,
665664
// we can try to promote at this position instead of the store

llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,8 @@ define void @noaliasdst(ptr %src, ptr noalias %dst) {
3838

3939
define void @destroysrc(ptr %src, ptr %dst) {
4040
; CHECK-LABEL: @destroysrc(
41-
; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
42-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
43-
; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
41+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
42+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
4443
; CHECK-NEXT: ret void
4544
;
4645
%1 = load %S, ptr %src
@@ -51,8 +50,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
5150

5251
define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
5352
; CHECK-LABEL: @destroynoaliassrc(
54-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
55-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
53+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
54+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
5655
; CHECK-NEXT: ret void
5756
;
5857
%1 = load %S, ptr %src
@@ -63,9 +62,8 @@ define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
6362

6463
define void @copyalias(ptr %src, ptr %dst) {
6564
; CHECK-LABEL: @copyalias(
66-
; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
67-
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
68-
; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST]], align 8
65+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
66+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 16, i1 false)
6967
; CHECK-NEXT: ret void
7068
;
7169
%1 = load %S, ptr %src
@@ -79,9 +77,9 @@ define void @copyalias(ptr %src, ptr %dst) {
7977
; sure we lift the computation as well if needed and possible.
8078
define void @addrproducer(ptr %src, ptr %dst) {
8179
; CHECK-LABEL: @addrproducer(
80+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
8281
; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
8382
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
84-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
8583
; CHECK-NEXT: ret void
8684
;
8785
%1 = load %S, ptr %src
@@ -93,11 +91,10 @@ define void @addrproducer(ptr %src, ptr %dst) {
9391

9492
define void @aliasaddrproducer(ptr %src, ptr %dst, ptr %dstidptr) {
9593
; CHECK-LABEL: @aliasaddrproducer(
96-
; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
9794
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
9895
; CHECK-NEXT: [[DSTINDEX:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
99-
; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S]], ptr [[DST]], i32 [[DSTINDEX]]
100-
; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST2]], align 8
96+
; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i32 [[DSTINDEX]]
97+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
10198
; CHECK-NEXT: ret void
10299
;
103100
%1 = load %S, ptr %src
@@ -110,11 +107,11 @@ define void @aliasaddrproducer(ptr %src, ptr %dst, ptr %dstidptr) {
110107

111108
define void @noaliasaddrproducer(ptr %src, ptr noalias %dst, ptr noalias %dstidptr) {
112109
; CHECK-LABEL: @noaliasaddrproducer(
113-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
114-
; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
110+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
111+
; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP1]], 1
115112
; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 [[DSTINDEX]]
116-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC]], i64 16, i1 false)
117-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 undef, i64 16, i1 false)
113+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
114+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 undef, i64 16, i1 false)
118115
; CHECK-NEXT: ret void
119116
;
120117
%1 = load %S, ptr %src
@@ -130,7 +127,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
130127
; CHECK-LABEL: @throwing_call(
131128
; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
132129
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
133-
; CHECK-NEXT: call void @call() [[ATTR2:#.*]]
130+
; CHECK-NEXT: call void @call() #[[ATTR2:[0-9]+]]
134131
; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
135132
; CHECK-NEXT: ret void
136133
;

0 commit comments

Comments
 (0)