Skip to content

Commit 7019cea

Browse files
author
Jean-Michel Gorius
committed
[CodeGen] Add support for multiple memory operands in MachineInstr::mayAlias
Summary: To support all targets, the mayAlias member function needs to support instructions with multiple memory operands. This revision also changes the order of the emitted instructions in some test cases. Reviewers: efriedma, hfinkel, craig.topper, dmgreen Reviewed By: efriedma Subscribers: MatzeB, dmgreen, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80161
1 parent 689e616 commit 7019cea

13 files changed

+245
-90
lines changed

llvm/lib/CodeGen/MachineInstr.cpp

Lines changed: 72 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,81 +1228,88 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
12281228
if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
12291229
return false;
12301230

1231-
// FIXME: Need to handle multiple memory operands to support all targets.
1232-
if (!hasOneMemOperand() || !Other.hasOneMemOperand())
1231+
if (memoperands_empty() || Other.memoperands_empty())
12331232
return true;
12341233

1235-
MachineMemOperand *MMOa = *memoperands_begin();
1236-
MachineMemOperand *MMOb = *Other.memoperands_begin();
1237-
1238-
// The following interface to AA is fashioned after DAGCombiner::isAlias
1239-
// and operates with MachineMemOperand offset with some important
1240-
// assumptions:
1241-
// - LLVM fundamentally assumes flat address spaces.
1242-
// - MachineOperand offset can *only* result from legalization and
1243-
// cannot affect queries other than the trivial case of overlap
1244-
// checking.
1245-
// - These offsets never wrap and never step outside
1246-
// of allocated objects.
1247-
// - There should never be any negative offsets here.
1248-
//
1249-
// FIXME: Modify API to hide this math from "user"
1250-
// Even before we go to AA we can reason locally about some
1251-
// memory objects. It can save compile time, and possibly catch some
1252-
// corner cases not currently covered.
1253-
1254-
int64_t OffsetA = MMOa->getOffset();
1255-
int64_t OffsetB = MMOb->getOffset();
1256-
int64_t MinOffset = std::min(OffsetA, OffsetB);
1257-
1258-
uint64_t WidthA = MMOa->getSize();
1259-
uint64_t WidthB = MMOb->getSize();
1260-
bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
1261-
bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
1262-
1263-
const Value *ValA = MMOa->getValue();
1264-
const Value *ValB = MMOb->getValue();
1265-
bool SameVal = (ValA && ValB && (ValA == ValB));
1266-
if (!SameVal) {
1267-
const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
1268-
const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
1269-
if (PSVa && ValB && !PSVa->mayAlias(&MFI))
1270-
return false;
1271-
if (PSVb && ValA && !PSVb->mayAlias(&MFI))
1272-
return false;
1273-
if (PSVa && PSVb && (PSVa == PSVb))
1274-
SameVal = true;
1275-
}
1234+
auto HasAlias = [&](const MachineMemOperand &MMOa,
1235+
const MachineMemOperand &MMOb) {
1236+
// The following interface to AA is fashioned after DAGCombiner::isAlias
1237+
// and operates with MachineMemOperand offset with some important
1238+
// assumptions:
1239+
// - LLVM fundamentally assumes flat address spaces.
1240+
// - MachineOperand offset can *only* result from legalization and
1241+
// cannot affect queries other than the trivial case of overlap
1242+
// checking.
1243+
// - These offsets never wrap and never step outside
1244+
// of allocated objects.
1245+
// - There should never be any negative offsets here.
1246+
//
1247+
// FIXME: Modify API to hide this math from "user"
1248+
// Even before we go to AA we can reason locally about some
1249+
// memory objects. It can save compile time, and possibly catch some
1250+
// corner cases not currently covered.
1251+
1252+
int64_t OffsetA = MMOa.getOffset();
1253+
int64_t OffsetB = MMOb.getOffset();
1254+
int64_t MinOffset = std::min(OffsetA, OffsetB);
1255+
1256+
uint64_t WidthA = MMOa.getSize();
1257+
uint64_t WidthB = MMOb.getSize();
1258+
bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
1259+
bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
1260+
1261+
const Value *ValA = MMOa.getValue();
1262+
const Value *ValB = MMOb.getValue();
1263+
bool SameVal = (ValA && ValB && (ValA == ValB));
1264+
if (!SameVal) {
1265+
const PseudoSourceValue *PSVa = MMOa.getPseudoValue();
1266+
const PseudoSourceValue *PSVb = MMOb.getPseudoValue();
1267+
if (PSVa && ValB && !PSVa->mayAlias(&MFI))
1268+
return false;
1269+
if (PSVb && ValA && !PSVb->mayAlias(&MFI))
1270+
return false;
1271+
if (PSVa && PSVb && (PSVa == PSVb))
1272+
SameVal = true;
1273+
}
1274+
1275+
if (SameVal) {
1276+
if (!KnownWidthA || !KnownWidthB)
1277+
return true;
1278+
int64_t MaxOffset = std::max(OffsetA, OffsetB);
1279+
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
1280+
return (MinOffset + LowWidth > MaxOffset);
1281+
}
12761282

1277-
if (SameVal) {
1278-
if (!KnownWidthA || !KnownWidthB)
1283+
if (!AA)
12791284
return true;
1280-
int64_t MaxOffset = std::max(OffsetA, OffsetB);
1281-
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
1282-
return (MinOffset + LowWidth > MaxOffset);
1283-
}
12841285

1285-
if (!AA)
1286-
return true;
1286+
if (!ValA || !ValB)
1287+
return true;
12871288

1288-
if (!ValA || !ValB)
1289-
return true;
1289+
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
1290+
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
12901291

1291-
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
1292-
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
1292+
int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
1293+
: MemoryLocation::UnknownSize;
1294+
int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
1295+
: MemoryLocation::UnknownSize;
12931296

1294-
int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
1295-
: MemoryLocation::UnknownSize;
1296-
int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
1297-
: MemoryLocation::UnknownSize;
1297+
AliasResult AAResult =
1298+
AA->alias(MemoryLocation(ValA, OverlapA,
1299+
UseTBAA ? MMOa.getAAInfo() : AAMDNodes()),
1300+
MemoryLocation(ValB, OverlapB,
1301+
UseTBAA ? MMOb.getAAInfo() : AAMDNodes()));
12981302

1299-
AliasResult AAResult = AA->alias(
1300-
MemoryLocation(ValA, OverlapA,
1301-
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
1302-
MemoryLocation(ValB, OverlapB,
1303-
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
1303+
return (AAResult != NoAlias);
1304+
};
13041305

1305-
return (AAResult != NoAlias);
1306+
for (auto &&MMOa : memoperands()) {
1307+
for (auto &&MMOb : Other.memoperands()) {
1308+
if (HasAlias(*MMOa, *MMOb))
1309+
return true;
1310+
}
1311+
}
1312+
return false;
13061313
}
13071314

13081315
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered

llvm/lib/CodeGen/ScheduleDAGInstrs.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,9 +544,14 @@ static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
544544
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
545545
unsigned Latency) {
546546
if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
547+
LLVM_DEBUG(dbgs() << "Adding chain dependency\n from: " << *SUb->getInstr()
548+
<< " to: " << *SUa->getInstr());
547549
SDep Dep(SUa, SDep::MayAliasMem);
548550
Dep.setLatency(Latency);
549551
SUb->addPred(Dep);
552+
} else {
553+
LLVM_DEBUG(dbgs() << "Not adding chain dependency\n from: "
554+
<< *SUb->getInstr() << " to: " << *SUa->getInstr());
550555
}
551556
}
552557

llvm/test/CodeGen/AArch64/merge-store-dependency.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
1919
; A53-NEXT: mov x19, x8
2020
; A53-NEXT: mov w0, w1
2121
; A53-NEXT: mov w9, #256
22+
; A53-NEXT: stp x2, x3, [x8, #32]
23+
; A53-NEXT: mov x2, x8
2224
; A53-NEXT: str q0, [x19, #16]!
2325
; A53-NEXT: str w1, [x19]
2426
; A53-NEXT: mov w1, #4
25-
; A53-NEXT: stp x2, x3, [x8, #32]
26-
; A53-NEXT: mov x2, x8
2727
; A53-NEXT: str q0, [x8]
2828
; A53-NEXT: strh w9, [x8, #24]
2929
; A53-NEXT: str wzr, [x8, #20]

llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) {
503503
; CHECK-NEXT: vmov.32 r3, d16[1]
504504
; CHECK-NEXT: vmov.32 r1, d16[0]
505505
; CHECK-NEXT: subs r12, r12, #1
506+
; CHECK-NEXT: str r12, [r0, #12]
506507
; CHECK-NEXT: sbcs r2, r2, #0
508+
; CHECK-NEXT: str r2, [r0, #8]
507509
; CHECK-NEXT: sbcs r3, r3, #0
508510
; CHECK-NEXT: sbc r1, r1, #0
509511
; CHECK-NEXT: stm r0, {r1, r3}
510-
; CHECK-NEXT: str r2, [r0, #8]
511-
; CHECK-NEXT: str r12, [r0, #12]
512512
; CHECK-NEXT: bx lr
513513
; CHECK-NEXT: .p2align 4
514514
; CHECK-NEXT: @ %bb.1:

llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
; CHECK: ********** MI Scheduling **********
1010
; We need second, post-ra scheduling to have VLDM instruction combined from single-loads
1111
; CHECK: ********** MI Scheduling **********
12-
; CHECK: VLDMDIA_UPD
12+
; CHECK: SU(1):{{.*}}VLDMDIA_UPD
1313
; CHECK: rdefs left
1414
; CHECK-NEXT: Latency : 6
1515
; CHECK: Successors:

llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
66
; CHECK: ********** MI Scheduling **********
77
; CHECK: schedule starting
8-
; CHECK: VSTMDIA_UPD
8+
; CHECK: SU(2):{{.*}}VSTMDIA_UPD
99
; CHECK: rdefs left
1010
; CHECK-NEXT: Latency : 4
1111
; CHECK: Successors:

llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
66
; CHECK: ********** MI Scheduling **********
77
; CHECK: schedule starting
8-
; CHECK: VSTMDIA
8+
; CHECK: SU(3):{{.*}}VSTMDIA
99
; CHECK: rdefs left
1010
; CHECK-NEXT: Latency : 2
1111

llvm/test/CodeGen/Thumb2/mve-float32regloops.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,6 +1092,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
10921092
; CHECK-NEXT: ldrd lr, r10, [r12, #24]
10931093
; CHECK-NEXT: vstrb.8 q0, [r11], #16
10941094
; CHECK-NEXT: vldrw.u32 q0, [r8], #32
1095+
; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill
10951096
; CHECK-NEXT: vldrw.u32 q1, [r8, #-28]
10961097
; CHECK-NEXT: vmul.f32 q0, q0, r0
10971098
; CHECK-NEXT: vldrw.u32 q6, [r8, #-24]
@@ -1103,13 +1104,12 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
11031104
; CHECK-NEXT: vfma.f32 q0, q4, r6
11041105
; CHECK-NEXT: vldrw.u32 q3, [r8, #-8]
11051106
; CHECK-NEXT: vfma.f32 q0, q5, r5
1106-
; CHECK-NEXT: vldrw.u32 q1, [r8, #-4]
1107-
; CHECK-NEXT: vfma.f32 q0, q2, r3
11081107
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
1108+
; CHECK-NEXT: vfma.f32 q0, q2, r3
1109+
; CHECK-NEXT: vldrw.u32 q1, [r8, #-4]
11091110
; CHECK-NEXT: vfma.f32 q0, q3, lr
1110-
; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill
1111-
; CHECK-NEXT: vfma.f32 q0, q1, r10
11121111
; CHECK-NEXT: cmp r0, #16
1112+
; CHECK-NEXT: vfma.f32 q0, q1, r10
11131113
; CHECK-NEXT: blo .LBB16_7
11141114
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
11151115
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1

llvm/test/CodeGen/Thumb2/mve-phireg.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,30 +168,30 @@ define dso_local i32 @e() #0 {
168168
; CHECK-NEXT: vmov q1, q4
169169
; CHECK-NEXT: vmov s1, r7
170170
; CHECK-NEXT: vmov.32 q1[1], r6
171-
; CHECK-NEXT: mov.w r10, #0
172-
; CHECK-NEXT: vmov.32 q1[2], r5
173171
; CHECK-NEXT: vmov.32 q5[0], r7
172+
; CHECK-NEXT: vmov.32 q1[2], r5
173+
; CHECK-NEXT: vmov s9, r4
174174
; CHECK-NEXT: vmov.32 q1[3], r4
175-
; CHECK-NEXT: strd r0, r10, [sp, #24]
175+
; CHECK-NEXT: vdup.32 q6, r7
176176
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
177177
; CHECK-NEXT: vmov q1, q5
178-
; CHECK-NEXT: vmov s9, r4
179178
; CHECK-NEXT: vmov.32 q1[1], r7
180-
; CHECK-NEXT: vdup.32 q6, r7
181179
; CHECK-NEXT: vmov.f32 s2, s1
182180
; CHECK-NEXT: vmov.f32 s8, s0
183181
; CHECK-NEXT: vmov.32 q1[2], r6
184182
; CHECK-NEXT: vmov q3, q6
185183
; CHECK-NEXT: vmov q7, q6
186184
; CHECK-NEXT: vmov.f32 s10, s1
187185
; CHECK-NEXT: mov.w r8, #4
186+
; CHECK-NEXT: mov.w r10, #0
188187
; CHECK-NEXT: vmov.32 q1[3], r4
189188
; CHECK-NEXT: vmov.32 q3[0], r4
190189
; CHECK-NEXT: vmov.32 q7[1], r4
191190
; CHECK-NEXT: str r1, [r0]
192191
; CHECK-NEXT: vmov.f32 s11, s3
193192
; CHECK-NEXT: movs r1, #64
194193
; CHECK-NEXT: strh.w r8, [sp, #390]
194+
; CHECK-NEXT: strd r0, r10, [sp, #24]
195195
; CHECK-NEXT: vstrw.32 q0, [sp, #44]
196196
; CHECK-NEXT: str r0, [r0]
197197
; CHECK-NEXT: vstrw.32 q2, [r0]

llvm/test/CodeGen/Thumb2/mve-vst3.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
2424
; CHECK-NEXT: vmov.f32 s9, s6
2525
; CHECK-NEXT: vmov.f32 s10, s0
2626
; CHECK-NEXT: vmov.f32 s11, s5
27-
; CHECK-NEXT: strd r2, r0, [r1, #16]
2827
; CHECK-NEXT: vstrw.32 q2, [r1]
28+
; CHECK-NEXT: strd r2, r0, [r1, #16]
2929
; CHECK-NEXT: pop {r4, pc}
3030
entry:
3131
%s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0

0 commit comments

Comments
 (0)