Skip to content

Commit 72f569a

Browse files
committed
code review comment - florian
1 parent bb87017 commit 72f569a

File tree

2 files changed

+110
-7
lines changed

2 files changed

+110
-7
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7399,21 +7399,22 @@ static bool getMiscPatterns(MachineInstr &Root,
73997399
static bool getGatherPattern(MachineInstr &Root,
74007400
SmallVectorImpl<unsigned> &Patterns,
74017401
unsigned LoadLaneOpCode, unsigned NumLanes) {
7402+
const MachineFunction *MF = Root.getMF();
7403+
74027404
// Early exit if optimizing for size.
7403-
if (Root.getMF()->getFunction().hasMinSize())
7405+
if (MF->getFunction().hasMinSize())
74047406
return false;
74057407

7406-
const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
7407-
const TargetRegisterInfo *TRI =
7408-
Root.getMF()->getSubtarget().getRegisterInfo();
7408+
const MachineRegisterInfo &MRI = MF->getRegInfo();
7409+
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
74097410

74107411
// The root of the pattern must load into the last lane of the vector.
74117412
if (Root.getOperand(2).getImm() != NumLanes - 1)
74127413
return false;
74137414

74147415
// Check that we have a load into every lane except lane 0.
74157416
// For each load we also want to check that:
7416-
// 1. It has a single debug use (since we will be replacing the virtual
7417+
// 1. It has a single non-debug use (since we will be replacing the virtual
74177418
// register)
74187419
// 2. That the addressing mode only uses a single offset register.
74197420
auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
@@ -7499,6 +7500,12 @@ generateGatherPattern(MachineInstr &Root,
74997500
CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
75007501
}
75017502

7503+
// Sort the load instructions according to the lane.
7504+
llvm::sort(LoadToLaneInstrs,
7505+
[](const MachineInstr *A, const MachineInstr *B) {
7506+
return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7507+
});
7508+
75027509
MachineInstr *SubregToReg = CurrInstr;
75037510
LoadToLaneInstrs.push_back(
75047511
MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));

llvm/test/CodeGen/AArch64/aarch64-combine-split-loads.mir

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,12 +252,12 @@ body: |
252252
RET_ReallyLR implicit $q0
253253
254254
---
255-
name: negative_pattern
255+
name: negative_pattern_missing_lanes
256256
body: |
257257
bb.0.entry:
258258
liveins: $x0, $x1
259259
260-
; CHECK-LABEL: name: negative_pattern
260+
; CHECK-LABEL: name: negative_pattern_missing_lanes
261261
; CHECK: [[LD1:%.*]]:fpr128 = LDRQui $x1, 0
262262
; CHECK-NEXT: [[LD2:%.*]]:fpr128 = LD1i32 [[LD1]]
263263
@@ -266,3 +266,99 @@ body: |
266266
%2:fpr128 = LD1i32 %1, 3, %0
267267
$q0 = COPY %2
268268
RET_ReallyLR implicit $q0
269+
270+
---
271+
name: out_of_order_lanes
272+
body: |
273+
bb.0.entry:
274+
liveins: $x0, $x1, $x2, $x3, $x4
275+
276+
; CHECK-LABEL: name: out_of_order_lanes
277+
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
278+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
279+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
280+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
281+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
282+
; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
283+
; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
284+
; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY3]]
285+
; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY2]], 0
286+
; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
287+
; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]]
288+
; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
289+
; CHECK-NEXT: $q0 = COPY [[ZIP]]
290+
; CHECK-NEXT: RET_ReallyLR implicit $q0
291+
%0:gpr64common = COPY $x0
292+
%1:gpr64common = COPY $x1
293+
%2:gpr64common = COPY $x2
294+
%3:gpr64common = COPY $x3
295+
%4:gpr64common = COPY $x4
296+
%5:fpr32 = LDRSroX %0, killed %1, 0, 1
297+
%6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
298+
%7:fpr128 = LD1i32 %6, 2, killed %2
299+
%8:fpr128 = LD1i32 %7, 1, killed %3
300+
%9:fpr128 = LD1i32 %8, 3, killed %4
301+
$q0 = COPY %9
302+
RET_ReallyLR implicit $q0
303+
304+
---
305+
name: negative_pattern_no_subreg_to_reg
306+
body: |
307+
bb.0.entry:
308+
liveins: $x0, $x1, $x2, $x3
309+
310+
; CHECK-LABEL: name: negative_pattern_no_subreg_to_reg
311+
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
312+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
313+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
314+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
315+
; CHECK-NEXT: [[INITIAL_VEC:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
316+
; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[INITIAL_VEC]], 1, killed [[COPY1]]
317+
; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[COPY2]]
318+
; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[COPY3]]
319+
; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]]
320+
; CHECK-NEXT: RET_ReallyLR implicit $q0
321+
%0:gpr64common = COPY $x0
322+
%1:gpr64common = COPY $x1
323+
%2:gpr64common = COPY $x2
324+
%3:gpr64common = COPY $x3
325+
%4:fpr128 = LDRQui %0, 0
326+
%5:fpr128 = LD1i32 %4, 1, killed %1
327+
%6:fpr128 = LD1i32 %5, 2, killed %2
328+
%7:fpr128 = LD1i32 %6, 3, killed %3
329+
$q0 = COPY %7
330+
RET_ReallyLR implicit $q0
331+
332+
---
333+
name: negative_pattern_multiple_users
334+
body: |
335+
bb.0.entry:
336+
liveins: $x0, $x1, $x2, $x3, $x4
337+
338+
; CHECK-LABEL: name: negative_pattern_multiple_users
339+
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
340+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
341+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
342+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
343+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
344+
; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
345+
; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
346+
; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]]
347+
; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[COPY3]]
348+
; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[COPY4]]
349+
; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]]
350+
; CHECK-NEXT: $q1 = COPY [[LD_LANE_2]]
351+
; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
352+
%0:gpr64common = COPY $x0
353+
%1:gpr64common = COPY $x1
354+
%2:gpr64common = COPY $x2
355+
%3:gpr64common = COPY $x3
356+
%4:gpr64common = COPY $x4
357+
%5:fpr32 = LDRSroX %0, killed %1, 0, 1
358+
%6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
359+
%7:fpr128 = LD1i32 %6, 1, killed %2
360+
%8:fpr128 = LD1i32 %7, 2, killed %3
361+
%9:fpr128 = LD1i32 %8, 3, killed %4
362+
$q0 = COPY %9
363+
$q1 = COPY %8
364+
RET_ReallyLR implicit $q0, implicit $q1

0 commit comments

Comments
 (0)