Skip to content

Commit d1a0170

Browse files
committed
fixup! Apply pattern to basic case of 4 i64 loads into fpr128 register
1 parent 150988f commit d1a0170

File tree

2 files changed

+18
-28
lines changed

2 files changed

+18
-28
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7358,12 +7358,12 @@ static bool getLoadPatterns(MachineInstr &Root,
73587358
return false;
73597359

73607360
// Verify that the subreg to reg loads an i32 into the first lane.
7361-
auto Lane0Load = CurrInstr->getOperand(2).getReg();
7362-
if (TRI->getRegSizeInBits(Lane0Load, MRI) != 32)
7361+
auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7362+
if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != 32)
73637363
return false;
73647364

73657365
// Verify that it also has a single non-debug use.
7366-
if (!MRI.hasOneNonDBGUse(Lane0Load))
7366+
if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
73677367
return false;
73687368

73697369
Patterns.push_back(AArch64MachineCombinerPattern::SPLIT_LD);
@@ -8747,20 +8747,9 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
87478747
MRI.getUniqueVRegDef(Lane2Load->getOperand(1).getReg());
87488748
MachineInstr *SubregToReg =
87498749
MRI.getUniqueVRegDef(Lane1Load->getOperand(1).getReg());
8750-
MachineInstr *Lane0Load =
8751-
MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg());
87528750
const TargetRegisterClass *FPR128RegClass =
87538751
MRI.getRegClass(Root.getOperand(0).getReg());
87548752

8755-
// Some helpful lambdas to increase code reuse.
8756-
auto CreateImplicitDef = [&]() {
8757-
auto VirtReg = MRI.createVirtualRegister(FPR128RegClass);
8758-
auto DefInstr = BuildMI(MF, MIMetadata(Root),
8759-
TII->get(TargetOpcode::IMPLICIT_DEF), VirtReg);
8760-
InstrIdxForVirtReg.insert(std::make_pair(VirtReg, InsInstrs.size()));
8761-
InsInstrs.push_back(DefInstr);
8762-
return VirtReg;
8763-
};
87648753
auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
87658754
Register SrcRegister, unsigned Lane,
87668755
Register OffsetRegister) {
@@ -8775,25 +8764,26 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
87758764
InsInstrs.push_back(LoadIndexIntoRegister);
87768765
return NewRegister;
87778766
};
8778-
// To rewrite the pattern, we first need define new registers to
8779-
// load our results into.
8780-
Register ImplicitDefForReg0 = CreateImplicitDef();
8781-
Register ImplicitDefForReg1 = CreateImplicitDef();
87828767

8783-
// Load index 0 into register 0 lane 0.
8784-
Register Index0LoadReg = LoadLaneToRegister(
8785-
Lane0Load, ImplicitDefForReg0, 0, Lane0Load->getOperand(2).getReg());
8786-
DelInstrs.push_back(Lane0Load);
8787-
DelInstrs.push_back(SubregToReg);
8768+
// To rewrite the pattern, we first need to define a new register to
8769+
// load our results into.
8770+
auto ImplicitDefForReg1 = MRI.createVirtualRegister(FPR128RegClass);
8771+
auto DefInstr =
8772+
BuildMI(MF, MIMetadata(Root), TII->get(TargetOpcode::IMPLICIT_DEF),
8773+
ImplicitDefForReg1);
8774+
InstrIdxForVirtReg.insert(
8775+
std::make_pair(ImplicitDefForReg1, InsInstrs.size()));
8776+
InsInstrs.push_back(DefInstr);
87888777

87898778
// Load index 1 into register 1 lane 0.
87908779
Register Index1LoadReg = LoadLaneToRegister(
87918780
Lane1Load, ImplicitDefForReg1, 0, Lane1Load->getOperand(3).getReg());
87928781
DelInstrs.push_back(Lane1Load);
87938782

87948783
// Load index 2 into register 0 lane 1.
8795-
auto Index2LoadReg = LoadLaneToRegister(Lane2Load, Index0LoadReg, 1,
8796-
Lane2Load->getOperand(3).getReg());
8784+
auto Index2LoadReg =
8785+
LoadLaneToRegister(Lane2Load, SubregToReg->getOperand(0).getReg(), 1,
8786+
Lane2Load->getOperand(3).getReg());
87978787
DelInstrs.push_back(Lane2Load);
87988788

87998789
// Load index 3 into register 1 lane 1.

llvm/test/CodeGen/AArch64/aarch64-combine-split-loads.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ body: |
1313
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
1414
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
1515
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
16-
; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = IMPLICIT_DEF
16+
; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
17+
; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]]
1718
; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = IMPLICIT_DEF
18-
; CHECK-NEXT: [[LD0_0:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 0, killed [[COPY1]]
1919
; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 0, killed [[COPY2]]
20-
; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[LD0_0]], 1, killed [[COPY3]]
20+
; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY3]]
2121
; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[LD1_0]], 1, killed [[COPY4]]
2222
; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
2323
; CHECK-NEXT: $q0 = COPY [[ZIP]]

0 commit comments

Comments
 (0)