@@ -7373,9 +7373,8 @@ static bool getMiscPatterns(MachineInstr &Root,
7373
7373
}
7374
7374
7375
7375
static bool getGatherPattern (MachineInstr &Root,
7376
- SmallVectorImpl<unsigned > &Patterns,
7377
- unsigned LoadLaneOpCode,
7378
- unsigned NumLanes) {
7376
+ SmallVectorImpl<unsigned > &Patterns,
7377
+ unsigned LoadLaneOpCode, unsigned NumLanes) {
7379
7378
const MachineRegisterInfo &MRI = Root.getMF ()->getRegInfo ();
7380
7379
const TargetRegisterInfo *TRI =
7381
7380
Root.getMF ()->getSubtarget ().getRegisterInfo ();
@@ -7386,7 +7385,8 @@ static bool getGatherPattern(MachineInstr &Root,
7386
7385
7387
7386
// Check that we have load into all lanes except lane 0.
7388
7387
// For each load we also want to check that:
7389
- // 1. It has a single debug use (since we will be replacing the virtual register)
7388
+ // 1. It has a single non-debug use (since we will be replacing the virtual
7389
+ // register)
7390
7390
// 2. That the addressing mode only uses a single offset register.
7391
7391
auto *CurrInstr = MRI.getUniqueVRegDef (Root.getOperand (1 ).getReg ());
7392
7392
auto Range = llvm::seq<unsigned >(1 , NumLanes - 1 );
@@ -7417,17 +7417,17 @@ static bool getGatherPattern(MachineInstr &Root,
7417
7417
return false ;
7418
7418
7419
7419
switch (NumLanes) {
7420
- case 4 :
7421
- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i32);
7422
- break ;
7423
- case 8 :
7424
- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i16);
7425
- break ;
7426
- case 16 :
7427
- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i8);
7428
- break ;
7429
- default :
7430
- llvm_unreachable (" Got bad number of lanes for gather pattern." );
7420
+ case 4 :
7421
+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i32);
7422
+ break ;
7423
+ case 8 :
7424
+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i16);
7425
+ break ;
7426
+ case 16 :
7427
+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i8);
7428
+ break ;
7429
+ default :
7430
+ llvm_unreachable (" Got bad number of lanes for gather pattern." );
7431
7431
}
7432
7432
7433
7433
return true ;
@@ -7441,23 +7441,24 @@ static bool getLoadPatterns(MachineInstr &Root,
7441
7441
7442
7442
// The pattern searches for loads into single lanes.
7443
7443
switch (Root.getOpcode ()) {
7444
- case AArch64::LD1i32:
7445
- return getGatherPattern (Root, Patterns, Root.getOpcode (), 4 );
7446
- case AArch64::LD1i16:
7447
- return getGatherPattern (Root, Patterns, Root.getOpcode (), 8 );
7448
- case AArch64::LD1i8:
7449
- return getGatherPattern (Root, Patterns, Root.getOpcode (), 16 );
7450
- default :
7451
- return false ;
7444
+ case AArch64::LD1i32:
7445
+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 4 );
7446
+ case AArch64::LD1i16:
7447
+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 8 );
7448
+ case AArch64::LD1i8:
7449
+ return getGatherPattern (Root, Patterns, Root.getOpcode (), 16 );
7450
+ default :
7451
+ return false ;
7452
7452
}
7453
7453
}
7454
7454
7455
- static void generateGatherPattern (
7456
- MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7457
- SmallVectorImpl<MachineInstr *> &DelInstrs,
7458
- DenseMap<Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern,
7459
- unsigned NumLanes) {
7460
-
7455
+ static void
7456
+ generateGatherPattern (MachineInstr &Root,
7457
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
7458
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
7459
+ DenseMap<Register, unsigned > &InstrIdxForVirtReg,
7460
+ unsigned Pattern, unsigned NumLanes) {
7461
+
7461
7462
MachineFunction &MF = *Root.getParent ()->getParent ();
7462
7463
MachineRegisterInfo &MRI = MF.getRegInfo ();
7463
7464
const TargetInstrInfo *TII = MF.getSubtarget ().getInstrInfo ();
@@ -7469,7 +7470,7 @@ static void generateGatherPattern(
7469
7470
LoadToLaneInstrs.push_back (CurrInstr);
7470
7471
CurrInstr = MRI.getUniqueVRegDef (CurrInstr->getOperand (1 ).getReg ());
7471
7472
}
7472
-
7473
+
7473
7474
MachineInstr *SubregToReg = CurrInstr;
7474
7475
LoadToLaneInstrs.push_back (
7475
7476
MRI.getUniqueVRegDef (SubregToReg->getOperand (2 ).getReg ()));
@@ -7494,26 +7495,27 @@ static void generateGatherPattern(
7494
7495
};
7495
7496
7496
7497
// Helper to create load instruction based on opcode
7497
- auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7498
- Register OffsetReg) -> MachineInstrBuilder {
7499
- unsigned Opcode;
7500
- switch (NumLanes) {
7501
- case 4 :
7502
- Opcode = AArch64::LDRSui;
7503
- break ;
7504
- case 8 :
7505
- Opcode = AArch64::LDRHui;
7506
- break ;
7507
- case 16 :
7508
- Opcode = AArch64::LDRBui;
7509
- break ;
7510
- default :
7511
- llvm_unreachable (" Got unsupported number of lanes in machine-combiner gather pattern" );
7512
- }
7513
- // Immediate offset load
7514
- return BuildMI (MF, MIMetadata (Root), TII->get (Opcode), DestReg)
7515
- .addReg (OffsetReg)
7516
- .addImm (0 ); // immediate offset
7498
+ auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
7499
+ Register OffsetReg) -> MachineInstrBuilder {
7500
+ unsigned Opcode;
7501
+ switch (NumLanes) {
7502
+ case 4 :
7503
+ Opcode = AArch64::LDRSui;
7504
+ break ;
7505
+ case 8 :
7506
+ Opcode = AArch64::LDRHui;
7507
+ break ;
7508
+ case 16 :
7509
+ Opcode = AArch64::LDRBui;
7510
+ break ;
7511
+ default :
7512
+ llvm_unreachable (
7513
+ " Got unsupported number of lanes in machine-combiner gather pattern" );
7514
+ }
7515
+ // Immediate offset load
7516
+ return BuildMI (MF, MIMetadata (Root), TII->get (Opcode), DestReg)
7517
+ .addReg (OffsetReg)
7518
+ .addImm (0 ); // immediate offset
7517
7519
};
7518
7520
7519
7521
// Load the remaining lanes into register 0.
@@ -7522,40 +7524,45 @@ static void generateGatherPattern(
7522
7524
LoadToLaneInstrsAscending.begin () + NumLanes / 2 );
7523
7525
auto PrevReg = SubregToReg->getOperand (0 ).getReg ();
7524
7526
for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg0)) {
7525
- PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 , LoadInstr->getOperand (3 ).getReg ());
7527
+ PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7528
+ LoadInstr->getOperand (3 ).getReg ());
7526
7529
DelInstrs.push_back (LoadInstr);
7527
7530
}
7528
7531
auto LastLoadReg0 = PrevReg;
7529
7532
7530
- // First load into register 1. Perform a LDRSui to zero out the upper lanes in a single instruction.
7533
+ // First load into register 1. Use a scalar load (LDRSui, LDRHui or LDRBui,
7534
+ // chosen by NumLanes) to zero out the upper lanes in a single instruction.
7531
7535
auto Lane0Load = *LoadToLaneInstrsAscending.begin ();
7532
- auto OriginalSplitLoad = *std::next (LoadToLaneInstrsAscending.begin (), NumLanes / 2 );
7536
+ auto OriginalSplitLoad =
7537
+ *std::next (LoadToLaneInstrsAscending.begin (), NumLanes / 2 );
7533
7538
auto DestRegForMiddleIndex = MRI.createVirtualRegister (
7534
7539
MRI.getRegClass (Lane0Load->getOperand (0 ).getReg ()));
7535
-
7536
- MachineInstrBuilder MiddleIndexLoadInstr = CreateLoadInstruction (
7537
- NumLanes, DestRegForMiddleIndex,
7538
- OriginalSplitLoad->getOperand (3 ).getReg ());
7539
-
7540
- InstrIdxForVirtReg.insert (std::make_pair (DestRegForMiddleIndex, InsInstrs.size ()));
7540
+
7541
+ MachineInstrBuilder MiddleIndexLoadInstr =
7542
+ CreateLoadInstruction (NumLanes, DestRegForMiddleIndex,
7543
+ OriginalSplitLoad->getOperand (3 ).getReg ());
7544
+
7545
+ InstrIdxForVirtReg.insert (
7546
+ std::make_pair (DestRegForMiddleIndex, InsInstrs.size ()));
7541
7547
InsInstrs.push_back (MiddleIndexLoadInstr);
7542
7548
DelInstrs.push_back (OriginalSplitLoad);
7543
7549
7544
7550
// Subreg To Reg instruction for register 1.
7545
7551
auto DestRegForSubregToReg = MRI.createVirtualRegister (FPR128RegClass);
7546
7552
unsigned SubregType;
7547
7553
switch (NumLanes) {
7548
- case 4 :
7549
- SubregType = AArch64::ssub;
7550
- break ;
7551
- case 8 :
7552
- SubregType = AArch64::hsub;
7553
- break ;
7554
- case 16 :
7555
- SubregType = AArch64::bsub;
7556
- break ;
7557
- default :
7558
- llvm_unreachable (" Got invalid NumLanes for machine-combiner gather pattern" );
7554
+ case 4 :
7555
+ SubregType = AArch64::ssub;
7556
+ break ;
7557
+ case 8 :
7558
+ SubregType = AArch64::hsub;
7559
+ break ;
7560
+ case 16 :
7561
+ SubregType = AArch64::bsub;
7562
+ break ;
7563
+ default :
7564
+ llvm_unreachable (
7565
+ " Got invalid NumLanes for machine-combiner gather pattern" );
7559
7566
}
7560
7567
7561
7568
auto SubRegToRegInstr =
@@ -7569,11 +7576,13 @@ static void generateGatherPattern(
7569
7576
InsInstrs.push_back (SubRegToRegInstr);
7570
7577
7571
7578
// Load remaining lanes into register 1.
7572
- auto LanesToLoadToReg1 = llvm::make_range (
7573
- LoadToLaneInstrsAscending.begin () + NumLanes / 2 + 1 , LoadToLaneInstrsAscending.end ());
7579
+ auto LanesToLoadToReg1 =
7580
+ llvm::make_range (LoadToLaneInstrsAscending.begin () + NumLanes / 2 + 1 ,
7581
+ LoadToLaneInstrsAscending.end ());
7574
7582
PrevReg = SubRegToRegInstr->getOperand (0 ).getReg ();
7575
7583
for (auto [Index, LoadInstr] : llvm::enumerate (LanesToLoadToReg1)) {
7576
- PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 , LoadInstr->getOperand (3 ).getReg ());
7584
+ PrevReg = LoadLaneToRegister (LoadInstr, PrevReg, Index + 1 ,
7585
+ LoadInstr->getOperand (3 ).getReg ());
7577
7586
if (Index == NumLanes / 2 - 2 ) {
7578
7587
break ;
7579
7588
}
@@ -8967,11 +8976,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
8967
8976
break ;
8968
8977
}
8969
8978
case AArch64MachineCombinerPattern::GATHER_i16: {
8970
- generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, Pattern, 8 );
8979
+ generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8980
+ Pattern, 8 );
8971
8981
break ;
8972
8982
}
8973
8983
case AArch64MachineCombinerPattern::GATHER_i8: {
8974
- generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, Pattern, 16 );
8984
+ generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8985
+ Pattern, 16 );
8975
8986
break ;
8976
8987
}
8977
8988
0 commit comments