@@ -7329,9 +7329,9 @@ bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7329
7329
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
7330
7330
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
7331
7331
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
7332
- case AArch64MachineCombinerPattern::GATHER_i32 :
7333
- case AArch64MachineCombinerPattern::GATHER_i16 :
7334
- case AArch64MachineCombinerPattern::GATHER_i8 :
7332
+ case AArch64MachineCombinerPattern::GATHER_LANE_i32 :
7333
+ case AArch64MachineCombinerPattern::GATHER_LANE_i16 :
7334
+ case AArch64MachineCombinerPattern::GATHER_LANE_i8 :
7335
7335
return true ;
7336
7336
} // end switch (Pattern)
7337
7337
return false ;
@@ -7375,6 +7375,10 @@ static bool getMiscPatterns(MachineInstr &Root,
7375
7375
static bool getGatherPattern (MachineInstr &Root,
7376
7376
SmallVectorImpl<unsigned > &Patterns,
7377
7377
unsigned LoadLaneOpCode, unsigned NumLanes) {
7378
+ // Early exit if optimizing for size.
7379
+ if (Root.getMF ()->getFunction ().hasMinSize ())
7380
+ return false ;
7381
+
7378
7382
const MachineRegisterInfo &MRI = Root.getMF ()->getRegInfo ();
7379
7383
const TargetRegisterInfo *TRI =
7380
7384
Root.getMF ()->getSubtarget ().getRegisterInfo ();
@@ -7391,7 +7395,7 @@ static bool getGatherPattern(MachineInstr &Root,
7391
7395
auto *CurrInstr = MRI.getUniqueVRegDef (Root.getOperand (1 ).getReg ());
7392
7396
auto Range = llvm::seq<unsigned >(1 , NumLanes - 1 );
7393
7397
SmallSet<unsigned , 4 > RemainingLanes (Range.begin (), Range.end ());
7394
- while (RemainingLanes.begin () != RemainingLanes. end () &&
7398
+ while (! RemainingLanes.empty () && CurrInstr &&
7395
7399
CurrInstr->getOpcode () == LoadLaneOpCode &&
7396
7400
MRI.hasOneNonDBGUse (CurrInstr->getOperand (0 ).getReg ()) &&
7397
7401
CurrInstr->getNumOperands () == 4 ) {
@@ -7418,13 +7422,13 @@ static bool getGatherPattern(MachineInstr &Root,
7418
7422
7419
7423
switch (NumLanes) {
7420
7424
case 4 :
7421
- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i32 );
7425
+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_LANE_i32 );
7422
7426
break ;
7423
7427
case 8 :
7424
- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i16 );
7428
+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_LANE_i16 );
7425
7429
break ;
7426
7430
case 16 :
7427
- Patterns.push_back (AArch64MachineCombinerPattern::GATHER_i8 );
7431
+ Patterns.push_back (AArch64MachineCombinerPattern::GATHER_LANE_i8 );
7428
7432
break ;
7429
7433
default :
7430
7434
llvm_unreachable (" Got bad number of lanes for gather pattern." );
@@ -7434,8 +7438,8 @@ static bool getGatherPattern(MachineInstr &Root,
7434
7438
}
7435
7439
7436
7440
// / Search for patterns where we use LD1 instructions to load into
7437
- // / separate lanes of an 128 bit Neon register. We can increase MLP
7438
- // / by loading into 2 Neon registers instead.
7441
+ // / separate lanes of a 128-bit Neon register. We can increase Memory Level
7442
+ // / Parallelism by loading into 2 Neon registers instead.
7439
7443
static bool getLoadPatterns (MachineInstr &Root,
7440
7444
SmallVectorImpl<unsigned > &Patterns) {
7441
7445
@@ -7604,9 +7608,9 @@ AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
7604
7608
switch (Pattern) {
7605
7609
case AArch64MachineCombinerPattern::SUBADD_OP1:
7606
7610
case AArch64MachineCombinerPattern::SUBADD_OP2:
7607
- case AArch64MachineCombinerPattern::GATHER_i32 :
7608
- case AArch64MachineCombinerPattern::GATHER_i16 :
7609
- case AArch64MachineCombinerPattern::GATHER_i8 :
7611
+ case AArch64MachineCombinerPattern::GATHER_LANE_i32 :
7612
+ case AArch64MachineCombinerPattern::GATHER_LANE_i16 :
7613
+ case AArch64MachineCombinerPattern::GATHER_LANE_i8 :
7610
7614
return CombinerObjective::MustReduceDepth;
7611
7615
default :
7612
7616
return TargetInstrInfo::getCombinerObjective (Pattern);
@@ -8895,17 +8899,17 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
8895
8899
MUL = genFNegatedMAD (MF, MRI, TII, Root, InsInstrs);
8896
8900
break ;
8897
8901
}
8898
- case AArch64MachineCombinerPattern::GATHER_i32 : {
8902
+ case AArch64MachineCombinerPattern::GATHER_LANE_i32 : {
8899
8903
generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8900
8904
Pattern, 4 );
8901
8905
break ;
8902
8906
}
8903
- case AArch64MachineCombinerPattern::GATHER_i16 : {
8907
+ case AArch64MachineCombinerPattern::GATHER_LANE_i16 : {
8904
8908
generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8905
8909
Pattern, 8 );
8906
8910
break ;
8907
8911
}
8908
- case AArch64MachineCombinerPattern::GATHER_i8 : {
8912
+ case AArch64MachineCombinerPattern::GATHER_LANE_i8 : {
8909
8913
generateGatherPattern (Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
8910
8914
Pattern, 16 );
8911
8915
break ;
0 commit comments