Skip to content

Commit bd80000

Browse files
committed
[AIEX] Add a combiner to change vector load element type based on alignment
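When a vector load is under-aligned for its full size but aligned to more than its element size, loading wider elements improves the legalized code.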
1 parent c6d6d3e commit bd80000

File tree

5 files changed

+760
-5
lines changed

5 files changed

+760
-5
lines changed

llvm/lib/Target/AIE/AIECombine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,12 @@ def combine_unaligned_extract_load : GICombineRule<
258258
[{ return matchUnalignedExtractLoad(*${root}, MRI, Observer, ${matchinfo}); }]),
259259
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
260260

261+
// Combine rule rooted at any G_LOAD. The C++ predicate
// matchUnalignedVectorLoad decides whether the load qualifies and, if so,
// records a build function in $matchinfo; the apply step then runs that
// build function through Helper.applyBuildFn. Note that this rule uses
// applyBuildFn rather than the applyBuildFnNoErase variant used by
// combine_unaligned_extract_load (presumably so the original G_LOAD is
// erased once its result has been redefined - confirm against CombinerHelper).
def combine_unaligned_vector_load : GICombineRule<
262+
(defs root:$root, build_fn_matchinfo:$matchinfo),
263+
(match (wip_match_opcode G_LOAD): $root,
264+
[{ return matchUnalignedVectorLoad(*${root}, MRI, Observer, ${matchinfo}); }]),
265+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
266+
261267
// AIE-specific combines (currently shared by AIE2 and AIE2P).
262268
def aie_additional_combines : GICombineGroup<[
263269
combine_unpad_vector,
@@ -281,7 +287,8 @@ def aie_additional_combines : GICombineGroup<[
281287
combine_peel_memset,
282288
combine_pack_stores_into_memset,
283289
combine_trunc_load,
284-
combine_unaligned_extract_load
290+
combine_unaligned_extract_load,
291+
combine_unaligned_vector_load
285292
]>;
286293

287294
// AIE2P-specific combines.

llvm/lib/Target/AIE/AIECombinerHelper.cpp

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4665,3 +4665,118 @@ bool llvm::matchUnalignedExtractLoad(MachineInstr &ExtractMI,
46654665

46664666
return true;
46674667
}
4668+
4669+
/// Match unaligned vector loads and transform them to use a better-aligned
4670+
/// element type based on the actual alignment.
4671+
/// Pattern:
4672+
/// %vec:_(<32 x s16>) = G_LOAD %ptr(p0) :: (align 4)
4673+
/// Converts to:
4674+
/// %vec_new:_(<16 x s32>) = G_LOAD %ptr(p0) :: (align 4)
4675+
/// %vec:_(<32 x s16>) = G_BITCAST %vec_new(<16 x s32>)
4676+
bool llvm::matchUnalignedVectorLoad(MachineInstr &LoadMI,
4677+
MachineRegisterInfo &MRI,
4678+
GISelChangeObserver &Observer,
4679+
BuildFnTy &MatchInfo) {
4680+
assert(LoadMI.getOpcode() == TargetOpcode::G_LOAD && "Expected G_LOAD");
4681+
4682+
// Get load information
4683+
const Register DstReg = LoadMI.getOperand(0).getReg();
4684+
const LLT DstTy = MRI.getType(DstReg);
4685+
4686+
// Only process vector loads
4687+
if (!DstTy.isVector())
4688+
return false;
4689+
4690+
// Check memory operand for alignment
4691+
if (LoadMI.memoperands_empty())
4692+
return false;
4693+
4694+
const MachineMemOperand *MMO = LoadMI.memoperands().front();
4695+
const unsigned Alignment = MMO->getAlign().value();
4696+
4697+
// Skip if the vector is already well-aligned (alignment >= vector size)
4698+
const unsigned VecSizeInBytes = DstTy.getSizeInBytes();
4699+
if (Alignment >= VecSizeInBytes)
4700+
return false;
4701+
4702+
// Get element type information
4703+
const LLT ElemTy = DstTy.getElementType();
4704+
const unsigned ElemSizeInBits = ElemTy.getSizeInBits();
4705+
4706+
// Skip if the load is only used for extracts - let matchUnalignedExtractLoad
4707+
// handle it. This prevents the two combiners from competing for the same
4708+
// opportunities
4709+
const MachineFunction &MF = *LoadMI.getMF();
4710+
const AIEBaseInstrInfo &TII =
4711+
*static_cast<const AIEBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
4712+
const unsigned ZExtExtractOpcode =
4713+
TII.getGenericExtractVectorEltOpcode(false);
4714+
const unsigned SExtExtractOpcode = TII.getGenericExtractVectorEltOpcode(true);
4715+
const unsigned PadVectorOpcode = TII.getGenericPadVectorOpcode();
4716+
4717+
if (areLoadUsesValidForExtractCombine(
4718+
DstReg, ZExtExtractOpcode, SExtExtractOpcode, PadVectorOpcode, MRI))
4719+
return false;
4720+
4721+
// Skip if the load has a single user that is a G_STORE with the same
4722+
// alignment. This case can be perfectly scalarized during legalization
4723+
if (MRI.hasOneNonDBGUse(DstReg)) {
4724+
const MachineInstr *UserMI = &*MRI.use_instr_nodbg_begin(DstReg);
4725+
if (UserMI->getOpcode() == TargetOpcode::G_STORE) {
4726+
const GStore *StoreMI = cast<GStore>(UserMI);
4727+
if (!StoreMI->memoperands_empty()) {
4728+
const MachineMemOperand *StoreMMO = StoreMI->memoperands().front();
4729+
// If store has the same alignment as the load, skip
4730+
if (StoreMMO->getAlign().value() == Alignment)
4731+
return false;
4732+
}
4733+
}
4734+
}
4735+
4736+
// We already have the best element size option.
4737+
if (Alignment == ElemSizeInBits / 8)
4738+
return false;
4739+
4740+
// Only handle s8 and s16 element types that can be promoted to s32
4741+
if (ElemSizeInBits != 8 && ElemSizeInBits != 16)
4742+
return false;
4743+
4744+
// Determine the optimal element type based on alignment
4745+
unsigned NewElemSizeInBits = 0;
4746+
if (Alignment >= 4) {
4747+
NewElemSizeInBits = 32;
4748+
} else if (Alignment >= 2) {
4749+
NewElemSizeInBits = 16;
4750+
} else {
4751+
// Alignment doesn't allow for a better element type
4752+
return false;
4753+
}
4754+
4755+
// Check if the vector size is compatible with the new element size
4756+
const unsigned VecSizeInBits = DstTy.getSizeInBits();
4757+
if (VecSizeInBits % NewElemSizeInBits != 0)
4758+
return false;
4759+
4760+
MatchInfo = [=, PtrReg = LoadMI.getOperand(1).getReg(), &MRI,
4761+
&Observer](MachineIRBuilder &B) {
4762+
MachineFunction &MF = B.getMF();
4763+
4764+
// Calculate new number of elements
4765+
const unsigned NewNumElems = VecSizeInBits / NewElemSizeInBits;
4766+
4767+
// Create the new vector type with better-aligned elements
4768+
const LLT NewVecTy = LLT::fixed_vector(NewNumElems, NewElemSizeInBits);
4769+
const Register NewLoadReg = MRI.createGenericVirtualRegister(NewVecTy);
4770+
4771+
// Create a new MMO with the same properties but updated type
4772+
MachineMemOperand *NewMMO = MF.getMachineMemOperand(
4773+
MMO->getPointerInfo(), MMO->getFlags(), NewVecTy, MMO->getAlign());
4774+
4775+
Observer.createdInstr(*B.buildLoad(NewLoadReg, PtrReg, *NewMMO));
4776+
4777+
// Bitcast back to the original type
4778+
Observer.createdInstr(*B.buildBitcast(DstReg, NewLoadReg));
4779+
};
4780+
4781+
return true;
4782+
}

llvm/lib/Target/AIE/AIECombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,10 @@ bool matchUnalignedExtractLoad(MachineInstr &ExtractMI,
313313
MachineRegisterInfo &MRI,
314314
GISelChangeObserver &Observer,
315315
BuildFnTy &MatchInfo);
316+
317+
/// Match a vector G_LOAD (\p LoadMI) of s8/s16 elements whose alignment is
/// smaller than the vector size but allows a wider element type, and set
/// \p MatchInfo to a build function that re-emits the load with s16/s32
/// elements followed by a G_BITCAST to the original destination register.
/// Returns false (leaving \p MatchInfo untouched) when the load does not
/// qualify, e.g. when it only feeds extracts or a same-aligned G_STORE.
bool matchUnalignedVectorLoad(MachineInstr &LoadMI, MachineRegisterInfo &MRI,
318+
GISelChangeObserver &Observer,
319+
BuildFnTy &MatchInfo);
316320
} // namespace llvm
317321

318322
#endif

0 commit comments

Comments
 (0)