@@ -4665,3 +4665,118 @@ bool llvm::matchUnalignedExtractLoad(MachineInstr &ExtractMI,
46654665
46664666 return true ;
46674667}
4668+
4669+ // / Match unaligned vector loads and transform them to use a better-aligned
4670+ // / element type based on the actual alignment.
4671+ // / Pattern:
4672+ // / %vec:_(<32 x s16>) = G_LOAD %ptr(p0) :: (align 4)
4673+ // / Converts to:
4674+ // / %vec_new:_(<16 x s32>) = G_LOAD %ptr(p0) :: (align 4)
4675+ // / %vec:_(<32 x s16>) = G_BITCAST %vec_new(<16 x s32>)
4676+ bool llvm::matchUnalignedVectorLoad (MachineInstr &LoadMI,
4677+ MachineRegisterInfo &MRI,
4678+ GISelChangeObserver &Observer,
4679+ BuildFnTy &MatchInfo) {
4680+ assert (LoadMI.getOpcode () == TargetOpcode::G_LOAD && " Expected G_LOAD" );
4681+
4682+ // Get load information
4683+ const Register DstReg = LoadMI.getOperand (0 ).getReg ();
4684+ const LLT DstTy = MRI.getType (DstReg);
4685+
4686+ // Only process vector loads
4687+ if (!DstTy.isVector ())
4688+ return false ;
4689+
4690+ // Check memory operand for alignment
4691+ if (LoadMI.memoperands_empty ())
4692+ return false ;
4693+
4694+ const MachineMemOperand *MMO = LoadMI.memoperands ().front ();
4695+ const unsigned Alignment = MMO->getAlign ().value ();
4696+
4697+ // Skip if the vector is already well-aligned (alignment >= vector size)
4698+ const unsigned VecSizeInBytes = DstTy.getSizeInBytes ();
4699+ if (Alignment >= VecSizeInBytes)
4700+ return false ;
4701+
4702+ // Get element type information
4703+ const LLT ElemTy = DstTy.getElementType ();
4704+ const unsigned ElemSizeInBits = ElemTy.getSizeInBits ();
4705+
4706+ // Skip if the load is only used for extracts - let matchUnalignedExtractLoad
4707+ // handle it. This prevents the two combiners from competing for the same
4708+ // opportunities
4709+ const MachineFunction &MF = *LoadMI.getMF ();
4710+ const AIEBaseInstrInfo &TII =
4711+ *static_cast <const AIEBaseInstrInfo *>(MF.getSubtarget ().getInstrInfo ());
4712+ const unsigned ZExtExtractOpcode =
4713+ TII.getGenericExtractVectorEltOpcode (false );
4714+ const unsigned SExtExtractOpcode = TII.getGenericExtractVectorEltOpcode (true );
4715+ const unsigned PadVectorOpcode = TII.getGenericPadVectorOpcode ();
4716+
4717+ if (areLoadUsesValidForExtractCombine (
4718+ DstReg, ZExtExtractOpcode, SExtExtractOpcode, PadVectorOpcode, MRI))
4719+ return false ;
4720+
4721+ // Skip if the load has a single user that is a G_STORE with the same
4722+ // alignment. This case can be perfectly scalarized during legalization
4723+ if (MRI.hasOneNonDBGUse (DstReg)) {
4724+ const MachineInstr *UserMI = &*MRI.use_instr_nodbg_begin (DstReg);
4725+ if (UserMI->getOpcode () == TargetOpcode::G_STORE) {
4726+ const GStore *StoreMI = cast<GStore>(UserMI);
4727+ if (!StoreMI->memoperands_empty ()) {
4728+ const MachineMemOperand *StoreMMO = StoreMI->memoperands ().front ();
4729+ // If store has the same alignment as the load, skip
4730+ if (StoreMMO->getAlign ().value () == Alignment)
4731+ return false ;
4732+ }
4733+ }
4734+ }
4735+
4736+ // We already have the best element size option.
4737+ if (Alignment == ElemSizeInBits / 8 )
4738+ return false ;
4739+
4740+ // Only handle s8 and s16 element types that can be promoted to s32
4741+ if (ElemSizeInBits != 8 && ElemSizeInBits != 16 )
4742+ return false ;
4743+
4744+ // Determine the optimal element type based on alignment
4745+ unsigned NewElemSizeInBits = 0 ;
4746+ if (Alignment >= 4 ) {
4747+ NewElemSizeInBits = 32 ;
4748+ } else if (Alignment >= 2 ) {
4749+ NewElemSizeInBits = 16 ;
4750+ } else {
4751+ // Alignment doesn't allow for a better element type
4752+ return false ;
4753+ }
4754+
4755+ // Check if the vector size is compatible with the new element size
4756+ const unsigned VecSizeInBits = DstTy.getSizeInBits ();
4757+ if (VecSizeInBits % NewElemSizeInBits != 0 )
4758+ return false ;
4759+
4760+ MatchInfo = [=, PtrReg = LoadMI.getOperand (1 ).getReg (), &MRI,
4761+ &Observer](MachineIRBuilder &B) {
4762+ MachineFunction &MF = B.getMF ();
4763+
4764+ // Calculate new number of elements
4765+ const unsigned NewNumElems = VecSizeInBits / NewElemSizeInBits;
4766+
4767+ // Create the new vector type with better-aligned elements
4768+ const LLT NewVecTy = LLT::fixed_vector (NewNumElems, NewElemSizeInBits);
4769+ const Register NewLoadReg = MRI.createGenericVirtualRegister (NewVecTy);
4770+
4771+ // Create a new MMO with the same properties but updated type
4772+ MachineMemOperand *NewMMO = MF.getMachineMemOperand (
4773+ MMO->getPointerInfo (), MMO->getFlags (), NewVecTy, MMO->getAlign ());
4774+
4775+ Observer.createdInstr (*B.buildLoad (NewLoadReg, PtrReg, *NewMMO));
4776+
4777+ // Bitcast back to the original type
4778+ Observer.createdInstr (*B.buildBitcast (DstReg, NewLoadReg));
4779+ };
4780+
4781+ return true ;
4782+ }
0 commit comments