Skip to content

Commit 60e0120

Browse files
committed
[ARM] Improve codegen of volatile load/store of i64
Summary:
Instead of generating two i32 instructions (two LDRs or two STRs) for each load or store of a volatile i64 value, emit a single LDRD or STRD. This improvement covers architectures implementing ARMv5TE or Thumb-2.

Reviewers: dmgreen, efriedma, john.brawn, nickdesaulniers

Reviewed By: efriedma, nickdesaulniers

Subscribers: nickdesaulniers, vvereschaka, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70072
1 parent 6ff1ea3 commit 60e0120

File tree

7 files changed

+342
-6
lines changed

7 files changed

+342
-6
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1952,6 +1952,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
19521952
MI.eraseFromParent();
19531953
return true;
19541954
}
1955+
case ARM::LOADDUAL:
1956+
case ARM::STOREDUAL: {
1957+
Register PairReg = MI.getOperand(0).getReg();
1958+
1959+
MachineInstrBuilder MIB =
1960+
BuildMI(MBB, MBBI, MI.getDebugLoc(),
1961+
TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
1962+
.addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
1963+
Opcode == ARM::LOADDUAL ? RegState::Define : 0)
1964+
.addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
1965+
Opcode == ARM::LOADDUAL ? RegState::Define : 0);
1966+
for (unsigned i = 1; i < MI.getNumOperands(); i++)
1967+
MIB.add(MI.getOperand(i));
1968+
MIB.add(predOps(ARMCC::AL));
1969+
MIB.cloneMemRefs(MI);
1970+
MI.eraseFromParent();
1971+
return true;
1972+
}
19551973
}
19561974
}
19571975

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
145145

146146
// Thumb 2 Addressing Modes:
147147
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
148+
template <unsigned Shift>
149+
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
148150
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
149151
SDValue &OffImm);
150152
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@@ -1294,6 +1296,33 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
12941296
return true;
12951297
}
12961298

1299+
template <unsigned Shift>
1300+
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1301+
SDValue &OffImm) {
1302+
if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1303+
int RHSC;
1304+
if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1305+
Base = N.getOperand(0);
1306+
if (Base.getOpcode() == ISD::FrameIndex) {
1307+
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1308+
Base = CurDAG->getTargetFrameIndex(
1309+
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1310+
}
1311+
1312+
if (N.getOpcode() == ISD::SUB)
1313+
RHSC = -RHSC;
1314+
OffImm =
1315+
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1316+
return true;
1317+
}
1318+
}
1319+
1320+
// Base only.
1321+
Base = N;
1322+
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1323+
return true;
1324+
}
1325+
12971326
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
12981327
SDValue &Base, SDValue &OffImm) {
12991328
// Match simple R - imm8 operands.
@@ -3486,6 +3515,26 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
34863515
CurDAG->RemoveDeadNode(N);
34873516
return;
34883517
}
3518+
case ARMISD::LDRD: {
3519+
if (Subtarget->isThumb2())
3520+
break; // TableGen handles isel in this case.
3521+
SDValue Base, RegOffset, ImmOffset;
3522+
const SDValue &Chain = N->getOperand(0);
3523+
const SDValue &Addr = N->getOperand(1);
3524+
SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
3525+
SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
3526+
SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
3527+
{MVT::Untyped, MVT::Other}, Ops);
3528+
SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3529+
SDValue(New, 0));
3530+
SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3531+
SDValue(New, 0));
3532+
ReplaceUses(SDValue(N, 0), Lo);
3533+
ReplaceUses(SDValue(N, 1), Hi);
3534+
ReplaceUses(SDValue(N, 2), SDValue(New, 1));
3535+
CurDAG->RemoveDeadNode(N);
3536+
return;
3537+
}
34893538
case ARMISD::LOOP_DEC: {
34903539
SDValue Ops[] = { N->getOperand(1),
34913540
N->getOperand(2),

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
10771077
setOperationAction(ISD::SRA, MVT::i64, Custom);
10781078
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
10791079
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1080+
setOperationAction(ISD::LOAD, MVT::i64, Custom);
1081+
setOperationAction(ISD::STORE, MVT::i64, Custom);
10801082

10811083
// MVE lowers 64 bit shifts to lsll and lsrl
10821084
// assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1600,6 +1602,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
16001602

16011603
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
16021604

1605+
case ARMISD::LDRD: return "ARMISD::LDRD";
1606+
case ARMISD::STRD: return "ARMISD::STRD";
1607+
16031608
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
16041609
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
16051610

@@ -9087,6 +9092,24 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
90879092
return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
90889093
}
90899094

9095+
/// Custom lowering of an ISD::LOAD node, invoked from ReplaceNodeResults.
///
/// A volatile i64 load is rewritten as a single ARMISD::LDRD memory node
/// (two i32 results plus a chain) when the subtarget has ARMv5TE operations
/// and is not Thumb1-only. The two words are recombined with ISD::BUILD_PAIR
/// and the pair plus the new chain are appended to \p Results.
///
/// For any other load \p Results is left untouched.
///
/// \param N       The load node; must be an unindexed LoadSDNode.
/// \param Results Receives {i64 value, chain} when the load is rewritten.
/// \param DAG     The selection DAG being lowered.
void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                  SelectionDAG &DAG) const {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT MemVT = LD->getMemoryVT();
  assert(LD->isUnindexed() && "Loads should be unindexed at this point.");

  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
      !Subtarget->isThumb1Only() && LD->isVolatile()) {
    SDLoc dl(N);
    SDValue Result = DAG.getMemIntrinsicNode(
        ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
        {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());

    // LDRD result 0 is the word at the lower address and result 1 the word
    // at the higher address. BUILD_PAIR takes (low half, high half), and
    // which memory word is the low half depends on the target endianness:
    // on big-endian targets the lower-addressed word is the HIGH half.
    const bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
    SDValue Lo = Result.getValue(IsLittleEndian ? 0 : 1);
    SDValue Hi = Result.getValue(IsLittleEndian ? 1 : 0);

    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
    Results.append({Pair, Result.getValue(2)});
  }
}
9112+
90909113
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
90919114
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
90929115
EVT MemVT = ST->getMemoryVT();
@@ -9116,6 +9139,34 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
91169139
ST->getMemOperand());
91179140
}
91189141

9142+
/// Custom lowering of an ISD::STORE node.
///
/// Two cases are handled:
///  - A volatile i64 store on a subtarget with ARMv5TE operations that is not
///    Thumb1-only is rewritten as a single ARMISD::STRD memory node taking the
///    two i32 halves of the stored value.
///  - With MVE integer operations, stores of v4i1 / v8i1 / v16i1 predicates
///    are forwarded to LowerPredicateStore.
///
/// \param Op        The store; must be an unindexed StoreSDNode.
/// \param DAG       The selection DAG being lowered.
/// \param Subtarget The ARM subtarget queried for feature availability.
/// \returns The replacement node, or an empty SDValue when neither case
///          applies.
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
                          const ARMSubtarget *Subtarget) {
  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
  EVT MemVT = ST->getMemoryVT();
  assert(ST->isUnindexed() && "Stores should be unindexed at this point.");

  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
      !Subtarget->isThumb1Only() && ST->isVolatile()) {
    SDNode *N = Op.getNode();
    SDLoc dl(N);

    // STRD writes its first register to the lower address and its second
    // register to the higher address. EXTRACT_ELEMENT 0 is the low half of
    // the i64 and 1 the high half, so on big-endian targets the high half
    // must be passed first to land at the lower address.
    const bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
    SDValue First = DAG.getNode(
        ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
        DAG.getTargetConstant(IsLittleEndian ? 0 : 1, dl, MVT::i32));
    SDValue Second = DAG.getNode(
        ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
        DAG.getTargetConstant(IsLittleEndian ? 1 : 0, dl, MVT::i32));

    return DAG.getMemIntrinsicNode(
        ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
        {ST->getChain(), First, Second, ST->getBasePtr()}, MemVT,
        ST->getMemOperand());
  }

  if (Subtarget->hasMVEIntegerOps() &&
      (MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1))
    return LowerPredicateStore(Op, DAG);

  return SDValue();
}
9169+
91199170
static bool isZeroVector(SDValue N) {
91209171
return (ISD::isBuildVectorAllZeros(N.getNode()) ||
91219172
(N->getOpcode() == ARMISD::VMOVIMM &&
@@ -9303,7 +9354,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
93039354
case ISD::LOAD:
93049355
return LowerPredicateLoad(Op, DAG);
93059356
case ISD::STORE:
9306-
return LowerPredicateStore(Op, DAG);
9357+
return LowerSTORE(Op, DAG, Subtarget);
93079358
case ISD::MLOAD:
93089359
return LowerMLOAD(Op, DAG);
93099360
case ISD::ATOMIC_LOAD:
@@ -9405,7 +9456,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
94059456
case ISD::ABS:
94069457
lowerABS(N, Results, DAG);
94079458
return ;
9408-
9459+
case ISD::LOAD:
9460+
LowerLOAD(N, Results, DAG);
9461+
break;
94099462
}
94109463
if (Res.getNode())
94119464
Results.push_back(Res);

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,11 @@ class VectorType;
278278
VST4_UPD,
279279
VST2LN_UPD,
280280
VST3LN_UPD,
281-
VST4LN_UPD
281+
VST4LN_UPD,
282+
283+
// Load/Store of dual registers
284+
LDRD,
285+
STRD
282286
};
283287

284288
} // end namespace ARMISD
@@ -731,6 +735,8 @@ class VectorType;
731735
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
732736
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
733737
SelectionDAG &DAG) const;
738+
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
739+
SelectionDAG &DAG) const;
734740

735741
Register getRegisterByName(const char* RegName, EVT VT,
736742
const MachineFunction &MF) const override;

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,12 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
243243
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
244244
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
245245

246+
def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
247+
def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
248+
249+
def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
250+
def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
251+
246252
// Vector operations shared between NEON and MVE
247253

248254
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -2695,6 +2701,14 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
26952701
Requires<[IsARM, HasV5TE]>;
26962702
}
26972703

2704+
let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
2705+
def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
2706+
64, IIC_iLoad_d_r, []>,
2707+
Requires<[IsARM, HasV5TE]> {
2708+
let AM = AddrMode3;
2709+
}
2710+
}
2711+
26982712
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
26992713
NoItinerary, "lda", "\t$Rt, $addr", []>;
27002714
def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -2970,6 +2984,19 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
29702984
}
29712985
}
29722986

2987+
let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
2988+
def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
2989+
64, IIC_iStore_d_r, []>,
2990+
Requires<[IsARM, HasV5TE]> {
2991+
let AM = AddrMode3;
2992+
}
2993+
}
2994+
2995+
let Predicates = [IsARM, HasV5TE] in {
2996+
def : Pat<(ARMstrd GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
2997+
(STOREDUAL (REG_SEQUENCE GPRPair, GPR:$Rt, gsub_0, GPR:$Rt2, gsub_1), addrmode3:$addr)>;
2998+
}
2999+
29733000
// Indexed stores
29743001
multiclass AI2_stridx<bit isByte, string opc,
29753002
InstrItinClass iii, InstrItinClass iir> {

llvm/lib/Target/ARM/ARMInstrThumb2.td

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,8 @@ def t2am_imm8_offset : MemOperand,
270270

271271
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
272272
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
273-
class T2AddrMode_Imm8s4 : MemOperand {
273+
class T2AddrMode_Imm8s4 : MemOperand,
274+
ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
274275
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
275276
let DecoderMethod = "DecodeT2AddrModeImm8s4";
276277
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -1412,7 +1413,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
14121413
// Load doubleword
14131414
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
14141415
(ins t2addrmode_imm8s4:$addr),
1415-
IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
1416+
IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
1417+
[(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
14161418
Sched<[WriteLd]>;
14171419
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
14181420

@@ -1593,7 +1595,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
15931595
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
15941596
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
15951597
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
1596-
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
1598+
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
1599+
[(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
15971600
Sched<[WriteST]>;
15981601

15991602
// Indexed stores

0 commit comments

Comments
 (0)