Skip to content

Commit 0835d56

Browse files
Enable regbankselect to choose f32/f64 for ambiguous instructions (e.g. load)
1 parent 21544dd commit 0835d56

File tree

5 files changed

+236
-22
lines changed

5 files changed

+236
-22
lines changed

llvm/lib/Target/WebAssembly/GISel/WebAssemblyLegalizerInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
3838

3939
getActionDefinitionsBuilder(G_PHI)
4040
.legalFor({p0, s32, s64})
41+
.scalarize(0)
4142
.widenScalarToNextPow2(0)
4243
.clampScalar(0, s32, s64);
4344
getActionDefinitionsBuilder(G_BR).alwaysLegal();

llvm/lib/Target/WebAssembly/GISel/WebAssemblyRegisterBankInfo.cpp

Lines changed: 194 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,113 @@ using namespace llvm;
6161
WebAssemblyRegisterBankInfo::WebAssemblyRegisterBankInfo(
6262
const TargetRegisterInfo &TRI) {}
6363

64+
bool WebAssemblyRegisterBankInfo::isPHIWithFPConstraints(
65+
const MachineInstr &MI, const MachineRegisterInfo &MRI,
66+
const WebAssemblyRegisterInfo &TRI, const unsigned Depth) const {
67+
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
68+
return false;
69+
70+
return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
71+
[&](const MachineInstr &UseMI) {
72+
if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
73+
return true;
74+
return isPHIWithFPConstraints(UseMI, MRI, TRI, Depth + 1);
75+
});
76+
}
77+
78+
bool WebAssemblyRegisterBankInfo::hasFPConstraints(
79+
const MachineInstr &MI, const MachineRegisterInfo &MRI,
80+
const WebAssemblyRegisterInfo &TRI, unsigned Depth) const {
81+
unsigned Op = MI.getOpcode();
82+
// if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
83+
// return true;
84+
85+
// Do we have an explicit floating point instruction?
86+
if (isPreISelGenericFloatingPointOpcode(Op))
87+
return true;
88+
89+
// No. Check if we have a copy-like instruction. If we do, then we could
90+
// still be fed by floating point instructions.
91+
if (Op != TargetOpcode::COPY && !MI.isPHI() &&
92+
!isPreISelGenericOptimizationHint(Op))
93+
return false;
94+
95+
// Check if we already know the register bank.
96+
auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
97+
if (RB == &WebAssembly::F32RegBank || RB == &WebAssembly::F64RegBank)
98+
return true;
99+
if (RB == &WebAssembly::I32RegBank || RB == &WebAssembly::I64RegBank)
100+
return false;
101+
102+
// We don't know anything.
103+
//
104+
// If we have a phi, we may be able to infer that it will be assigned a FPR
105+
// based off of its inputs.
106+
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
107+
return false;
108+
109+
return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
110+
return Op.isReg() &&
111+
onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
112+
});
113+
}
114+
115+
bool WebAssemblyRegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
116+
const MachineRegisterInfo &MRI,
117+
const WebAssemblyRegisterInfo &TRI,
118+
unsigned Depth) const {
119+
switch (MI.getOpcode()) {
120+
case TargetOpcode::G_FPTOSI:
121+
case TargetOpcode::G_FPTOUI:
122+
case TargetOpcode::G_FPTOSI_SAT:
123+
case TargetOpcode::G_FPTOUI_SAT:
124+
case TargetOpcode::G_FCMP:
125+
case TargetOpcode::G_LROUND:
126+
case TargetOpcode::G_LLROUND:
127+
return true;
128+
default:
129+
break;
130+
}
131+
return hasFPConstraints(MI, MRI, TRI, Depth);
132+
}
133+
134+
bool WebAssemblyRegisterBankInfo::onlyDefinesFP(
135+
const MachineInstr &MI, const MachineRegisterInfo &MRI,
136+
const WebAssemblyRegisterInfo &TRI, unsigned Depth) const {
137+
switch (MI.getOpcode()) {
138+
case TargetOpcode::G_SITOFP:
139+
case TargetOpcode::G_UITOFP:
140+
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
141+
case TargetOpcode::G_INSERT_VECTOR_ELT:
142+
case TargetOpcode::G_BUILD_VECTOR:
143+
case TargetOpcode::G_BUILD_VECTOR_TRUNC:
144+
return true;
145+
default:
146+
break;
147+
}
148+
return hasFPConstraints(MI, MRI, TRI, Depth);
149+
}
150+
151+
bool WebAssemblyRegisterBankInfo::prefersFPUse(
152+
const MachineInstr &MI, const MachineRegisterInfo &MRI,
153+
const WebAssemblyRegisterInfo &TRI, unsigned Depth) const {
154+
switch (MI.getOpcode()) {
155+
case TargetOpcode::G_SITOFP:
156+
case TargetOpcode::G_UITOFP:
157+
return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() ==
158+
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
159+
}
160+
return onlyDefinesFP(MI, MRI, TRI, Depth);
161+
}
162+
64163
const RegisterBankInfo::InstructionMapping &
65164
WebAssemblyRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
66165

67166
unsigned Opc = MI.getOpcode();
68167
const MachineFunction &MF = *MI.getParent()->getParent();
69168
const MachineRegisterInfo &MRI = MF.getRegInfo();
70-
const TargetSubtargetInfo &STI = MF.getSubtarget();
71-
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
169+
const WebAssemblySubtarget &STI = MF.getSubtarget<WebAssemblySubtarget>();
170+
const WebAssemblyRegisterInfo &TRI = *STI.getRegisterInfo();
72171

73172
if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
74173
Opc == TargetOpcode::G_PHI) {
@@ -223,13 +322,50 @@ WebAssemblyRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
223322
}
224323
case G_LOAD:
225324
case G_ZEXTLOAD:
226-
case G_SEXTLOAD:
227-
case G_STORE:
325+
case G_SEXTLOAD: {
228326
if (MRI.getType(MI.getOperand(1).getReg()).getAddressSpace() != 0)
229327
break;
328+
329+
auto *LoadValueMapping = &Op0IntValueMapping;
330+
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
331+
[&](const MachineInstr &UseMI) {
332+
// If we have at least one direct or indirect use
333+
// in a FP instruction,
334+
// assume this was a floating point load in the IR. If it was
335+
// not, we would have had a bitcast before reaching that
336+
// instruction.
337+
//
338+
// Int->FP conversion operations are also captured in
339+
// prefersFPUse().
340+
341+
if (isPHIWithFPConstraints(UseMI, MRI, TRI))
342+
return true;
343+
344+
return onlyUsesFP(UseMI, MRI, TRI) ||
345+
prefersFPUse(UseMI, MRI, TRI);
346+
}))
347+
LoadValueMapping = &Op0FloatValueMapping;
230348
OperandsMapping =
231-
getOperandsMapping({&Op0IntValueMapping, &Pointer0ValueMapping});
349+
getOperandsMapping({LoadValueMapping, &Pointer0ValueMapping});
350+
break;
351+
}
352+
case G_STORE: {
353+
if (MRI.getType(MI.getOperand(1).getReg()).getAddressSpace() != 0)
354+
break;
355+
356+
Register VReg = MI.getOperand(0).getReg();
357+
if (!VReg)
358+
break;
359+
MachineInstr *DefMI = MRI.getVRegDef(VReg);
360+
if (onlyDefinesFP(*DefMI, MRI, TRI)) {
361+
OperandsMapping =
362+
getOperandsMapping({&Op0FloatValueMapping, &Pointer0ValueMapping});
363+
} else {
364+
OperandsMapping =
365+
getOperandsMapping({&Op0IntValueMapping, &Pointer0ValueMapping});
366+
}
232367
break;
368+
}
233369
case G_MEMCPY:
234370
case G_MEMMOVE: {
235371
if (MRI.getType(MI.getOperand(0).getReg()).getAddressSpace() != 0)
@@ -375,11 +511,60 @@ WebAssemblyRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
375511
// We only care about the mapping of the destination for COPY.
376512
1);
377513
}
378-
case G_SELECT:
379-
OperandsMapping = getOperandsMapping(
380-
{&Op0IntValueMapping, &WebAssembly::ValueMappings[WebAssembly::I32Idx],
381-
&Op0IntValueMapping, &Op0IntValueMapping});
514+
case G_SELECT: {
515+
// Try to minimize the number of copies. If we have more floating point
516+
// constrained values than not, then we'll put everything on FPR. Otherwise,
517+
// everything has to be on GPR.
518+
unsigned NumFP = 0;
519+
520+
// Check if the uses of the result always produce floating point values.
521+
//
522+
// For example:
523+
//
524+
// %z = G_SELECT %cond %x %y
525+
// fpr = G_FOO %z ...
526+
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
527+
[&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
528+
++NumFP;
529+
530+
// Check if the defs of the source values always produce floating point
531+
// values.
532+
//
533+
// For example:
534+
//
535+
// %x = G_SOMETHING_ALWAYS_FLOAT %a ...
536+
// %z = G_SELECT %cond %x %y
537+
//
538+
// Also check whether or not the sources have already been decided to be
539+
// FPR. Keep track of this.
540+
//
541+
// This doesn't check the condition: it is always given the I32 value
542+
// mapping below, whichever bank the selected values end up on.
543+
for (unsigned Idx = 2; Idx < 4; ++Idx) {
544+
Register VReg = MI.getOperand(Idx).getReg();
545+
MachineInstr *DefMI = MRI.getVRegDef(VReg);
546+
if (getRegBank(VReg, MRI, TRI) == &WebAssembly::F32RegBank ||
547+
getRegBank(VReg, MRI, TRI) == &WebAssembly::F64RegBank ||
548+
onlyDefinesFP(*DefMI, MRI, TRI))
549+
++NumFP;
550+
}
551+
552+
// If we have more FP constraints than not, then move everything over to
553+
// FPR.
554+
if (NumFP >= 2) {
555+
OperandsMapping =
556+
getOperandsMapping({&Op0FloatValueMapping,
557+
&WebAssembly::ValueMappings[WebAssembly::I32Idx],
558+
&Op0FloatValueMapping, &Op0FloatValueMapping});
559+
560+
} else {
561+
OperandsMapping =
562+
getOperandsMapping({&Op0IntValueMapping,
563+
&WebAssembly::ValueMappings[WebAssembly::I32Idx],
564+
&Op0IntValueMapping, &Op0IntValueMapping});
565+
}
382566
break;
567+
}
383568
case G_FPTOSI:
384569
case G_FPTOSI_SAT:
385570
case G_FPTOUI:

llvm/lib/Target/WebAssembly/GISel/WebAssemblyRegisterBankInfo.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERBANKINFO_H
1414
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERBANKINFO_H
1515

16+
#include "WebAssemblyRegisterInfo.h"
1617
#include "llvm/CodeGen/RegisterBankInfo.h"
1718

1819
#define GET_REGBANK_DECLARATIONS
@@ -35,6 +36,33 @@ class WebAssemblyRegisterBankInfo final
3536

3637
const InstructionMapping &
3738
getInstrMapping(const MachineInstr &MI) const override;
39+
40+
/// Maximum recursion depth for hasFPConstraints and isPHIWithFPConstraints.
41+
const unsigned MaxFPRSearchDepth = 2;
42+
43+
/// \returns true if \p MI is a PHI whose def is used by
44+
/// any instruction that onlyUsesFP.
45+
bool isPHIWithFPConstraints(const MachineInstr &MI,
46+
const MachineRegisterInfo &MRI,
47+
const WebAssemblyRegisterInfo &TRI,
48+
unsigned Depth = 0) const;
49+
50+
/// \returns true if \p MI only uses and defines FPRs.
51+
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
52+
const WebAssemblyRegisterInfo &TRI,
53+
unsigned Depth = 0) const;
54+
55+
/// \returns true if \p MI only uses FPRs.
56+
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
57+
const WebAssemblyRegisterInfo &TRI, unsigned Depth = 0) const;
58+
59+
/// \returns true if \p MI only defines FPRs.
60+
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
61+
const WebAssemblyRegisterInfo &TRI, unsigned Depth = 0) const;
62+
63+
/// \returns true if \p MI can take both fpr and gpr uses, but prefers fp.
64+
bool prefersFPUse(const MachineInstr &MI, const MachineRegisterInfo &MRI,
65+
const WebAssemblyRegisterInfo &TRI, unsigned Depth = 0) const;
3866
};
3967
} // end namespace llvm
4068
#endif

llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ defm STORE_I64 : WebAssemblyStore<I64, "i64.store", 0x37>;
145145
defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>;
146146
defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>;
147147

148-
multiclass StorePat<ValueType ty, SDPatternOperator kind, string Name> {
149-
def : Pat<(kind ty:$val, (AddrOps32 offset32_op:$offset, I32:$addr)),
148+
multiclass StorePat<ValueType ty, WebAssemblyRegClass rc, SDPatternOperator kind, string Name> {
149+
def : Pat<(kind (ty rc:$val), (AddrOps32 offset32_op:$offset, I32:$addr)),
150150
(!cast<NI>(Name # "_A32") 0,
151151
offset32_op:$offset,
152152
I32:$addr,
@@ -160,10 +160,10 @@ multiclass StorePat<ValueType ty, SDPatternOperator kind, string Name> {
160160
Requires<[HasAddr64]>;
161161
}
162162

163-
defm : StorePat<i32, store, "STORE_I32">;
164-
defm : StorePat<i64, store, "STORE_I64">;
165-
defm : StorePat<f32, store, "STORE_F32">;
166-
defm : StorePat<f64, store, "STORE_F64">;
163+
defm : StorePat<i32, I32, store, "STORE_I32">;
164+
defm : StorePat<i64, I64, store, "STORE_I64">;
165+
defm : StorePat<f32, F32, store, "STORE_F32">;
166+
defm : StorePat<f64, F64, store, "STORE_F64">;
167167

168168
// Truncating store.
169169
defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
@@ -176,13 +176,13 @@ defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
176176
defm STORE_F16_F32 :
177177
WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasFP16]>;
178178

179-
defm : StorePat<i32, truncstorei8, "STORE8_I32">;
180-
defm : StorePat<i32, truncstorei16, "STORE16_I32">;
181-
defm : StorePat<i64, truncstorei8, "STORE8_I64">;
182-
defm : StorePat<i64, truncstorei16, "STORE16_I64">;
183-
defm : StorePat<i64, truncstorei32, "STORE32_I64">;
179+
defm : StorePat<i32, I32, truncstorei8, "STORE8_I32">;
180+
defm : StorePat<i32, I32, truncstorei16, "STORE16_I32">;
181+
defm : StorePat<i64, I64, truncstorei8, "STORE8_I64">;
182+
defm : StorePat<i64, I64, truncstorei16, "STORE16_I64">;
183+
defm : StorePat<i64, I64, truncstorei32, "STORE32_I64">;
184184

185-
defm : StorePat<f32, int_wasm_storef16_f32, "STORE_F16_F32">;
185+
defm : StorePat<f32, F32, int_wasm_storef16_f32, "STORE_F16_F32">;
186186

187187
multiclass MemoryOps<WebAssemblyRegClass rc, string B> {
188188
// Current memory size.

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ defm STORE_V128_A64 :
391391

392392
// Def store patterns from WebAssemblyInstrMemory.td for vector types
393393
foreach vec = AllVecs in {
394-
defm : StorePat<vec.vt, store, "STORE_V128">;
394+
defm : StorePat<vec.vt, V128, store, "STORE_V128">;
395395
}
396396

397397
// Store lane

0 commit comments

Comments
 (0)