Skip to content

Commit 21544dd

Browse files
Add scalarization for vector ops (fallback when SIMD isn't available)
1 parent ce5e73b commit 21544dd

File tree

2 files changed

+99
-9
lines changed

2 files changed

+99
-9
lines changed

llvm/lib/Target/WebAssembly/GISel/WebAssemblyCallLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -659,8 +659,6 @@ bool WebAssemblyCallLowering::lowerFormalArguments(
659659
for (unsigned Part = 0; Part < NumParts; ++Part) {
660660
Arg.Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
661661
}
662-
buildCopyFromRegs(MIRBuilder, Arg.OrigRegs, Arg.Regs, OrigLLT, NewLLT,
663-
Arg.Flags[0], Arg.Ty->isFloatingPointTy());
664662
}
665663

666664
for (unsigned Part = 0; Part < NumParts; ++Part) {
@@ -673,6 +671,11 @@ bool WebAssemblyCallLowering::lowerFormalArguments(
673671
MFI->addParam(NewVT);
674672
++FinalArgIdx;
675673
}
674+
675+
if (NumParts != 1 || OrigVT != NewVT) {
676+
buildCopyFromRegs(MIRBuilder, Arg.OrigRegs, Arg.Regs, OrigLLT, NewLLT,
677+
Arg.Flags[0], Arg.Ty->isFloatingPointTy());
678+
}
676679
}
677680

678681
/**/

llvm/lib/Target/WebAssembly/GISel/WebAssemblyLegalizerInfo.cpp

Lines changed: 94 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
4747

4848
getActionDefinitionsBuilder(G_SELECT)
4949
.legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
50+
.scalarize(0)
5051
.widenScalarToNextPow2(0)
5152
.clampScalar(0, s32, s64)
5253
.clampScalar(1, s32, s32);
@@ -55,12 +56,14 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
5556

5657
getActionDefinitionsBuilder(G_ICMP)
5758
.legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
59+
.scalarize(0)
5860
.widenScalarToNextPow2(1)
5961
.clampScalar(1, s32, s64)
6062
.clampScalar(0, s32, s32);
6163

6264
getActionDefinitionsBuilder(G_FCMP)
6365
.customFor({{s32, s32}, {s32, s64}})
66+
.scalarize(0)
6467
.clampScalar(0, s32, s32)
6568
.libcall();
6669

@@ -77,32 +80,36 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
7780

7881
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
7982
.legalFor({s32, s64, p0})
83+
.scalarize(0)
8084
.widenScalarToNextPow2(0)
8185
.clampScalar(0, s32, s64);
8286

8387
getActionDefinitionsBuilder(
8488
{G_ADD, G_SUB, G_MUL, G_UDIV, G_SDIV, G_UREM, G_SREM})
8589
.legalFor({s32, s64})
90+
.scalarize(0)
8691
.widenScalarToNextPow2(0)
8792
.clampScalar(0, s32, s64);
8893

8994
getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL, G_CTLZ, G_CTLZ_ZERO_UNDEF,
90-
G_CTTZ, G_CTTZ_ZERO_UNDEF, G_CTPOP, G_ROTL,
91-
G_ROTR})
95+
G_CTTZ, G_CTTZ_ZERO_UNDEF, G_CTPOP})
9296
.legalFor({{s32, s32}, {s64, s64}})
97+
.scalarize(0)
9398
.widenScalarToNextPow2(0)
9499
.clampScalar(0, s32, s64)
95100
.minScalarSameAs(1, 0)
96101
.maxScalarSameAs(1, 0);
97102

98-
getActionDefinitionsBuilder({G_FSHL, G_FSHR})
103+
getActionDefinitionsBuilder({G_FSHL, G_FSHR, G_ROTL, G_ROTR})
99104
.legalFor({{s32, s32}, {s64, s64}})
105+
.scalarize(0)
100106
.lower();
101107

102108
getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
103109

104110
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
105111
.legalFor({s32, s64})
112+
.scalarize(0)
106113
.widenScalarToNextPow2(0)
107114
.clampScalar(0, s32, s64);
108115

@@ -113,14 +120,35 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
113120
G_FNEARBYINT, G_FRINT, G_INTRINSIC_ROUNDEVEN,
114121
G_FMINIMUM, G_FMAXIMUM, G_STRICT_FMUL})
115122
.legalFor({s32, s64})
123+
.scalarize(0)
116124
.minScalar(0, s32);
117125

118126
getActionDefinitionsBuilder({G_FMINNUM, G_FMAXNUM})
119127
.customFor({s32, s64})
128+
.scalarize(0)
120129
.minScalar(0, s32);
121130

131+
getActionDefinitionsBuilder({G_VECREDUCE_OR, G_VECREDUCE_AND}).scalarize(1);
132+
133+
// Legalization rules for G_BITCAST, tried in the order they are chained:
//   1. custom-legalize bitcasts from a vector with 1-bit elements to a scalar,
//   2. lower any remaining bitcast where exactly one side is a vector,
//   3. scalarize type index 0 for everything else.
getActionDefinitionsBuilder(G_BITCAST)
134+
.customIf([=](const LegalityQuery &Query) {
135+
// Handle casts from i1 vectors to scalars.
136+
// Types[0] is the destination type, Types[1] the source type.
LLT DstTy = Query.Types[0];
137+
LLT SrcTy = Query.Types[1];
138+
return DstTy.isScalar() && SrcTy.isVector() &&
139+
SrcTy.getScalarSizeInBits() == 1;
140+
})
141+
.lowerIf([=](const LegalityQuery &Query) {
142+
// True when one operand is a vector and the other is not
// (vector <-> non-vector cast in either direction).
return Query.Types[0].isVector() != Query.Types[1].isVector();
143+
})
144+
.scalarize(0);
145+
146+
getActionDefinitionsBuilder(G_MERGE_VALUES)
147+
.lowerFor({{s64, s32}, {s64, s16}, {s64, s8}, {s32, s16}, {s32, s8}});
148+
122149
getActionDefinitionsBuilder(G_FCANONICALIZE)
123150
.customFor({s32, s64})
151+
.scalarize(0)
124152
.minScalar(0, s32);
125153

126154
getActionDefinitionsBuilder({G_FMA, G_FREM})
@@ -135,6 +163,7 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
135163

136164
getActionDefinitionsBuilder(G_FCOPYSIGN)
137165
.legalFor({s32, s64})
166+
.scalarize(0)
138167
.minScalar(0, s32)
139168
.minScalarSameAs(1, 0)
140169
.maxScalarSameAs(1, 0);
@@ -147,6 +176,7 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
147176

148177
getActionDefinitionsBuilder({G_UITOFP, G_SITOFP})
149178
.legalForCartesianProduct({s32, s64}, {s32, s64})
179+
.scalarize(0)
150180
.minScalar(1, s32)
151181
.widenScalarToNextPow2(1)
152182
.clampScalar(1, s32, s64);
@@ -169,7 +199,8 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
169199
{s64, p0, s16, 1},
170200
{s64, p0, s32, 1}})
171201
.clampScalar(0, s32, s64)
172-
.lowerIfMemSizeNotByteSizePow2();
202+
.lowerIfMemSizeNotByteSizePow2()
203+
.scalarize(0);
173204

174205
getActionDefinitionsBuilder(G_STORE)
175206
.legalForTypesWithMemDesc(
@@ -181,7 +212,25 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
181212
{s64, p0, s16, 1},
182213
{s64, p0, s32, 1}})
183214
.clampScalar(0, s32, s64)
184-
.lowerIfMemSizeNotByteSizePow2();
215+
.lowerIf([=](const LegalityQuery &Query) {
216+
return Query.Types[0].isScalar() &&
217+
Query.Types[0] != Query.MMODescrs[0].MemoryTy;
218+
})
219+
.bitcastIf(
220+
[=](const LegalityQuery &Query) {
221+
// Handle stores of i1 vectors.
222+
LLT Ty = Query.Types[0];
223+
return Ty.isVector() && Ty.getScalarSizeInBits() == 1;
224+
},
225+
[=](const LegalityQuery &Query) {
226+
const LLT VecTy = Query.Types[0];
227+
return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
228+
})
229+
.scalarize(0);
230+
231+
getActionDefinitionsBuilder(
232+
{G_SHUFFLE_VECTOR, G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT})
233+
.lower();
185234

186235
getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
187236
.legalForTypesWithMemDesc({{s32, p0, s8, 1},
@@ -219,11 +268,13 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
219268

220269
getActionDefinitionsBuilder(G_ANYEXT)
221270
.legalFor({{s64, s32}})
271+
.scalarize(0)
222272
.clampScalar(0, s32, s64)
223273
.clampScalar(1, s32, s64);
224274

225275
getActionDefinitionsBuilder({G_SEXT, G_ZEXT})
226276
.legalFor({{s64, s32}})
277+
.scalarize(0)
227278
.clampScalar(0, s32, s64)
228279
.clampScalar(1, s32, s64)
229280
.lower();
@@ -238,13 +289,14 @@ WebAssemblyLegalizerInfo::WebAssemblyLegalizerInfo(
238289

239290
getActionDefinitionsBuilder(G_TRUNC)
240291
.legalFor({{s32, s64}})
292+
.scalarize(0)
241293
.clampScalar(0, s32, s64)
242294
.clampScalar(1, s32, s64)
243295
.lower();
244296

245-
getActionDefinitionsBuilder(G_FPEXT).legalFor({{s64, s32}});
297+
getActionDefinitionsBuilder(G_FPEXT).legalFor({{s64, s32}}).scalarize(0);
246298

247-
getActionDefinitionsBuilder(G_FPTRUNC).legalFor({{s32, s64}});
299+
getActionDefinitionsBuilder(G_FPTRUNC).legalFor({{s32, s64}}).scalarize(0);
248300

249301
getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
250302
getActionDefinitionsBuilder(G_VAARG)
@@ -339,6 +391,41 @@ bool WebAssemblyLegalizerInfo::legalizeCustom(
339391
MI.eraseFromParent();
340392
return true;
341393
}
394+
case TargetOpcode::G_BITCAST: {
// Custom expansion of a bitcast from a vector of 1-bit elements to a scalar:
// each element is extracted, zero-extended to the destination type, shifted
// left by its element index and OR-ed into an accumulator, so element i ends
// up in bit i of the result.
395+
// This expansion is not applied when the subtarget has SIMD128.
// NOTE(review): returning false presumably reports the instruction as not
// legalized here - confirm how the caller treats it.
if (MIRBuilder.getMF().getSubtarget<WebAssemblySubtarget>().hasSIMD128()) {
396+
return false;
397+
}
398+
399+
auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
400+
401+
// Only scalar <- vector-of-s1 bitcasts are handled; reject anything else.
if (!DstTy.isScalar() || !SrcTy.isVector() ||
402+
SrcTy.getElementType() != LLT::scalar(1))
403+
return false;
404+
405+
// Accumulator for the packed bits, starting at zero.
Register ResultReg = MRI.createGenericVirtualRegister(DstTy);
406+
MIRBuilder.buildConstant(ResultReg, 0);
407+
408+
for (unsigned i = 0; i < SrcTy.getNumElements(); i++) {
409+
// Fresh virtual registers for every step of this iteration; the
// accumulator is threaded through NewResultReg rather than redefined.
auto Elm = MRI.createGenericVirtualRegister(LLT::scalar(1));
410+
auto ExtElm = MRI.createGenericVirtualRegister(DstTy);
411+
auto ShiftedElm = MRI.createGenericVirtualRegister(DstTy);
412+
// NOTE(review): the index is an 8-bit scalar, which presumably assumes the
// source vector has fewer than 256 elements - confirm.
auto Idx = MRI.createGenericVirtualRegister(LLT::scalar(8));
413+
auto NewResultReg = MRI.createGenericVirtualRegister(DstTy);
414+
415+
// Idx serves both as the element index and as the shift amount.
MIRBuilder.buildConstant(Idx, i);
416+
MIRBuilder.buildExtractVectorElement(Elm, SrcReg, Idx);
417+
MIRBuilder.buildZExt(ExtElm, Elm, false);
418+
MIRBuilder.buildShl(ShiftedElm, ExtElm, Idx);
419+
MIRBuilder.buildOr(NewResultReg, ResultReg, ShiftedElm);
420+
421+
ResultReg = NewResultReg;
422+
}
423+
424+
// Move the accumulated value into the original destination register.
MIRBuilder.buildCopy(DstReg, ResultReg);
425+
426+
// The original G_BITCAST is fully replaced by the sequence built above.
MI.eraseFromParent();
427+
return true;
428+
}
342429
case TargetOpcode::G_FCMP: {
343430
Register LHS = MI.getOperand(2).getReg();
344431
Register RHS = MI.getOperand(3).getReg();

0 commit comments

Comments
 (0)