Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit ef5bf36

Browse files
committed
[WebAssembly] Parsing missing directives to produce valid .o
Summary: The assembler was able to assemble and then dump back to .s, but was failing to parse certain directives necessary for valid .o output: - .type directives are now recognized to distinguish function symbols and others. - .size is now parsed to provide function size. - .globaltype (introduced in https://reviews.llvm.org/D54012) is now recognized to ensure symbols like __stack_pointer have a proper type set for both .s and .o output. Also added tests for the above. Reviewers: sbc100, dschuff Subscribers: jgravelle-google, aheejin, dexonsmith, kristina, llvm-commits, sunfish Differential Revision: https://reviews.llvm.org/D53842 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346047 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 83e97cb commit ef5bf36

File tree

3 files changed

+108
-34
lines changed

3 files changed

+108
-34
lines changed

lib/MC/MCWasmStreamer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ void MCWasmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
6161
void MCWasmStreamer::ChangeSection(MCSection *Section,
6262
const MCExpr *Subsection) {
6363
MCAssembler &Asm = getAssembler();
64-
auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
64+
auto *SectionWasm = cast<MCSectionWasm>(Section);
6565
const MCSymbol *Grp = SectionWasm->getGroup();
6666
if (Grp)
6767
Asm.registerSymbol(*Grp);

lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp

Lines changed: 100 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/MC/MCStreamer.h"
2626
#include "llvm/MC/MCSubtargetInfo.h"
2727
#include "llvm/MC/MCSymbol.h"
28+
#include "llvm/MC/MCSymbolWasm.h"
2829
#include "llvm/Support/Endian.h"
2930
#include "llvm/Support/TargetRegistry.h"
3031

@@ -131,14 +132,14 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
131132
class WebAssemblyAsmParser final : public MCTargetAsmParser {
132133
MCAsmParser &Parser;
133134
MCAsmLexer &Lexer;
134-
MCSymbol *LastLabel;
135+
MCSymbolWasm *LastSymbol;
135136

136137
public:
137-
WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
138-
const MCInstrInfo &mii, const MCTargetOptions &Options)
139-
: MCTargetAsmParser(Options, sti, mii), Parser(Parser),
140-
Lexer(Parser.getLexer()), LastLabel(nullptr) {
141-
setAvailableFeatures(ComputeAvailableFeatures(sti.getFeatureBits()));
138+
WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
139+
const MCInstrInfo &MII, const MCTargetOptions &Options)
140+
: MCTargetAsmParser(Options, STI, MII), Parser(Parser),
141+
Lexer(Parser.getLexer()), LastSymbol(nullptr) {
142+
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
142143
}
143144

144145
#define GET_ASSEMBLER_HEADER
@@ -168,24 +169,26 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
168169
return false;
169170
}
170171

171-
MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
172+
173+
std::pair<MVT::SimpleValueType, unsigned>
174+
ParseRegType(const StringRef &RegType) {
172175
// Derive type from .param .local decls, or the instruction itself.
173-
return StringSwitch<MVT::SimpleValueType>(RegType)
174-
.Case("i32", MVT::i32)
175-
.Case("i64", MVT::i64)
176-
.Case("f32", MVT::f32)
177-
.Case("f64", MVT::f64)
178-
.Case("i8x16", MVT::v16i8)
179-
.Case("i16x8", MVT::v8i16)
180-
.Case("i32x4", MVT::v4i32)
181-
.Case("i64x2", MVT::v2i64)
182-
.Case("f32x4", MVT::v4f32)
183-
.Case("f64x2", MVT::v2f64)
176+
return StringSwitch<std::pair<MVT::SimpleValueType, unsigned>>(RegType)
177+
.Case("i32", {MVT::i32, wasm::WASM_TYPE_I32})
178+
.Case("i64", {MVT::i64, wasm::WASM_TYPE_I64})
179+
.Case("f32", {MVT::f32, wasm::WASM_TYPE_F32})
180+
.Case("f64", {MVT::f64, wasm::WASM_TYPE_F64})
181+
.Case("i8x16", {MVT::v16i8, wasm::WASM_TYPE_V128})
182+
.Case("i16x8", {MVT::v8i16, wasm::WASM_TYPE_V128})
183+
.Case("i32x4", {MVT::v4i32, wasm::WASM_TYPE_V128})
184+
.Case("i64x2", {MVT::v2i64, wasm::WASM_TYPE_V128})
185+
.Case("f32x4", {MVT::v4f32, wasm::WASM_TYPE_V128})
186+
.Case("f64x2", {MVT::v2f64, wasm::WASM_TYPE_V128})
184187
// arbitrarily chosen vector type to associate with "v128"
185188
// FIXME: should these be EVTs to avoid this arbitrary hack? Do we want
186189
// to accept more specific SIMD register types?
187-
.Case("v128", MVT::v16i8)
188-
.Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
190+
.Case("v128", {MVT::v16i8, wasm::WASM_TYPE_V128})
191+
.Default({MVT::INVALID_SIMPLE_VALUE_TYPE, wasm::WASM_TYPE_NORESULT});
189192
}
190193

191194
void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
@@ -311,24 +314,84 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
311314
return false;
312315
}
313316

314-
void onLabelParsed(MCSymbol *Symbol) override { LastLabel = Symbol; }
317+
void onLabelParsed(MCSymbol *Symbol) override {
318+
LastSymbol = cast<MCSymbolWasm>(Symbol);
319+
}
315320

316321
bool ParseDirective(AsmToken DirectiveID) override {
322+
// This function has a really weird return value behavior that is different
323+
// from all the other parsing functions:
324+
// - return true && no tokens consumed -> don't know this directive / let
325+
// the generic parser handle it.
326+
// - return true && tokens consumed -> a parsing error occurred.
327+
// - return false -> processed this directive successfully.
317328
assert(DirectiveID.getKind() == AsmToken::Identifier);
318329
auto &Out = getStreamer();
319330
auto &TOut =
320331
reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
321-
// TODO: we're just parsing the subset of directives we're interested in,
322-
// and ignoring ones we don't recognise. We should ideally verify
323-
// all directives here.
332+
// TODO: any time we return an error, at least one token must have been
333+
// consumed, otherwise this will not signal an error to the caller.
324334
if (DirectiveID.getString() == ".type") {
325335
// This could be the start of a function, check if followed by
326336
// "label,@function"
327-
if (!(IsNext(AsmToken::Identifier) && IsNext(AsmToken::Comma) &&
328-
IsNext(AsmToken::At) && Lexer.is(AsmToken::Identifier)))
337+
if (!Lexer.is(AsmToken::Identifier))
338+
return Error("Expected label after .type directive, got: ",
339+
Lexer.getTok());
340+
auto WasmSym = cast<MCSymbolWasm>(
341+
TOut.getStreamer().getContext().getOrCreateSymbol(
342+
Lexer.getTok().getString()));
343+
Parser.Lex();
344+
if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) &&
345+
Lexer.is(AsmToken::Identifier)))
329346
return Error("Expected label,@type declaration, got: ", Lexer.getTok());
347+
auto TypeName = Lexer.getTok().getString();
348+
if (TypeName == "function")
349+
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
350+
else if (TypeName == "global")
351+
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
352+
else
353+
return Error("Unknown WASM symbol type: ", Lexer.getTok());
354+
Parser.Lex();
355+
return Expect(AsmToken::EndOfStatement, "EOL");
356+
} else if (DirectiveID.getString() == ".size") {
357+
if (!Lexer.is(AsmToken::Identifier))
358+
return Error("Expected label after .size directive, got: ",
359+
Lexer.getTok());
360+
auto WasmSym = cast<MCSymbolWasm>(
361+
TOut.getStreamer().getContext().getOrCreateSymbol(
362+
Lexer.getTok().getString()));
330363
Parser.Lex();
331-
// Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
364+
if (!IsNext(AsmToken::Comma))
365+
return Error("Expected `,`, got: ", Lexer.getTok());
366+
const MCExpr *Exp;
367+
if (Parser.parseExpression(Exp))
368+
return Error("Cannot parse .size expression: ", Lexer.getTok());
369+
WasmSym->setSize(Exp);
370+
return Expect(AsmToken::EndOfStatement, "EOL");
371+
} else if (DirectiveID.getString() == ".globaltype") {
372+
if (!Lexer.is(AsmToken::Identifier))
373+
return Error("Expected symbol name after .globaltype directive, got: ",
374+
Lexer.getTok());
375+
auto Name = Lexer.getTok().getString();
376+
Parser.Lex();
377+
if (!IsNext(AsmToken::Comma))
378+
return Error("Expected `,`, got: ", Lexer.getTok());
379+
if (!Lexer.is(AsmToken::Identifier))
380+
return Error("Expected type in .globaltype directive, got: ",
381+
Lexer.getTok());
382+
auto Type = ParseRegType(Lexer.getTok().getString()).second;
383+
if (Type == wasm::WASM_TYPE_NORESULT)
384+
return Error("Unknown type in .globaltype directive: ",
385+
Lexer.getTok());
386+
Parser.Lex();
387+
// Now set this symbol with the correct type.
388+
auto WasmSym = cast<MCSymbolWasm>(
389+
TOut.getStreamer().getContext().getOrCreateSymbol(Name));
390+
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
391+
WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), true});
392+
// And emit the directive again.
393+
TOut.emitGlobalType(WasmSym);
394+
return Expect(AsmToken::EndOfStatement, "EOL");
332395
} else if (DirectiveID.getString() == ".param" ||
333396
DirectiveID.getString() == ".local") {
334397
// Track the number of locals, needed for correct virtual register
@@ -337,7 +400,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
337400
std::vector<MVT> Params;
338401
std::vector<MVT> Locals;
339402
while (Lexer.is(AsmToken::Identifier)) {
340-
auto RegType = ParseRegType(Lexer.getTok().getString());
403+
auto RegType = ParseRegType(Lexer.getTok().getString()).first;
341404
if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE)
342405
return true;
343406
if (DirectiveID.getString() == ".param") {
@@ -349,15 +412,20 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
349412
if (!IsNext(AsmToken::Comma))
350413
break;
351414
}
352-
assert(LastLabel);
353-
TOut.emitParam(LastLabel, Params);
415+
assert(LastSymbol);
416+
// TODO: LastSymbol isn't even used by emitParam, so could be removed.
417+
TOut.emitParam(LastSymbol, Params);
354418
TOut.emitLocal(Locals);
419+
return Expect(AsmToken::EndOfStatement, "EOL");
355420
} else {
356-
// For now, ignore anydirective we don't recognize:
421+
// TODO: remove.
357422
while (Lexer.isNot(AsmToken::EndOfStatement))
358423
Parser.Lex();
424+
return Expect(AsmToken::EndOfStatement, "EOL");
359425
}
360-
return Expect(AsmToken::EndOfStatement, "EOL");
426+
// TODO: current ELF directive parsing is broken, fix this is a followup.
427+
//return true; // We didn't process this directive.
428+
return false;
361429
}
362430

363431
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,

test/MC/WebAssembly/basic-assembly.s

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s | FileCheck %s
2+
# this one is just here to see if it converts to .o without errors, but doesn't check any output:
3+
# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s
24

35
.text
46
.type test0,@function
@@ -56,7 +58,9 @@ test0:
5658
#i32.trunc_s:sat/f32
5759
get_global __stack_pointer@GLOBAL
5860
end_function
59-
61+
.Lfunc_end0:
62+
.size test0, .Lfunc_end0-test0
63+
.globaltype __stack_pointer, i32
6064

6165
# CHECK: .text
6266
# CHECK-LABEL: test0:
@@ -104,3 +108,5 @@ test0:
104108
# CHECK-NEXT: end_try
105109
# CHECK-NEXT: get_global __stack_pointer@GLOBAL
106110
# CHECK-NEXT: end_function
111+
112+
# CHECK: .globaltype __stack_pointer, i32

0 commit comments

Comments
 (0)