From d347d13db75ff3031ef62048dfbc505453aaa29c Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 22 Jul 2025 09:29:20 -0700 Subject: [PATCH] MC: Allocate initial fragment in changeSection 13a79bbfe583e1d8cc85d241b580907260065eb8 (2017) introduced fragment creation in MCContext for createELFSectionImpl, which was inappropriate. Fragments should only be created when using MCSteramer, not during `MCContext::get*Section` calls. `initMachOMCObjectFileInfo` defines multiple sections, some of which may not be used by the code generator. This caused symbol names matching these sections to be incorrectly marked as undefined (see https://reviews.llvm.org/D55173). The fragment code was later replicated in other file formats, such as WebAssembly (see https://reviews.llvm.org/D46561), XCOFF, and GOFF. This patch fixes the problem by moving initial fragment allocation from MCContext::createSection to MCStreamer::changeSection. While MCContext still creates a section symbol, the symbol is not attached to the initial fragment. * test/CodeGen/XCore/section-name.ll now passes. XCore doesn't support MCObjectStreamer. I don't think the MCAsmStreamer output behavior change matters. --- llvm/include/llvm/MC/MCSection.h | 4 ++- llvm/include/llvm/MC/MCXCOFFStreamer.h | 1 + llvm/lib/MC/ELFObjectWriter.cpp | 2 -- llvm/lib/MC/MCAsmStreamer.cpp | 7 +++++ llvm/lib/MC/MCAssembler.cpp | 1 - llvm/lib/MC/MCContext.cpp | 40 ++++++------------------- llvm/lib/MC/MCELFStreamer.cpp | 5 +++- llvm/lib/MC/MCGOFFStreamer.cpp | 16 +++++++--- llvm/lib/MC/MCMachOStreamer.cpp | 2 ++ llvm/lib/MC/MCObjectStreamer.cpp | 24 +++++++++++---- llvm/lib/MC/MCSection.cpp | 3 +- llvm/lib/MC/MCStreamer.cpp | 6 ---- llvm/lib/MC/MCXCOFFStreamer.cpp | 14 +++++++++ llvm/test/CodeGen/XCore/section-name.ll | 4 +-- llvm/test/MC/ELF/section-sym2.s | 27 ++++++++++++----- llvm/test/MC/ELF/undefined-debug.s | 5 ---- 16 files changed, 94 insertions(+), 67 deletions(-) delete mode 100644 llvm/test/MC/ELF/undefined-debug.s diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index c1f3f02b7a7b6..125f849cb4854 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -589,6 +589,8 @@ class LLVM_ABI MCSection { /// offset between two locations may not be fully resolved. bool LinkerRelaxable : 1; + MCFragment DummyFragment; + // Mapping from subsection number to fragment list. At layout time, the // subsection 0 list is replaced with concatenated fragments from all // subsections. @@ -650,7 +652,7 @@ class LLVM_ABI MCSection { bool isLinkerRelaxable() const { return LinkerRelaxable; } void setLinkerRelaxable() { LinkerRelaxable = true; } - MCFragment &getDummyFragment() { return *Subsections[0].second.Head; } + MCFragment &getDummyFragment() { return DummyFragment; } FragList *curFragList() const { return CurFragList; } iterator begin() const { return iterator(CurFragList->Head); } diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h index 870d48fe4200c..c3bc2ca94f711 100644 --- a/llvm/include/llvm/MC/MCXCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h @@ -22,6 +22,7 @@ class MCXCOFFStreamer : public MCObjectStreamer { XCOFFObjectWriter &getWriter(); + void changeSection(MCSection *Section, uint32_t Subsection = 0) override; bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, Align ByteAlignment) override; diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 9f52b3e3e85c0..5a1306a910c97 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -566,8 +566,6 @@ void ELFWriter::computeSymbolTable(const RevGroupMapTy &RevGroupMap) { // And if these sections were not really defined in the code, but were // referenced, we simply error out. if (!Section.isRegistered()) { - assert(static_cast(Symbol).getType() == - ELF::STT_SECTION); Ctx.reportError(SMLoc(), "Undefined section reference: " + Symbol.getName()); continue; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 7119ef4a2bbcd..cd251fe07f8c1 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -60,6 +60,7 @@ class MCAsmStreamer final : public MCStreamer { bool ShowInst = false; bool UseDwarfDirectory = false; + void changeSection(MCSection *, uint32_t) override; void EmitRegisterName(int64_t Register); void PrintQuotedString(StringRef Data, raw_ostream &OS) const; void printDwarfFileDirective(unsigned FileNo, StringRef Directory, @@ -2429,6 +2430,12 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, } } +void MCAsmStreamer::changeSection(MCSection *Sec, uint32_t) { + CurFrag = &Sec->getDummyFragment(); + if (auto *Sym = Sec->getBeginSymbol()) + Sym->setFragment(&Sec->getDummyFragment()); +} + void MCAsmStreamer::emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { if (MAI->isAIX() && CurFrag) diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index e142ac1860035..8500fd1fef520 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -106,7 +106,6 @@ void MCAssembler::reset() { bool MCAssembler::registerSection(MCSection &Section) { if (Section.isRegistered()) return false; - assert(Section.curFragList()->Head && "allocInitialFragment not called"); Sections.push_back(&Section); Section.setIsRegistered(true); return true; diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 12b3fbab8fb8f..0ab5e74530083 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -443,17 +443,19 @@ MCSymbol *MCContext::getDirectionalLocalSymbol(unsigned LocalLabelVal, return getOrCreateDirectionalLocalSymbol(LocalLabelVal, Instance); } +// Create a section symbol, with a distinct one for each section of the same. +// The first symbol is used for assembly code references. template Symbol *MCContext::getOrCreateSectionSymbol(StringRef Section) { Symbol *R; auto &SymEntry = getSymbolTableEntry(Section); MCSymbol *Sym = SymEntry.second.Symbol; - // A section symbol can not redefine regular symbols. There may be multiple - // sections with the same name, in which case the first such section wins. if (Sym && Sym->isDefined() && (!Sym->isInSection() || Sym->getSection().getBeginSymbol() != Sym)) reportError(SMLoc(), "invalid symbol redefinition"); - if (Sym && Sym->isUndefined()) { + // Use the symbol's index to track if it has been used as a section symbol. + // Set to -1 to catch potential bugs if misused as a symbol index. + if (Sym && Sym->getIndex() != -1u) { R = cast(Sym); } else { SymEntry.second.Used = true; @@ -461,6 +463,8 @@ Symbol *MCContext::getOrCreateSectionSymbol(StringRef Section) { if (!Sym) SymEntry.second.Symbol = R; } + // Mark as section symbol. + R->setIndex(-1u); return R; } @@ -568,7 +572,6 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section, MCSectionMachO(Segment, Name.substr(Name.size() - Section.size()), TypeAndAttributes, Reserved2, Kind, Begin); R.first->second = Ret; - allocInitialFragment(*Ret); return Ret; } @@ -579,15 +582,8 @@ MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type, bool Comdat, unsigned UniqueID, const MCSymbolELF *LinkedToSym) { auto *R = getOrCreateSectionSymbol(Section); - R->setBinding(ELF::STB_LOCAL); - R->setType(ELF::STT_SECTION); - - auto *Ret = new (ELFAllocator.Allocate()) MCSectionELF( + return new (ELFAllocator.Allocate()) MCSectionELF( Section, Type, Flags, EntrySize, Group, Comdat, UniqueID, R, LinkedToSym); - - auto *F = allocInitialFragment(*Ret); - R->setFragment(F); - return Ret; } MCSectionELF * @@ -743,7 +739,6 @@ MCSectionGOFF *MCContext::getGOFFSection(SectionKind Kind, StringRef Name, MCSectionGOFF(CachedName, Kind, IsVirtual, Attributes, static_cast(Parent)); Iter->second = GOFFSection; - allocInitialFragment(*GOFFSection); return GOFFSection; } @@ -798,8 +793,7 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, MCSectionCOFF *Result = new (COFFAllocator.Allocate()) MCSectionCOFF( CachedName, Characteristics, COMDATSymbol, Selection, UniqueID, Begin); Iter->second = Result; - auto *F = allocInitialFragment(*Result); - Begin->setFragment(F); + Begin->setFragment(&Result->getDummyFragment()); return Result; } @@ -870,8 +864,6 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, MCSectionWasm(CachedName, Kind, Flags, GroupSym, UniqueID, Begin); Entry.second = Result; - auto *F = allocInitialFragment(*Result); - Begin->setFragment(F); return Result; } @@ -927,24 +919,11 @@ MCSectionXCOFF *MCContext::getXCOFFSection( MultiSymbolsAllowed); Entry.second = Result; - - auto *F = allocInitialFragment(*Result); - - // We might miss calculating the symbols difference as absolute value before - // adding fixups when symbol_A without the fragment set is the csect itself - // and symbol_B is in it. - // TODO: Currently we only set the fragment for XMC_PR csects and DWARF - // sections because we don't have other cases that hit this problem yet. - if (IsDwarfSec || CsectProp->MappingClass == XCOFF::XMC_PR) - QualName->setFragment(F); - return Result; } MCSectionSPIRV *MCContext::getSPIRVSection() { MCSectionSPIRV *Result = new (SPIRVAllocator.Allocate()) MCSectionSPIRV(); - - allocInitialFragment(*Result); return Result; } @@ -964,7 +943,6 @@ MCSectionDXContainer *MCContext::getDXContainerSection(StringRef Section, new (DXCAllocator.Allocate()) MCSectionDXContainer(Name, K, nullptr); // The first fragment will store the header - allocInitialFragment(*MapIt->second); return MapIt->second; } diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index b8cbaea52560f..de11b679cbdda 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -89,7 +89,10 @@ void MCELFStreamer::changeSection(MCSection *Section, uint32_t Subsection) { getWriter().markGnuAbi(); MCObjectStreamer::changeSection(Section, Subsection); - Asm.registerSymbol(*Section->getBeginSymbol()); + auto *Sym = static_cast(Section->getBeginSymbol()); + Sym->setBinding(ELF::STB_LOCAL); + Sym->setType(ELF::STT_SECTION); + Asm.registerSymbol(*Sym); } void MCELFStreamer::emitWeakReference(MCSymbol *Alias, const MCSymbol *Target) { diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp index 1718e2a4eb2d7..c5ed8619e6877 100644 --- a/llvm/lib/MC/MCGOFFStreamer.cpp +++ b/llvm/lib/MC/MCGOFFStreamer.cpp @@ -29,11 +29,19 @@ GOFFObjectWriter &MCGOFFStreamer::getWriter() { void MCGOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) { // Make sure that all section are registered in the correct order. SmallVector Sections; - for (auto *S = static_cast(Section); S; S = S->getParent()) + for (auto *S = static_cast(Section); (S = S->getParent());) Sections.push_back(S); - while (!Sections.empty()) { - auto *S = Sections.pop_back_val(); - MCObjectStreamer::changeSection(S, Sections.empty() ? Subsection : 0); + while (!Sections.empty()) + MCObjectStreamer::changeSection(Sections.pop_back_val(), 0); + + bool Registered = Section->isRegistered(); + MCObjectStreamer::changeSection(Section, Subsection); + if (!Registered) { + MCSymbol *Sym = Section->getBeginSymbol(); + if (Sym) { + Sym->setFragment(nullptr); + emitLabel(Sym); + } } } diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index 8c3332cd8935f..493481524be19 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -140,6 +140,8 @@ void MCMachOStreamer::changeSection(MCSection *Section, uint32_t Subsection) { MCSymbol *Label = getContext().createLinkerPrivateTempSymbol(); Section->setBeginSymbol(Label); HasSectionLabel[Section] = true; + if (!Label->isInSection()) + emitLabel(Label); } } diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index fcd5cbfbd7341..aff4aa5b04233 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -185,10 +185,10 @@ void MCObjectStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { getAssembler().registerSymbol(*Symbol); - // If there is a current fragment, mark the symbol as pointing into it. - // Otherwise queue the label and set its fragment pointer when we emit the - // next fragment. - MCFragment *F = getCurrentFragment(); + // Set the fragment and offset. This function might be called by + // changeSection, when the section stack top hasn't been changed to the new + // section. + MCFragment *F = CurFrag; Symbol->setFragment(F); Symbol->setOffset(F->getContents().size()); @@ -247,6 +247,15 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) { assert(Section && "Cannot switch to a null section!"); getContext().clearDwarfLocSeen(); + // If the section was not registered before and Subsection is not zero, + // allocate the initial fragment for subsection 0 for the section symbol. + bool NewSec = getAssembler().registerSection(*Section); + MCFragment *F0 = nullptr; + if (NewSec && Subsection) { + changeSection(Section, 0); + F0 = CurFrag; + } + auto &Subsections = Section->Subsections; size_t I = 0, E = Subsections.size(); while (I != E && Subsections[I].first < Subsection) @@ -262,7 +271,12 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) { Section->CurFragList = &Subsections[I].second; CurFrag = Section->CurFragList->Tail; - getAssembler().registerSection(*Section); + if (NewSec) { + if (!Subsection) + F0 = CurFrag; + if (auto *Sym = Section->getBeginSymbol()) + Sym->setFragment(F0); + } } void MCObjectStreamer::switchSectionNoPrint(MCSection *Section) { diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 023f7f27de0aa..e738a22ec11cd 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -22,8 +22,7 @@ MCSection::MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin) : Begin(Begin), HasInstructions(false), IsRegistered(false), IsText(IsText), IsBss(IsBss), LinkerRelaxable(false), Name(Name), Variant(V) { - // The initial subsection number is 0. Create a fragment list. - CurFragList = &Subsections.emplace_back(0u, FragList{}).second; + DummyFragment.setParent(this); } MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) { diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index e14a32f5dc0ce..d8e58c3e6ffe4 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1358,9 +1358,6 @@ void MCStreamer::switchSection(MCSection *Section, uint32_t Subsection) { changeSection(Section, Subsection); SectionStack.back().first = MCSectionSubPair(Section, Subsection); assert(!Section->hasEnded() && "Section already ended"); - MCSymbol *Sym = Section->getBeginSymbol(); - if (Sym && !Sym->isInSection()) - emitLabel(Sym); } } @@ -1387,9 +1384,6 @@ void MCStreamer::switchSectionNoPrint(MCSection *Section) { SectionStack.back().second = SectionStack.back().first; SectionStack.back().first = MCSectionSubPair(Section, 0); changeSection(Section, 0); - MCSymbol *Sym = Section->getBeginSymbol(); - if (Sym && !Sym->isInSection()) - emitLabel(Sym); } MCSymbol *MCStreamer::endSection(MCSection *Section) { diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 63381b4f81859..78e4b953ed550 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -36,6 +36,20 @@ XCOFFObjectWriter &MCXCOFFStreamer::getWriter() { return static_cast(getAssembler().getWriter()); } +void MCXCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) { + MCObjectStreamer::changeSection(Section, Subsection); + auto *Sec = cast(Section); + // We might miss calculating the symbols difference as absolute value before + // adding fixups when symbol_A without the fragment set is the csect itself + // and symbol_B is in it. + // TODO: Currently we only set the fragment for XMC_PR csects and DWARF + // sections because we don't have other cases that hit this problem yet. + // if (IsDwarfSec || CsectProp->MappingClass == XCOFF::XMC_PR) + // QualName->setFragment(F); + if (Sec->isDwarfSect() || Sec->getMappingClass() == XCOFF::XMC_PR) + Sec->getQualNameSymbol()->setFragment(CurFrag); +} + bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym, MCSymbolAttr Attribute) { auto *Symbol = cast(Sym); diff --git a/llvm/test/CodeGen/XCore/section-name.ll b/llvm/test/CodeGen/XCore/section-name.ll index 0fa2cc606a5e0..b2176ecaa8380 100644 --- a/llvm/test/CodeGen/XCore/section-name.ll +++ b/llvm/test/CodeGen/XCore/section-name.ll @@ -1,4 +1,4 @@ -; RUN: not llc < %s -mtriple=xcore -o /dev/null 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=xcore | FileCheck %s @bar = internal global i32 zeroinitializer @@ -6,4 +6,4 @@ define void @".dp.bss"() { ret void } -; CHECK: :0: error: symbol '.dp.bss' is already defined +; CHECK: .dp.bss: diff --git a/llvm/test/MC/ELF/section-sym2.s b/llvm/test/MC/ELF/section-sym2.s index 167fc8c938ea4..fe2b904c9f368 100644 --- a/llvm/test/MC/ELF/section-sym2.s +++ b/llvm/test/MC/ELF/section-sym2.s @@ -1,27 +1,40 @@ # RUN: llvm-mc -filetype=obj -triple x86_64 %s -o %t -# RUN: llvm-readelf -Srs %t | FileCheck %s +# RUN: llvm-readelf -SrsX %t | FileCheck %s ## Test that we can forward reference a section. mov .rodata, %rsi +mov data, %rsi mov .debug_info, %rsi +mov .debug_abbrev, %rsi .section .rodata,"a" +.pushsection data, 2; .long 2; .popsection +.section data; .long 1 .section .debug_info,"G",@progbits,11,comdat; .long x1 .section .debug_info,"G",@progbits,22,comdat; .long x2 .section .debug_info,"",@progbits; .long x0 +.text +mov data, %rdi + +# CHECK: Relocation section '.rela.text' +# CHECK: R_X86_64_32S {{.*}} data + 0 +# CHECK: R_X86_64_32S {{.*}} data + 0 + # CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1 # CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1 # CHECK: Relocation section '.rela.debug_info' at offset {{.*}} contains 1 -# CHECK: Symbol table '.symtab' contains 8 entries: -# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name +# CHECK: Symbol table '.symtab' contains 10 entries: +# CHECK-NEXT: Num: # CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND -# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .rodata -# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT 11 .debug_info -# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT 5 11 -# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT 8 22 +# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT [[#]] (.rodata) .rodata +# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT [[#]] (data) data +# CHECK-NEXT: 0000000000000000 0 SECTION LOCAL DEFAULT [[#]] (.debug_info) .debug_info +# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT [[#]] (.group) 11 +# CHECK-NEXT: 0000000000000000 0 NOTYPE LOCAL DEFAULT [[#]] (.group) 22 +# CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND .debug_abbrev # CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x1 # CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x2 # CHECK-NEXT: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND x0 diff --git a/llvm/test/MC/ELF/undefined-debug.s b/llvm/test/MC/ELF/undefined-debug.s deleted file mode 100644 index 95ead70ef9714..0000000000000 --- a/llvm/test/MC/ELF/undefined-debug.s +++ /dev/null @@ -1,5 +0,0 @@ -// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t 2>&1 | FileCheck %s -// CHECK: error: Undefined section reference: .debug_pubnames - -.section .foo,"",@progbits - .long .debug_pubnames