diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md index 817ad288aa34b..afbd59220b045 100644 --- a/bolt/docs/BAT.md +++ b/bolt/docs/BAT.md @@ -114,9 +114,13 @@ Deleted basic blocks are emitted as having `OutputOffset` equal to the size of the function. They don't affect address translation and only participate in input basic block mapping. -### Secondary Entry Points table +### Secondary Entry Points and Call Continuation Landing Pads table The table is emitted for hot fragments only. It contains `NumSecEntryPoints` -offsets denoting secondary entry points, delta encoded, implicitly starting at zero. +offsets, delta encoded, implicitly starting at zero. | Entry | Encoding | Description | | ----- | -------- | ----------- | -| `SecEntryPoint` | Delta, ULEB128 | Secondary entry point offset | +| `OutputOffset` | Delta, ULEB128 | Offset of a secondary entry point or a call continuation landing pad\* | + +\*Call continuation landing pad offsets are shifted by the maximum input offset +of the function plus one for backwards compatibility (treated as entry points +past the end of the function). diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h index fcc578f35e322..f956f48b8356b 100644 --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -168,10 +168,6 @@ class BoltAddressTranslation { /// Map a function to its secondary entry points vector std::unordered_map> SecondaryEntryPointsMap; - /// Return a secondary entry point ID for a function located at \p Address and - /// \p Offset within that function. 
- unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const; - /// Links outlined cold bocks to their original function std::map ColdPartSource; @@ -183,6 +179,10 @@ class BoltAddressTranslation { const static uint32_t BRANCHENTRY = 0x1; public: + /// Return a secondary entry point ID for a function located at \p Address and + /// \p Offset within that function. + unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const; + /// Map basic block input offset to a basic block index and hash pair. class BBHashMapTy { struct EntryTy { diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index f34a94c577921..31b93418f5394 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -257,7 +257,8 @@ void BinaryEmitter::emitFunctions() { Streamer.setAllowAutoPadding(OriginalAllowAutoPadding); if (Emitted) - Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics); + Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics || + opts::EnableBAT); } }; diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index 4d005f942699e..2f8bccf4afb5d 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -87,13 +87,33 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { continue; uint32_t NumSecondaryEntryPoints = 0; - Function.forEachEntryPoint([&](uint64_t Offset, const MCSymbol *) { - if (!Offset) - return true; + // Offset call continuation landing pads by max input offset + 1 to prevent + // confusing them with real entry points. Note we can't use the input size + // as it's not available in BOLTed binary. 
+ const BBHashMapTy &BBHashMap = getBBHashMap(InputAddress); + const uint32_t CallContLPOffset = std::prev(BBHashMap.end())->first + 1; + for (const BinaryBasicBlock &BB : llvm::drop_begin(Function)) { + if (BB.isEntryPoint()) { + ++NumSecondaryEntryPoints; + SecondaryEntryPointsMap[OutputAddress].push_back(BB.getOffset()); + continue; + } + // Add call continuation landing pads, shifted by CallContLPOffset + if (!BB.isLandingPad()) + continue; + const BinaryBasicBlock *PrevBB = + Function.getLayout().getBlock(BB.getIndex() - 1); + if (!PrevBB->isSuccessor(&BB)) + continue; + const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); + if (!Instr || !BC.MIB->isCall(*Instr)) + continue; ++NumSecondaryEntryPoints; - SecondaryEntryPointsMap[OutputAddress].push_back(Offset); - return true; - }); + SecondaryEntryPointsMap[OutputAddress].push_back(CallContLPOffset + + BB.getOffset()); + } + if (NumSecondaryEntryPoints) + llvm::sort(SecondaryEntryPointsMap[OutputAddress]); LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n"); LLVM_DEBUG(dbgs() << " Address reference: 0x" @@ -109,7 +129,6 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { // Add entries for deleted blocks. They are still required for correct BB // mapping of branches modified by SCTC. By convention, they would have the // end of the function as output address. 
- const BBHashMapTy &BBHashMap = getBBHashMap(InputAddress); if (BBHashMap.size() != Function.size()) { const uint64_t EndOffset = Function.getOutputSize(); std::unordered_set MappedInputOffsets; @@ -600,8 +619,8 @@ BoltAddressTranslation::getSecondaryEntryPointId(uint64_t Address, if (FunctionIt == SecondaryEntryPointsMap.end()) return 0; const std::vector &Offsets = FunctionIt->second; - auto OffsetIt = std::find(Offsets.begin(), Offsets.end(), Offset); - if (OffsetIt == Offsets.end()) + auto OffsetIt = llvm::lower_bound(FunctionIt->second, Offset); + if (OffsetIt == Offsets.end() || *OffsetIt != Offset) return 0; // Adding one here because main entry point is not stored in BAT, and // enumeration for secondary entry points starts with 1. diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 2b02086e3e0c9..32c1d82c8c31f 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -808,10 +808,16 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, if (!Offset) return false; - // FIXME: support BAT case where the function might be in empty state - // (split fragments declared non-simple). - if (!Func.hasCFG()) - return false; + if (!Func.hasCFG()) { + const uint64_t Address = Func.getAddress(); + if (!BAT) + return false; + const uint32_t InputOffset = BAT->translate(Address, Offset, false); + // Check if offset is a secondary entry point or a call continuation + // landing pad (offset shifted by function size). + return !BAT->getSecondaryEntryPointId(Address, InputOffset) && + !BAT->getSecondaryEntryPointId(Address, Func.getSize() + InputOffset); + } // The offset should not be an entry point or a landing pad. 
const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset); diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index 31a7910d7fa3f..d463c7f8eebc8 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -5,7 +5,6 @@ # RUN: link_fdata %s %t %t.pa1 PREAGG # RUN: link_fdata %s %t %t.pa2 PREAGG2 # RUN: link_fdata %s %t %t.pa3 PREAGG3 -# RUN: link_fdata %s %t %t.pa4 PREAGG4 ## Check normal case: fallthrough is not LP or secondary entry. # RUN: llvm-strip --strip-unneeded %t -o %t.exe @@ -18,12 +17,43 @@ # RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2 ## Check that we don't treat secondary entry points as call continuation sites. -# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \ +# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out3 \ # RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 ## Check fallthrough to a landing pad case. -# RUN: llvm-bolt %t.exe --pa -p %t.pa4 -o %t.out \ -# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK4 +# RUN: llvm-bolt %t.exe --pa -p %t.pa3 -o %t.out4 \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym GLOBL=1 \ +# RUN: %s -o %t.o +# RUN: %clangxx %cxxflags %t.o -o %t2 -Wl,-q -nostdlib +# RUN: llvm-bolt %t2 --pa -p %t.pa3 -o %t.bat --enable-bat \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 + +## Check that a landing pad is emitted in BAT +# RUN: llvm-bat-dump %t.bat --dump-all | FileCheck %s --check-prefix=CHECK-BAT + +# CHECK-BAT: secondary entry points: + +## Check BAT case of a fallthrough to a secondary entry point or a landing pad +# RUN: link_fdata %s %t.bat %t.pa.bat2 PREAGG3 + +## Secondary entry +# RUN: perf2bolt %t.bat -p %t.pa.bat2 --pa -o %t.fdata2 +# RUN: FileCheck %s --check-prefix=CHECK-BAT-ENTRY --input-file=%t.fdata2 +# CHECK-BAT-ENTRY: main + +## Landing pad +# RUN: 
llvm-strip --strip-unneeded %t.bat +# RUN: perf2bolt %t.bat -p %t.pa.bat2 --pa -o %t.fdata3 +# RUN: FileCheck %s --check-prefix=CHECK-BAT-LP --input-file=%t.fdata3 +# CHECK-BAT-LP: main + +## Check BAT case of a fallthrough to a call continuation +# RUN: link_fdata %s %t.bat %t.pa.bat PREAGG +# RUN: perf2bolt %t.bat -p %t.pa.bat --pa -o %t.fdata +# RUN: FileCheck %s --check-prefix=CHECK-BAT-CC --input-file=%t.fdata +# CHECK-BAT-CC: main .globl foo .type foo, %function @@ -77,16 +107,15 @@ Ltmp4: # CHECK2: callq foo # CHECK2-NEXT: count: 3 -## Target is a secondary entry point +## Target is a secondary entry point (non-stripped) or a landing pad +## (strip-unneeded) # PREAGG3: B X:0 #Ltmp3# 2 0 # CHECK3: callq foo # CHECK3-NEXT: count: 0 -## Target is a landing pad -# PREAGG4: B X:0 #Ltmp3# 2 0 -# CHECK4: callq puts@PLT -# CHECK4-NEXT: count: 0 - +.ifdef GLOBL +.globl Ltmp3 +.endif Ltmp3: cmpl $0x0, -0x18(%rbp) Ltmp3_br: