From ed4551c9ecae41a6a9fb4ad822a0e41b425f1361 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 10 Feb 2015 03:52:36 +0000 Subject: [PATCH 01/17] Merging r228565: ------------------------------------------------------------------------ r228565 | majnemer | 2015-02-08 22:31:31 -0800 (Sun, 08 Feb 2015) | 3 lines MC: Calculate intra-section symbol differences correctly for COFF This fixes PR22060. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/WinCOFFObjectWriter.cpp | 17 +++++++++++------ test/MC/COFF/diff.s | 25 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index c17f99b9bd7b..ec0e0f7256a4 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -710,17 +710,22 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, CrossSection = &Symbol.getSection() != &B->getSection(); // Offset of the symbol in the section - int64_t a = Layout.getSymbolOffset(&B_SD); + int64_t OffsetOfB = Layout.getSymbolOffset(&B_SD); - // Offset of the relocation in the section - int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - - FixedValue = b - a; // In the case where we have SymbA and SymB, we just need to store the delta // between the two symbols. Update FixedValue to account for the delta, and // skip recording the relocation. 
- if (!CrossSection) + if (!CrossSection) { + int64_t OffsetOfA = Layout.getSymbolOffset(&A_SD); + FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant(); return; + } + + // Offset of the relocation in the section + int64_t OffsetOfRelocation = + Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + + FixedValue = OffsetOfRelocation - OffsetOfB; } else { FixedValue = Target.getConstant(); } diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s index 820272a40bf4..5111600c7449 100644 --- a/test/MC/COFF/diff.s +++ b/test/MC/COFF/diff.s @@ -1,5 +1,23 @@ // RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s +.section baz, "xr" + .def X + .scl 2; + .type 32; + .endef + .globl X +X: + mov Y-X+42, %eax + retl + + .def Y + .scl 2; + .type 32; + .endef + .globl Y +Y: + retl + .def _foobar; .scl 2; .type 32; @@ -30,3 +48,10 @@ _rust_crate: // CHECK: SectionData ( // CHECK-NEXT: 0000: 00000000 00000000 1C000000 20000000 // CHECK-NEXT: ) + +// CHECK: Name: baz +// CHECK: Relocations [ +// CHECK-NEXT: ] +// CHECK: SectionData ( +// CHECK-NEXT: 0000: A1300000 00C3C3 +// CHECK-NEXT: ) From 514eb49e556e8fd2c7f569d6081d906cd6ec9ad6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 11 Feb 2015 02:15:09 +0000 Subject: [PATCH 02/17] Merging r228656: ------------------------------------------------------------------------ r228656 | chandlerc | 2015-02-09 18:25:56 -0800 (Mon, 09 Feb 2015) | 16 lines [x86] Fix PR22524: the DAG combiner was incorrectly handling illegal nodes when folding bitcasts of constants. We can't fold things and then check after-the-fact whether it was legal. Once we have formed the DAG node, arbitrary other nodes may have been collapsed to it. There is no easy way to go back. Instead, we need to test for the specific folding cases we're interested in and ensure those are legal first. 
This could in theory make this less powerful for bitcasting from an integer to some vector type, but AFAICT, that can't actually happen in the SDAG so its fine. Now, we *only* whitelist specific int->fp and fp->int bitcasts for post-legalization folding. I've added the test case from the PR. (Also as a note, this does not appear to be in 3.6, no backport needed) ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228787 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 22 ++++++--------- test/CodeGen/X86/constant-combines.ll | 35 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/X86/constant-combines.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5145731f6231..1bd6cfff9d25 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6544,19 +6544,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa(N0) || isa(N0)) { - SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); - if (Res.getNode() != N) { - if (!LegalOperations || - TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) - return Res; - - // Folding it resulted in an illegal node, and it's too late to - // do that. Clean up the old node and forego the transformation. - // Ideally this won't happen very often, because instcombine - // and the earlier dagcombine runs (where illegal nodes are - // permitted) should have folded most of them already. - deleteAndRecombine(Res.getNode()); - } + // If we can't allow illegal operations, we need to check that this is just + // a fp -> int or int -> conversion and that the resulting operation will + // be legal. 
+ if (!LegalOperations || + (isa(N0) && VT.isFloatingPoint() && !VT.isVector() && + TLI.isOperationLegal(ISD::ConstantFP, VT)) || + (isa(N0) && VT.isInteger() && !VT.isVector() && + TLI.isOperationLegal(ISD::Constant, VT))) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); } // (conv (conv x, t1), t2) -> (conv x, t2) diff --git a/test/CodeGen/X86/constant-combines.ll b/test/CodeGen/X86/constant-combines.ll new file mode 100644 index 000000000000..d2a6ef4f5d25 --- /dev/null +++ b/test/CodeGen/X86/constant-combines.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +define void @PR22524({ float, float }* %arg) { +; Check that we can materialize the zero constants we store in two places here, +; and at least form a legal store of the floating point value at the end. +; The DAG combiner at one point contained bugs that given enough permutations +; would incorrectly form an illegal operation for the last of these stores when +; it folded it to a zero too late to legalize the zero store operation. If this +; ever starts forming a zero store instead of movss, the test case has stopped +; being useful. 
+; +; CHECK-LABEL: PR22524: +entry: + %0 = getelementptr inbounds { float, float }* %arg, i32 0, i32 1 + store float 0.000000e+00, float* %0, align 4 +; CHECK: movl $0, 4(%rdi) + + %1 = getelementptr inbounds { float, float }* %arg, i64 0, i32 0 + %2 = bitcast float* %1 to i64* + %3 = load i64* %2, align 8 + %4 = trunc i64 %3 to i32 + %5 = lshr i64 %3, 32 + %6 = trunc i64 %5 to i32 + %7 = bitcast i32 %6 to float + %8 = fmul float %7, 0.000000e+00 + %9 = bitcast float* %1 to i32* + store i32 %6, i32* %9, align 4 +; CHECK: movl $0, (%rdi) + store float %8, float* %0, align 4 +; CHECK: movss %{{.*}}, 4(%rdi) + ret void +} From d7210365f520abd9b34e158acf460899997016e6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 11 Feb 2015 02:23:00 +0000 Subject: [PATCH 03/17] Merging r228760 and r228761: ------------------------------------------------------------------------ r228760 | majnemer | 2015-02-10 15:09:43 -0800 (Tue, 10 Feb 2015) | 3 lines EarlyCSE: It isn't safe to CSE across synchronization boundaries This fixes PR22514. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r228761 | majnemer | 2015-02-10 15:11:02 -0800 (Tue, 10 Feb 2015) | 1 line EarlyCSE: Add check lines for test added in r228760 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228790 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/EarlyCSE.cpp | 3 +++ test/Transforms/EarlyCSE/basic.ll | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 394b0d3de7bd..969b9a8f8df1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -480,6 +480,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Ignore volatile loads. 
if (!LI->isSimple()) { LastStore = nullptr; + // Don't CSE across synchronization boundaries. + if (Inst->mayWriteToMemory()) + ++CurrentGeneration; continue; } diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll index 155d36f60e21..3ec8831def18 100644 --- a/test/Transforms/EarlyCSE/basic.ll +++ b/test/Transforms/EarlyCSE/basic.ll @@ -192,4 +192,13 @@ define void @test11(i32 *%P) { ; CHECK-NEXT: ret void } - +; CHECK-LABEL: @test12( +define i32 @test12(i1 %B, i32* %P1, i32* %P2) { + %load0 = load i32* %P1 + %1 = load atomic i32* %P2 seq_cst, align 4 + %load1 = load i32* %P1 + %sel = select i1 %B, i32 %load0, i32 %load1 + ret i32 %sel + ; CHECK: load i32* %P1 + ; CHECK: load i32* %P1 +} From 335a95e1ab485bd8b155344cffc6524284683de9 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 11 Feb 2015 04:41:09 +0000 Subject: [PATCH 04/17] Merging r228793: ------------------------------------------------------------------------ r228793 | bogner | 2015-02-10 18:52:44 -0800 (Tue, 10 Feb 2015) | 12 lines InstrProf: Lower coverage mappings by setting their sections appropriately Add handling for __llvm_coverage_mapping to the InstrProfiling pass. We need to make sure the constant and any profile names it refers to are in the correct sections, which is easier and cleaner to do here where we have to know about profiling sections anyway. This is really tricky to test without a frontend, so I'm committing the test for the fix in clang. If anyone knows a good way to test this within LLVM, please let me know. Fixes PR22531. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228800 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/InstrProfiling.cpp | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 5f73b89e8551..2a3d15421de9 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -71,9 +71,17 @@ class InstrProfiling : public ModulePass { return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data"; } + /// Get the section name for the coverage mapping data. + StringRef getCoverageSection() const { + return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap"; + } + /// Replace instrprof_increment with an increment of the appropriate value. void lowerIncrement(InstrProfIncrementInst *Inc); + /// Set up the section and uses for coverage data and its references. + void lowerCoverageData(GlobalVariable *CoverageData); + /// Get the region counters for an increment, creating them if necessary. /// /// If the counter array doesn't yet exist, the profile data variables @@ -118,6 +126,10 @@ bool InstrProfiling::runOnModule(Module &M) { lowerIncrement(Inc); MadeChange = true; } + if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) { + lowerCoverageData(Coverage); + MadeChange = true; + } if (!MadeChange) return false; @@ -140,6 +152,35 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { Inc->eraseFromParent(); } +void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { + CoverageData->setSection(getCoverageSection()); + CoverageData->setAlignment(8); + + Constant *Init = CoverageData->getInitializer(); + // We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] } + // for some C. 
If not, the frontend's given us something broken. + assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map"); + assert(isa(Init->getAggregateElement(4)) && + "invalid function list in coverage map"); + ConstantArray *Records = cast(Init->getAggregateElement(4)); + for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { + Constant *Record = Records->getOperand(I); + Value *V = const_cast(Record->getOperand(0))->stripPointerCasts(); + + assert(isa(V) && "Missing reference to function name"); + GlobalVariable *Name = cast(V); + + // If we have region counters for this name, we've already handled it. + auto It = RegionCounters.find(Name); + if (It != RegionCounters.end()) + continue; + + // Move the name variable to the right section. + Name->setSection(getNameSection()); + Name->setAlignment(1); + } +} + /// Get the name of a profiling variable for a particular function. static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) { auto *Arr = cast(Inc->getName()->getInitializer()); From 2fae077773f02475b04eaefa7c99301311edebd0 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 00:29:01 +0000 Subject: [PATCH 05/17] Reverting r225904 to fix PR22505: ------------------------------------------------------------------------ r225904 | rnk | 2015-01-13 17:05:27 -0800 (Tue, 13 Jan 2015) | 27 lines CodeGen support for x86_64 SEH catch handlers in LLVM This adds handling for ExceptionHandling::MSVC, used by the x86_64-pc-windows-msvc triple. It assumes that filter functions have already been outlined in either the frontend or the backend. Filter functions are used in place of the landingpad catch clause type info operands. In catch clause order, the first filter to return true will catch the exception. The C specific handler table expects the landing pad to be split into one block per handler, but LLVM IR uses a single landing pad for all possible unwind actions. 
This patch papers over the mismatch by synthesizing single instruction BBs for every catch clause to fill in the EH selector that the landing pad block expects. Missing functionality: - Accessing data in the parent frame from outlined filters - Cleanups (from __finally) are unsupported, as they will require outlining and parent frame access - Filter clauses are unsupported, as there's no clear analogue in SEH In other words, this is the minimal set of changes needed to write IR to catch arbitrary exceptions and resume normal execution. Reviewers: majnemer Differential Revision: http://reviews.llvm.org/D6300 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228891 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineModuleInfo.h | 6 - lib/CodeGen/AsmPrinter/EHStreamer.cpp | 15 +- lib/CodeGen/AsmPrinter/EHStreamer.h | 17 +- lib/CodeGen/AsmPrinter/Win64Exception.cpp | 149 +------------ lib/CodeGen/AsmPrinter/Win64Exception.h | 4 - lib/CodeGen/MachineModuleInfo.cpp | 8 - lib/CodeGen/Passes.cpp | 2 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 33 +-- .../SelectionDAG/SelectionDAGBuilder.h | 2 - lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 64 +----- test/CodeGen/X86/seh-basic.ll | 175 ---------------- test/CodeGen/X86/seh-safe-div.ll | 196 ------------------ 12 files changed, 18 insertions(+), 653 deletions(-) delete mode 100644 test/CodeGen/X86/seh-basic.ll delete mode 100644 test/CodeGen/X86/seh-safe-div.ll diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index f0d0b2dbcdbc..584ce65c9d17 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -66,7 +66,6 @@ struct LandingPadInfo { MachineBasicBlock *LandingPadBlock; // Landing pad block. SmallVector BeginLabels; // Labels prior to invoke. SmallVector EndLabels; // Labels after invoke. 
- SmallVector ClauseLabels; // Labels for each clause. MCSymbol *LandingPadLabel; // Label at beginning of landing pad. const Function *Personality; // Personality function. std::vector TypeIds; // List of type ids (filters negative) @@ -331,11 +330,6 @@ class MachineModuleInfo : public ImmutablePass { /// void addCleanup(MachineBasicBlock *LandingPad); - /// Add a clause for a landing pad. Returns a new label for the clause. This - /// is used by EH schemes that have more than one landing pad. In this case, - /// each clause gets its own basic block. - MCSymbol *addClauseForLandingPad(MachineBasicBlock *LandingPad); - /// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. unsigned getTypeIDFor(const GlobalValue *TI); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 1bc86f6c222a..f112120c1abc 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -121,8 +121,7 @@ computeActionsTable(const SmallVectorImpl &LandingPads, for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { int TypeID = TypeIds[J]; assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = - isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; + int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; @@ -270,14 +269,14 @@ computeCallSiteTable(SmallVectorImpl &CallSites, CallSiteEntry Site = { BeginLabel, LastLabel, - LandingPad, + LandingPad->LandingPadLabel, FirstActions[P.PadIndex] }; // Try to merge with the previous call-site. 
SJLJ doesn't do this if (PreviousIsInvoke && !IsSJLJ) { CallSiteEntry &Prev = CallSites.back(); - if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { + if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { // Extend the range of the previous entry. Prev.EndLabel = Site.EndLabel; continue; @@ -577,15 +576,15 @@ void EHStreamer::emitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - if (!S.LPad) { + if (!S.PadLabel) { if (VerboseAsm) Asm->OutStreamer.AddComment(" has no landing pad"); Asm->OutStreamer.EmitIntValue(0, 4/*size*/); } else { if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" jumps to ") + - S.LPad->LandingPadLabel->getName()); - Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); + S.PadLabel->getName()); + Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4); } // Offset of the first associated action record, relative to the start of @@ -682,7 +681,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { unsigned TypeID = *I; if (VerboseAsm) { --Entry; - if (isFilterEHSelector(TypeID)) + if (TypeID != 0) Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 9b316ff00e9b..e93055ce8655 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -23,8 +23,6 @@ class MachineModuleInfo; class MachineInstr; class MachineFunction; class AsmPrinter; -class MCSymbol; -class MCSymbolRefExpr; template class SmallVectorImpl; @@ -62,11 +60,11 @@ class EHStreamer : public AsmPrinterHandler { /// Structure describing an entry in the call-site table. struct CallSiteEntry { // The 'try-range' is BeginLabel .. EndLabel. - MCSymbol *BeginLabel; // Null indicates the start of the function. - MCSymbol *EndLabel; // Null indicates the end of the function. + MCSymbol *BeginLabel; // zero indicates the start of the function. 
+ MCSymbol *EndLabel; // zero indicates the end of the function. - // LPad contains the landing pad start labels. - const LandingPadInfo *LPad; // Null indicates that there is no landing pad. + // The landing pad starts at PadLabel. + MCSymbol *PadLabel; // zero indicates that there is no landing pad. unsigned Action; }; @@ -114,13 +112,6 @@ class EHStreamer : public AsmPrinterHandler { virtual void emitTypeInfos(unsigned TTypeEncoding); - // Helpers for for identifying what kind of clause an EH typeid or selector - // corresponds to. Negative selectors are for filter clauses, the zero - // selector is for cleanups, and positive selectors are for catch clauses. - static bool isFilterEHSelector(int Selector) { return Selector < 0; } - static bool isCleanupEHSelector(int Selector) { return Selector == 0; } - static bool isCatchEHSelector(int Selector) { return Selector > 0; } - public: EHStreamer(AsmPrinter *A); virtual ~EHStreamer(); diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 2138cb9514f8..0f0ad755835d 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -99,156 +99,9 @@ void Win64Exception::endFunction(const MachineFunction *) { if (shouldEmitPersonality) { Asm->OutStreamer.PushSection(); - - // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer.EmitWinEHHandlerData(); - - // Emit either MSVC-compatible tables or the usual Itanium-style LSDA after - // the UNWIND_INFO struct. 
- if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::MSVC) { - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - if (Per->getName() == "__C_specific_handler") - emitCSpecificHandlerTable(); - else - report_fatal_error(Twine("unexpected personality function: ") + - Per->getName()); - } else { - emitExceptionTable(); - } - + emitExceptionTable(); Asm->OutStreamer.PopSection(); } Asm->OutStreamer.EmitWinCFIEndProc(); } - -const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { - return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, - Asm->OutContext); -} - -/// Emit the language-specific data that __C_specific_handler expects. This -/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning -/// up after faults with __try, __except, and __finally. The typeinfo values -/// are not really RTTI data, but pointers to filter functions that return an -/// integer (1, 0, or -1) indicating how to handle the exception. For __finally -/// blocks and other cleanups, the landing pad label is zero, and the filter -/// function is actually a cleanup handler with the same prototype. A catch-all -/// entry is modeled with a null filter function field and a non-zero landing -/// pad label. -/// -/// Possible filter function return values: -/// EXCEPTION_EXECUTE_HANDLER (1): -/// Jump to the landing pad label after cleanups. -/// EXCEPTION_CONTINUE_SEARCH (0): -/// Continue searching this table or continue unwinding. -/// EXCEPTION_CONTINUE_EXECUTION (-1): -/// Resume execution at the trapping PC. -/// -/// Inferred table structure: -/// struct Table { -/// int NumEntries; -/// struct Entry { -/// imagerel32 LabelStart; -/// imagerel32 LabelEnd; -/// imagerel32 FilterOrFinally; // Zero means catch-all. -/// imagerel32 LabelLPad; // Zero means __finally. 
-/// } Entries[NumEntries]; -/// }; -void Win64Exception::emitCSpecificHandlerTable() { - const std::vector &PadInfos = MMI->getLandingPads(); - - // Simplifying assumptions for first implementation: - // - Cleanups are not implemented. - // - Filters are not implemented. - - // The Itanium LSDA table sorts similar landing pads together to simplify the - // actions table, but we don't need that. - SmallVector LandingPads; - LandingPads.reserve(PadInfos.size()); - for (const auto &LP : PadInfos) - LandingPads.push_back(&LP); - - // Compute label ranges for call sites as we would for the Itanium LSDA, but - // use an all zero action table because we aren't using these actions. - SmallVector FirstActions; - FirstActions.resize(LandingPads.size()); - SmallVector CallSites; - computeCallSiteTable(CallSites, LandingPads, FirstActions); - - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - MCSymbol *EHFuncEndSym = - Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - - // Emit the number of table entries. - unsigned NumEntries = 0; - for (const CallSiteEntry &CSE : CallSites) { - if (!CSE.LPad) - continue; // Ignore gaps. - for (int Selector : CSE.LPad->TypeIds) { - // Ignore C++ filter clauses in SEH. - // FIXME: Implement cleanup clauses. - if (isCatchEHSelector(Selector)) - ++NumEntries; - } - } - Asm->OutStreamer.EmitIntValue(NumEntries, 4); - - // Emit the four-label records for each call site entry. The table has to be - // sorted in layout order, and the call sites should already be sorted. - for (const CallSiteEntry &CSE : CallSites) { - // Ignore gaps. Unlike the Itanium model, unwinding through a frame without - // an EH table entry will propagate the exception rather than terminating - // the program. - if (!CSE.LPad) - continue; - const LandingPadInfo *LPad = CSE.LPad; - - // Compute the label range. We may reuse the function begin and end labels - // rather than forming new ones. 
- const MCExpr *Begin = - createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); - const MCExpr *End; - if (CSE.EndLabel) { - // The interval is half-open, so we have to add one to include the return - // address of the last invoke in the range. - End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), - MCConstantExpr::Create(1, Asm->OutContext), - Asm->OutContext); - } else { - End = createImageRel32(EHFuncEndSym); - } - - // These aren't really type info globals, they are actually pointers to - // filter functions ordered by selector. The zero selector is used for - // cleanups, so slot zero corresponds to selector 1. - const std::vector &SelectorToFilter = MMI->getTypeInfos(); - - // Do a parallel iteration across typeids and clause labels, skipping filter - // clauses. - assert(LPad->TypeIds.size() == LPad->ClauseLabels.size()); - for (size_t I = 0, E = LPad->TypeIds.size(); I < E; ++I) { - // AddLandingPadInfo stores the clauses in reverse, but there is a FIXME - // to change that. - int Selector = LPad->TypeIds[E - I - 1]; - MCSymbol *ClauseLabel = LPad->ClauseLabels[I]; - - // Ignore C++ filter clauses in SEH. - // FIXME: Implement cleanup clauses. - if (!isCatchEHSelector(Selector)) - continue; - - Asm->OutStreamer.EmitValue(Begin, 4); - Asm->OutStreamer.EmitValue(End, 4); - if (isCatchEHSelector(Selector)) { - assert(unsigned(Selector - 1) < SelectorToFilter.size()); - const GlobalValue *TI = SelectorToFilter[Selector - 1]; - if (TI) // Emit the filter function pointer. - Asm->OutStreamer.EmitValue(createImageRel32(Asm->getSymbol(TI)), 4); - else // Otherwise, this is a "catch i8* null", or catch all. 
- Asm->OutStreamer.EmitIntValue(0, 4); - } - Asm->OutStreamer.EmitValue(createImageRel32(ClauseLabel), 4); - } - } -} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h index b2d5d1bce563..538e1328157f 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -29,10 +29,6 @@ class Win64Exception : public EHStreamer { /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; - void emitCSpecificHandlerTable(); - - const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value); - public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 32d511285eb2..baad411e2c5b 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -452,14 +452,6 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { LP.TypeIds.push_back(0); } -MCSymbol * -MachineModuleInfo::addClauseForLandingPad(MachineBasicBlock *LandingPad) { - MCSymbol *ClauseLabel = Context.CreateTempSymbol(); - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.ClauseLabels.push_back(ClauseLabel); - return ClauseLabel; -} - /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. void MachineModuleInfo::TidyLandingPads(DenseMap *LPMap) { diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 28e9f847a9d7..e53e874cb178 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -449,9 +449,9 @@ void TargetPassConfig::addPassesToHandleExceptions() { case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: case ExceptionHandling::ItaniumWinEH: - case ExceptionHandling::MSVC: // FIXME: Needs preparation. addPass(createDwarfEHPass(TM)); break; + case ExceptionHandling::MSVC: // FIXME: Add preparation. 
case ExceptionHandling::None: addPass(createLowerInvokePass()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 86a63eea7c2a..151bc724df67 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2071,14 +2071,10 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; - if (FuncInfo.ExceptionPointerVirtReg) { - Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[0]); - } else { - Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); - } + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[0]); Ops[1] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), @@ -2090,27 +2086,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } -unsigned -SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadBB) { - SDValue Chain = getControlRoot(); - - // Get the typeid that we will dispatch on later. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); - SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); - Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); - - // Branch to the main landing pad block. 
- MachineBasicBlock *ClauseMBB = FuncInfo.MBB; - ClauseMBB->addSuccessor(LPadBB); - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, - DAG.getBasicBlock(LPadBB))); - return VReg; -} - /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index eba98b8086b7..9070091d77b8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -713,8 +713,6 @@ class SelectionDAGBuilder { void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadMBB); private: // These all get lowered before this pass. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4f031d3ff7e7..ef5452554f72 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -41,7 +40,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -909,8 +907,6 @@ void SelectionDAGISel::DoInstructionSelection() { void SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; - const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); - // Add a label to mark the 
beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -922,66 +918,8 @@ void SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); - if (TM.getMCAsmInfo()->getExceptionHandlingType() == - ExceptionHandling::MSVC) { - // Make virtual registers and a series of labels that fill in values for the - // clauses. - auto &RI = MF->getRegInfo(); - FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); - - // Get all invoke BBs that will unwind into the clause BBs. - SmallVector InvokeBBs(MBB->pred_begin(), - MBB->pred_end()); - - // Emit separate machine basic blocks with separate labels for each clause - // before the main landing pad block. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); - MachineInstrBuilder SelectorPHI = BuildMI( - *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI), - FuncInfo->ExceptionSelectorVirtReg); - for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { - MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); - MF->insert(MBB, ClauseBB); - - // Add the edge from the invoke to the clause. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->addSuccessor(ClauseBB); - - // Mark the clause as a landing pad or MI passes will delete it. - ClauseBB->setIsLandingPad(); - - GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); - - // Start the BB with a label. - MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); - BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) - .addSym(ClauseLabel); - - // Construct a simple BB that defines a register with the typeid constant. 
- FuncInfo->MBB = ClauseBB; - FuncInfo->InsertPt = ClauseBB->end(); - unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); - CurDAG->setRoot(SDB->getRoot()); - SDB->clear(); - CodeGenAndEmitDAG(); - - // Add the typeid virtual register to the phi in the main landing pad. - SelectorPHI.addReg(VReg).addMBB(ClauseBB); - } - - // Remove the edge from the invoke to the lpad. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->removeSuccessor(MBB); - - // Restore FuncInfo back to its previous state and select the main landing - // pad block. - FuncInfo->MBB = MBB; - FuncInfo->InsertPt = MBB->end(); - return; - } - // Mark exception register as live in. + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); diff --git a/test/CodeGen/X86/seh-basic.ll b/test/CodeGen/X86/seh-basic.ll deleted file mode 100644 index 69d70d70948c..000000000000 --- a/test/CodeGen/X86/seh-basic.ll +++ /dev/null @@ -1,175 +0,0 @@ -; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s - -define void @two_invoke_merged() { -entry: - invoke void @try_body() - to label %again unwind label %lpad - -again: - invoke void @try_body() - to label %done unwind label %lpad - -done: - ret void - -lpad: - %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) - catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*) - catch i8* bitcast (i32 (i8*, i8*)* @filt1 to i8*) - %sel = extractvalue { i8*, i32 } %vals, 1 - call void @use_selector(i32 %sel) - ret void -} - -; Normal path code - -; CHECK-LABEL: {{^}}two_invoke_merged: -; CHECK: .seh_proc two_invoke_merged -; CHECK: .seh_handler __C_specific_handler, @unwind, @except -; CHECK: .Ltmp0: -; CHECK: callq try_body -; CHECK-NEXT: .Ltmp1: -; CHECK: .Ltmp2: -; CHECK: callq try_body -; CHECK-NEXT: .Ltmp3: -; CHECK: retq - -; Landing pad code - -; CHECK: .Ltmp5: -; 
CHECK: movl $1, %ecx -; CHECK: jmp -; CHECK: .Ltmp6: -; CHECK: movl $2, %ecx -; CHECK: callq use_selector - -; CHECK: .seh_handlerdata -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp3@IMGREL+1 -; CHECK-NEXT: .long filt0@IMGREL -; CHECK-NEXT: .long .Ltmp5@IMGREL -; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp3@IMGREL+1 -; CHECK-NEXT: .long filt1@IMGREL -; CHECK-NEXT: .long .Ltmp6@IMGREL -; CHECK: .text -; CHECK: .seh_endproc - -define void @two_invoke_gap() { -entry: - invoke void @try_body() - to label %again unwind label %lpad - -again: - call void @do_nothing_on_unwind() - invoke void @try_body() - to label %done unwind label %lpad - -done: - ret void - -lpad: - %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) - catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*) - %sel = extractvalue { i8*, i32 } %vals, 1 - call void @use_selector(i32 %sel) - ret void -} - -; Normal path code - -; CHECK-LABEL: {{^}}two_invoke_gap: -; CHECK: .seh_proc two_invoke_gap -; CHECK: .seh_handler __C_specific_handler, @unwind, @except -; CHECK: .Ltmp11: -; CHECK: callq try_body -; CHECK-NEXT: .Ltmp12: -; CHECK: callq do_nothing_on_unwind -; CHECK: .Ltmp13: -; CHECK: callq try_body -; CHECK-NEXT: .Ltmp14: -; CHECK: retq - -; Landing pad code - -; CHECK: .Ltmp16: -; CHECK: movl $1, %ecx -; CHECK: callq use_selector - -; CHECK: .seh_handlerdata -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long .Ltmp11@IMGREL -; CHECK-NEXT: .long .Ltmp12@IMGREL+1 -; CHECK-NEXT: .long filt0@IMGREL -; CHECK-NEXT: .long .Ltmp16@IMGREL -; CHECK-NEXT: .long .Ltmp13@IMGREL -; CHECK-NEXT: .long .Ltmp14@IMGREL+1 -; CHECK-NEXT: .long filt0@IMGREL -; CHECK-NEXT: .long .Ltmp16@IMGREL -; CHECK: .text -; CHECK: .seh_endproc - -define void @two_invoke_nounwind_gap() { -entry: - invoke void @try_body() - to label %again unwind label %lpad - -again: - call void @cannot_unwind() - invoke void @try_body() - to label %done unwind label 
%lpad - -done: - ret void - -lpad: - %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) - catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*) - %sel = extractvalue { i8*, i32 } %vals, 1 - call void @use_selector(i32 %sel) - ret void -} - -; Normal path code - -; CHECK-LABEL: {{^}}two_invoke_nounwind_gap: -; CHECK: .seh_proc two_invoke_nounwind_gap -; CHECK: .seh_handler __C_specific_handler, @unwind, @except -; CHECK: .Ltmp21: -; CHECK: callq try_body -; CHECK-NEXT: .Ltmp22: -; CHECK: callq cannot_unwind -; CHECK: .Ltmp23: -; CHECK: callq try_body -; CHECK-NEXT: .Ltmp24: -; CHECK: retq - -; Landing pad code - -; CHECK: .Ltmp26: -; CHECK: movl $1, %ecx -; CHECK: callq use_selector - -; CHECK: .seh_handlerdata -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp21@IMGREL -; CHECK-NEXT: .long .Ltmp24@IMGREL+1 -; CHECK-NEXT: .long filt0@IMGREL -; CHECK-NEXT: .long .Ltmp26@IMGREL -; CHECK: .text -; CHECK: .seh_endproc - -declare void @try_body() -declare void @do_nothing_on_unwind() -declare void @cannot_unwind() nounwind -declare void @use_selector(i32) - -declare i32 @filt0(i8* %eh_info, i8* %rsp) -declare i32 @filt1(i8* %eh_info, i8* %rsp) - -declare void @handler0() -declare void @handler1() - -declare i32 @__C_specific_handler(...) -declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll deleted file mode 100644 index e911df04ded4..000000000000 --- a/test/CodeGen/X86/seh-safe-div.ll +++ /dev/null @@ -1,196 +0,0 @@ -; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s - -; This test case is also intended to be run manually as a complete functional -; test. It should link, print something, and exit zero rather than crashing. 
-; It is the hypothetical lowering of a C source program that looks like: -; -; int safe_div(int *n, int *d) { -; int r; -; __try { -; __try { -; r = *n / *d; -; } __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) { -; puts("EXCEPTION_ACCESS_VIOLATION"); -; r = -1; -; } -; } __except(GetExceptionCode() == EXCEPTION_INT_DIVIDE_BY_ZERO) { -; puts("EXCEPTION_INT_DIVIDE_BY_ZERO"); -; r = -2; -; } -; return r; -; } - -@str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00" -@str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00" - -define i32 @safe_div(i32* %n, i32* %d) { -entry: - %r = alloca i32, align 4 - invoke void @try_body(i32* %r, i32* %n, i32* %d) - to label %__try.cont unwind label %lpad - -lpad: - %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) - catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*) - catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*) - %ehptr = extractvalue { i8*, i32 } %vals, 0 - %sel = extractvalue { i8*, i32 } %vals, 1 - %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)) - %is_filt0 = icmp eq i32 %sel, %filt0_val - br i1 %is_filt0, label %handler0, label %eh.dispatch1 - -eh.dispatch1: - %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)) - %is_filt1 = icmp eq i32 %sel, %filt1_val - br i1 %is_filt1, label %handler1, label %eh.resume - -handler0: - call void @puts(i8* getelementptr ([27 x i8]* @str1, i32 0, i32 0)) - store i32 -1, i32* %r, align 4 - br label %__try.cont - -handler1: - call void @puts(i8* getelementptr ([29 x i8]* @str2, i32 0, i32 0)) - store i32 -2, i32* %r, align 4 - br label %__try.cont - -eh.resume: - resume { i8*, i32 } %vals - -__try.cont: - %safe_ret = load i32* %r, align 4 - ret i32 %safe_ret -} - -; Normal path code - -; CHECK: {{^}}safe_div: -; CHECK: .seh_proc safe_div -; CHECK: .seh_handler __C_specific_handler, @unwind, 
@except -; CHECK: .Ltmp0: -; CHECK: leaq [[rloc:.*\(%rsp\)]], %rcx -; CHECK: callq try_body -; CHECK-NEXT: .Ltmp1 -; CHECK: .LBB0_7: -; CHECK: movl [[rloc]], %eax -; CHECK: retq - -; Landing pad code - -; CHECK: .Ltmp3: -; CHECK: movl $1, %[[sel:[a-z]+]] -; CHECK: .Ltmp4 -; CHECK: movl $2, %[[sel]] -; CHECK: .L{{.*}}: -; CHECK: cmpl $1, %[[sel]] - -; CHECK: # %handler0 -; CHECK: callq puts -; CHECK: movl $-1, [[rloc]] -; CHECK: jmp .LBB0_7 - -; CHECK: cmpl $2, %[[sel]] - -; CHECK: # %handler1 -; CHECK: callq puts -; CHECK: movl $-2, [[rloc]] -; CHECK: jmp .LBB0_7 - -; FIXME: EH preparation should not call _Unwind_Resume. -; CHECK: callq _Unwind_Resume -; CHECK: ud2 - -; CHECK: .seh_handlerdata -; CHECK: .long 2 -; CHECK: .long .Ltmp0@IMGREL -; CHECK: .long .Ltmp1@IMGREL+1 -; CHECK: .long safe_div_filt0@IMGREL -; CHECK: .long .Ltmp3@IMGREL -; CHECK: .long .Ltmp0@IMGREL -; CHECK: .long .Ltmp1@IMGREL+1 -; CHECK: .long safe_div_filt1@IMGREL -; CHECK: .long .Ltmp4@IMGREL -; CHECK: .text -; CHECK: .seh_endproc - - -define void @try_body(i32* %r, i32* %n, i32* %d) { -entry: - %0 = load i32* %n, align 4 - %1 = load i32* %d, align 4 - %div = sdiv i32 %0, %1 - store i32 %div, i32* %r, align 4 - ret void -} - -; The prototype of these filter functions is: -; int filter(EXCEPTION_POINTERS *eh_ptrs, void *rbp); - -; The definition of EXCEPTION_POINTERS is: -; typedef struct _EXCEPTION_POINTERS { -; EXCEPTION_RECORD *ExceptionRecord; -; CONTEXT *ContextRecord; -; } EXCEPTION_POINTERS; - -; The definition of EXCEPTION_RECORD is: -; typedef struct _EXCEPTION_RECORD { -; DWORD ExceptionCode; -; ... -; } EXCEPTION_RECORD; - -; The exception code can be retreived with two loads, one for the record -; pointer and one for the code. The values of local variables can be -; accessed via rbp, but that would require additional not yet implemented LLVM -; support. 
- -define i32 @safe_div_filt0(i8* %eh_ptrs, i8* %rbp) { - %eh_ptrs_c = bitcast i8* %eh_ptrs to i32** - %eh_rec = load i32** %eh_ptrs_c - %eh_code = load i32* %eh_rec - ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005 - %cmp = icmp eq i32 %eh_code, 3221225477 - %filt.res = zext i1 %cmp to i32 - ret i32 %filt.res -} - -define i32 @safe_div_filt1(i8* %eh_ptrs, i8* %rbp) { - %eh_ptrs_c = bitcast i8* %eh_ptrs to i32** - %eh_rec = load i32** %eh_ptrs_c - %eh_code = load i32* %eh_rec - ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094 - %cmp = icmp eq i32 %eh_code, 3221225620 - %filt.res = zext i1 %cmp to i32 - ret i32 %filt.res -} - -@str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00" - -define i32 @main() { - %d.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - - store i32 10, i32* %n.addr, align 4 - store i32 2, i32* %d.addr, align 4 - %r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr) - call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r1) - - store i32 10, i32* %n.addr, align 4 - store i32 0, i32* %d.addr, align 4 - %r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr) - call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r2) - - %r3 = call i32 @safe_div(i32* %n.addr, i32* null) - call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r3) - ret i32 0 -} - -define void @_Unwind_Resume() { - call void @abort() - unreachable -} - -declare i32 @__C_specific_handler(...) -declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind -declare void @puts(i8*) -declare void @printf(i8*, ...) 
-declare void @abort() From 2d92d99da80052dbb65a6b4e7ed7d39c139379fc Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 17:51:17 +0000 Subject: [PATCH 06/17] Merging r228899: ------------------------------------------------------------------------ r228899 | chandlerc | 2015-02-11 18:30:56 -0800 (Wed, 11 Feb 2015) | 28 lines [slp] Fix a nasty bug in the SLP vectorizer that Joerg pointed out. Apparently some code finally started to tickle this after my canonicalization changes to instcombine. The bug stems from trying to form a vector type out of scalars that aren't compatible at all. In this example, from x86_mmx values. The code in the vectorizer that checks for reasonable types whas checking for aggregates or vectors, but there are lots of other types that should just never reach the vectorizer. Debugging this was made more confusing by the lie in an assert in VectorType::get() -- it isn't that the types are *primitive*. The types must be integer, pointer, or floating point types. No other types are allowed. I've improved the assert and added a helper to the vectorizer to handle the element type validity checks. It now re-uses the VectorType static function and then further excludes weird target-specific types that we probably shouldn't be touching here (x86_fp80 and ppc_fp128). Neither of these are really reachable anyways (neither 80-bit nor 128-bit things will get vectorized) but it seems better to just eagerly exclude such nonesense. I've added a test case, but while it definitely covers two of the paths through this code there may be more paths that would benefit from test coverage. I'm not familiar enough with the SLP vectorizer to synthesize test cases for all of these, but was able to update the code itself by inspection. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228940 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Type.cpp | 7 +-- lib/Transforms/Vectorize/SLPVectorizer.cpp | 20 ++++++-- .../Transforms/SLPVectorizer/X86/bad_types.ll | 50 +++++++++++++++++++ 3 files changed, 70 insertions(+), 7 deletions(-) create mode 100644 test/Transforms/SLPVectorizer/X86/bad_types.ll diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 889705e95fc2..65060dc39d27 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -708,9 +708,10 @@ VectorType::VectorType(Type *ElType, unsigned NumEl) VectorType *VectorType::get(Type *elementType, unsigned NumElements) { Type *ElementType = const_cast(elementType); assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0"); - assert(isValidElementType(ElementType) && - "Elements of a VectorType must be a primitive type"); - + assert(isValidElementType(ElementType) && "Element type of a VectorType must " + "be an integer, floating point, or " + "pointer type."); + LLVMContextImpl *pImpl = ElementType->getContext().pImpl; VectorType *&Entry = ElementType->getContext().pImpl ->VectorTypes[std::make_pair(ElementType, NumElements)]; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2213031ecdb9..bd8a4b3fd3d0 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -75,6 +75,18 @@ static const unsigned MinVecRegSize = 128; static const unsigned RecursionMaxDepth = 12; +/// \brief Predicate for the element types that the SLP vectorizer supports. +/// +/// The most important thing to filter here are types which are invalid in LLVM +/// vectors. We also filter target specific types which have absolutely no +/// meaningful vectorization path such as x86_fp80 and ppc_f128. 
This just +/// avoids spending time checking the cost model and realizing that they will +/// be inevitably scalarized. +static bool isValidElementType(Type *Ty) { + return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() && + !Ty->isPPC_FP128Ty(); +} + /// \returns the parent basic block if all of the instructions in \p VL /// are in the same block or null otherwise. static BasicBlock *getSameBlock(ArrayRef VL) { @@ -1216,7 +1228,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { Type *SrcTy = VL0->getOperand(0)->getType(); for (unsigned i = 0; i < VL.size(); ++i) { Type *Ty = cast(VL[i])->getOperand(0)->getType(); - if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) { + if (Ty != SrcTy || !isValidElementType(Ty)) { BS.cancelScheduling(VL); newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); @@ -3130,7 +3142,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { // Check that the pointer points to scalars. Type *Ty = SI->getValueOperand()->getType(); - if (Ty->isAggregateType() || Ty->isVectorTy()) + if (!isValidElementType(Ty)) continue; // Find the base pointer. 
@@ -3171,7 +3183,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, for (int i = 0, e = VL.size(); i < e; ++i) { Type *Ty = VL[i]->getType(); - if (Ty->isAggregateType() || Ty->isVectorTy()) + if (!isValidElementType(Ty)) return false; Instruction *Inst = dyn_cast(VL[i]); if (!Inst || Inst->getOpcode() != Opcode0) @@ -3391,7 +3403,7 @@ class HorizontalReduction { return false; Type *Ty = B->getType(); - if (Ty->isVectorTy()) + if (!isValidElementType(Ty)) return false; ReductionOpcode = B->getOpcode(); diff --git a/test/Transforms/SLPVectorizer/X86/bad_types.ll b/test/Transforms/SLPVectorizer/X86/bad_types.ll new file mode 100644 index 000000000000..38ed18dad2ac --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/bad_types.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) { +; Ensure we can handle x86_mmx values which are primitive and can be bitcast +; with integer types but can't be put into a vector. +; +; CHECK-LABEL: @test1 +; CHECK: store i64 +; CHECK: store i64 +; CHECK: ret void +entry: + %a.cast = bitcast x86_mmx %a to i64 + %b.cast = bitcast x86_mmx %b to i64 + %a.and = and i64 %a.cast, 42 + %b.and = and i64 %b.cast, 42 + %gep = getelementptr i64* %ptr, i32 1 + store i64 %a.and, i64* %ptr + store i64 %b.and, i64* %gep + ret void +} + +define void @test2(x86_mmx %a, x86_mmx %b) { +; Same as @test1 but using phi-input vectorization instead of store +; vectorization. 
+; +; CHECK-LABEL: @test2 +; CHECK: and i64 +; CHECK: and i64 +; CHECK: ret void +entry: + br i1 undef, label %if.then, label %exit + +if.then: + %a.cast = bitcast x86_mmx %a to i64 + %b.cast = bitcast x86_mmx %b to i64 + %a.and = and i64 %a.cast, 42 + %b.and = and i64 %b.cast, 42 + br label %exit + +exit: + %a.phi = phi i64 [ 0, %entry ], [ %a.and, %if.then ] + %b.phi = phi i64 [ 0, %entry ], [ %b.and, %if.then ] + tail call void @f(i64 %a.phi, i64 %b.phi) + ret void +} + +declare void @f(i64, i64) From b36941f86a33c25ac24aac1586456a03d2e507da Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 17:53:49 +0000 Subject: [PATCH 07/17] Merging r228842: ------------------------------------------------------------------------ r228842 | jvoung | 2015-02-11 08:12:50 -0800 (Wed, 11 Feb 2015) | 17 lines Gold-plugin: Broaden scope of get/release_input_file to scope of Module. Summary: Move calls to get_input_file and release_input_file out of getModuleForFile(). Otherwise release_input_file may end up unmapping a view of the file while the view is still being used by the Module (on 32-bit hosts). Fix for PR22482. Test Plan: Add test using --no-map-whole-files. 
Reviewers: rafael, nlewycky Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D7539 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228942 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/gold/no-map-whole-file.ll | 9 +++++++++ tools/gold/gold-plugin.cpp | 19 ++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 test/tools/gold/no-map-whole-file.ll diff --git a/test/tools/gold/no-map-whole-file.ll b/test/tools/gold/no-map-whole-file.ll new file mode 100644 index 000000000000..21a0c46d28b0 --- /dev/null +++ b/test/tools/gold/no-map-whole-file.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as -o %t.bc %s +; RUN: ld -plugin %llvmshlibdir/LLVMgold.so -plugin-opt=emit-llvm \ +; RUN: --no-map-whole-files -r -o %t2.bc %t.bc +; RUN: llvm-dis < %t2.bc -o - | FileCheck %s + +; CHECK: main +define i32 @main() { + ret i32 0 +} diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 5524bb9922c5..a9909a721c1b 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -559,11 +559,9 @@ static void freeSymName(ld_plugin_symbol &Sym) { } static std::unique_ptr -getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile, +getModuleForFile(LLVMContext &Context, claimed_file &F, + off_t Filesize, raw_fd_ostream *ApiFile, StringSet<> &Internalize, StringSet<> &Maybe) { - ld_plugin_input_file File; - if (get_input_file(F.handle, &File) != LDPS_OK) - message(LDPL_FATAL, "Failed to get file information"); if (get_symbols(F.handle, F.syms.size(), &F.syms[0]) != LDPS_OK) message(LDPL_FATAL, "Failed to get symbol information"); @@ -572,7 +570,7 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile, if (get_view(F.handle, &View) != LDPS_OK) message(LDPL_FATAL, "Failed to get a view of file"); - MemoryBufferRef BufferRef(StringRef((const char *)View, File.filesize), 
""); + MemoryBufferRef BufferRef(StringRef((const char *)View, Filesize), ""); ErrorOr> ObjOrErr = object::IRObjectFile::create(BufferRef, Context); @@ -580,9 +578,6 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile, message(LDPL_FATAL, "Could not read bitcode from file : %s", EC.message().c_str()); - if (release_input_file(F.handle) != LDPS_OK) - message(LDPL_FATAL, "Failed to release file information"); - object::IRObjectFile &Obj = **ObjOrErr; Module &M = Obj.getModule(); @@ -798,8 +793,12 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) { StringSet<> Internalize; StringSet<> Maybe; for (claimed_file &F : Modules) { + ld_plugin_input_file File; + if (get_input_file(F.handle, &File) != LDPS_OK) + message(LDPL_FATAL, "Failed to get file information"); std::unique_ptr M = - getModuleForFile(Context, F, ApiFile, Internalize, Maybe); + getModuleForFile(Context, F, File.filesize, ApiFile, + Internalize, Maybe); if (!options::triple.empty()) M->setTargetTriple(options::triple.c_str()); else if (M->getTargetTriple().empty()) { @@ -808,6 +807,8 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) { if (L.linkInModule(M.get())) message(LDPL_FATAL, "Failed to link module"); + if (release_input_file(F.handle) != LDPS_OK) + message(LDPL_FATAL, "Failed to release file information"); } for (const auto &Name : Internalize) { From e13563e5ba51fb89cc41e88acecf7ad96543be05 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 21:28:02 +0000 Subject: [PATCH 08/17] Merging r228957: ------------------------------------------------------------------------ r228957 | bsteinbr | 2015-02-12 13:04:22 -0800 (Thu, 12 Feb 2015) | 14 lines Fix a crash in the assumption cache when inlining indirect function calls Summary: Instances of the AssumptionCache are per function, so we can't re-use the same AssumptionCache instance when recursing in the CallAnalyzer to analyze a different function. 
Instead we have to pass the AssumptionCacheTracker to the CallAnalyzer so it can get the right AssumptionCache on demand. Reviewers: hfinkel Subscribers: llvm-commits, hans Differential Revision: http://reviews.llvm.org/D7533 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228965 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/IPA/InlineCost.cpp | 12 ++++++------ test/Transforms/Inline/inline-indirect.ll | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 test/Transforms/Inline/inline-indirect.ll diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 58ac5d33409e..86e7fc21b950 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -52,7 +52,7 @@ class CallAnalyzer : public InstVisitor { const TargetTransformInfo &TTI; /// The cache of @llvm.assume intrinsics. - AssumptionCache &AC; + AssumptionCacheTracker *ACT; // The called function. Function &F; @@ -146,8 +146,8 @@ class CallAnalyzer : public InstVisitor { public: CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, - AssumptionCache &AC, Function &Callee, int Threshold) - : DL(DL), TTI(TTI), AC(AC), F(Callee), Threshold(Threshold), Cost(0), + AssumptionCacheTracker *ACT, Function &Callee, int Threshold) + : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. 
- CallAnalyzer CA(DL, TTI, AC, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -1110,7 +1110,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // the ephemeral values multiple times (and they're completely determined by // the callee, so this is purely duplicate work). SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(&F, &AC, EphValues); + CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this @@ -1310,7 +1310,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, << "...\n"); CallAnalyzer CA(Callee->getDataLayout(), *TTI, - ACT->getAssumptionCache(*Callee), *Callee, Threshold); + ACT, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); diff --git a/test/Transforms/Inline/inline-indirect.ll b/test/Transforms/Inline/inline-indirect.ll new file mode 100644 index 000000000000..f6eb528e0650 --- /dev/null +++ b/test/Transforms/Inline/inline-indirect.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -inline -disable-output 2>/dev/null +; This test used to trigger an assertion in the assumption cache when +; inlining the indirect call +declare void @llvm.assume(i1) + +define void @foo() { + ret void +} + +define void @bar(void ()*) { + call void @llvm.assume(i1 true) + call void %0(); + ret void +} + +define void @baz() { + call void @bar(void ()* @foo) + ret void +} From a6c8e652209ad69b25ac24f68e0c13912da3d226 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 23:42:03 +0000 Subject: [PATCH 09/17] Merging r228979: ------------------------------------------------------------------------ r228979 | 
majnemer | 2015-02-12 15:26:26 -0800 (Thu, 12 Feb 2015) | 8 lines X86: Don't crash if we can't decode the pshufb mask Constant pool entries are uniqued by their contents regardless of their type. This means that a pshufb can have a shuffle mask which isn't a simple array of bytes. The code path which attempts to decode the mask didn't check for failure, causing PR22559. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228983 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 ++ test/CodeGen/X86/pshufb-mask-comments.ll | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a1fd34ea8000..78a11e64fef0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5473,6 +5473,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (auto *C = dyn_cast(MaskCP->getConstVal())) { DecodePSHUFBMask(C, Mask); + if (Mask.empty()) + return false; break; } diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll index 303c4a684761..ca5a02ce8d3a 100644 --- a/test/CodeGen/X86/pshufb-mask-comments.ll +++ b/test/CodeGen/X86/pshufb-mask-comments.ll @@ -37,4 +37,16 @@ define <16 x i8> @test4(<2 x i64>* %V) { ret <16 x i8> %1 } +define <16 x i8> @test5() { +; CHECK-LABEL: test5 +; CHECK: pshufb {{.*}} + store <2 x i64> , <2 x i64>* undef, align 16 + %l = load <2 x i64>* undef, align 16 + %shuffle = shufflevector <2 x i64> %l, <2 x i64> undef, <2 x i32> zeroinitializer + store <2 x i64> %shuffle, <2 x i64>* undef, align 16 + %1 = load <16 x i8>* undef, align 16 + %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> %1) + ret <16 x i8> %2 +} + declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone From 488379ab3fc857228bd425f3f69836615bdd8a05 Mon Sep 17 
00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 23:45:01 +0000 Subject: [PATCH 10/17] Merging r226588: ------------------------------------------------------------------------ r226588 | adrian | 2015-01-20 10:03:37 -0800 (Tue, 20 Jan 2015) | 1 line Add an assertion and prefer a crash over an infinite loop. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DebugInfo.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 290dbe29c707..443db5eec468 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -525,12 +525,15 @@ bool DISubprogram::Verify() const { while ((IA = DL.getInlinedAt())) DL = DebugLoc::getFromDILocation(IA); DL.getScopeAndInlinedAt(Scope, IA); + assert(Scope && "debug location has no scope"); assert(!IA); while (!DIDescriptor(Scope).isSubprogram()) { DILexicalBlockFile D(Scope); Scope = D.isLexicalBlockFile() ? D.getScope() : DebugLoc::getFromDILexicalBlock(Scope).getScope(); + if (!Scope) + llvm_unreachable("lexical block file has no scope"); } if (!DISubprogram(Scope).describes(F)) return false; From 22606ee99b4e0484d81bf125a768f34f225c58b8 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 23:46:59 +0000 Subject: [PATCH 11/17] Merging r226616: ------------------------------------------------------------------------ r226616 | adrian | 2015-01-20 14:37:25 -0800 (Tue, 20 Jan 2015) | 2 lines DebugLocs without a scope should fail the verification. Follow-up to r226588. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228985 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DebugInfo.cpp | 6 +++--- test/DebugInfo/location-verifier.ll | 33 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 test/DebugInfo/location-verifier.ll diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 443db5eec468..71b43942e882 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -525,15 +525,15 @@ bool DISubprogram::Verify() const { while ((IA = DL.getInlinedAt())) DL = DebugLoc::getFromDILocation(IA); DL.getScopeAndInlinedAt(Scope, IA); - assert(Scope && "debug location has no scope"); + if (!Scope) + return false; assert(!IA); while (!DIDescriptor(Scope).isSubprogram()) { DILexicalBlockFile D(Scope); Scope = D.isLexicalBlockFile() ? D.getScope() : DebugLoc::getFromDILexicalBlock(Scope).getScope(); - if (!Scope) - llvm_unreachable("lexical block file has no scope"); + assert(Scope && "lexical block file has no scope"); } if (!DISubprogram(Scope).describes(F)) return false; diff --git a/test/DebugInfo/location-verifier.ll b/test/DebugInfo/location-verifier.ll new file mode 100644 index 000000000000..0e56be42e1f9 --- /dev/null +++ b/test/DebugInfo/location-verifier.ll @@ -0,0 +1,33 @@ +; RUN: not llvm-as -disable-output -verify-debug-info < %s 2>&1 | FileCheck %s +; ModuleID = 'test.c' +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; Function Attrs: nounwind ssp uwtable +define i32 @foo() #0 { +entry: + ret i32 42, !dbg !13 +} + +attributes #0 = { nounwind ssp uwtable } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10, !11} +!llvm.ident = !{!12} + +!0 = !{!"0x11\0012\00clang version 3.7.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/test.c] [DW_LANG_C99] +!1 = !{!"test.c", !""} +!2 = !{} +!3 = !{!4} 
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\000\000\001", !1, !5, !6, null, i32 ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/test.c] +!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = !{!8} +!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = !{i32 2, !"Dwarf Version", i32 2} +!10 = !{i32 2, !"Debug Info Version", i32 2} +!11 = !{i32 1, !"PIC Level", i32 2} +!12 = !{!"clang version 3.7.0 "} +; An old-style MDLocation should not pass verify. +; CHECK: DISubprogram does not Verify +!13 = !{i32 2, i32 2, !4, null} From ae64fdfc2ae931b84a423b1515fccd0feea80245 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 12 Feb 2015 23:51:24 +0000 Subject: [PATCH 12/17] Merging r228969: ------------------------------------------------------------------------ r228969 | hfinkel | 2015-02-12 14:43:52 -0800 (Thu, 12 Feb 2015) | 7 lines [SDAG] Don't try to use FP_EXTEND/FP_ROUND for int<->fp promotions The PowerPC backend has long promoted some floating-point vector operations (such as select) to integer vector operations. Unfortunately, this behavior was broken by r216555. When using FP_EXTEND/FP_ROUND for promotions, we must check that both the old and new types are floating-point types. Otherwise, we must use BITCAST as we did prior to r216555 for everything. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228986 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeVectorOps.cpp | 8 ++++--- test/CodeGen/PowerPC/vsel-prom.ll | 23 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/PowerPC/vsel-prom.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 11e6b38f0762..3a8c276e2618 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -390,7 +390,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { if (Op.getOperand(j) .getValueType() .getVectorElementType() - .isFloatingPoint()) + .isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); else Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); @@ -399,8 +400,9 @@ SDValue VectorLegalizer::Promote(SDValue Op) { } Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); - if (VT.isFloatingPoint() || - (VT.isVector() && VT.getVectorElementType().isFloatingPoint())) + if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || + (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); else return DAG.getNode(ISD::BITCAST, dl, VT, Op); diff --git a/test/CodeGen/PowerPC/vsel-prom.ll b/test/CodeGen/PowerPC/vsel-prom.ll new file mode 100644 index 000000000000..dd219ec0da6f --- /dev/null +++ b/test/CodeGen/PowerPC/vsel-prom.ll @@ -0,0 +1,23 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @Compute_Lateral() #0 { +entry: + br i1 
undef, label %if.then, label %if.end + +if.then: ; preds = %entry + unreachable + +if.end: ; preds = %entry + %0 = select i1 undef, <2 x double> undef, <2 x double> zeroinitializer + %1 = extractelement <2 x double> %0, i32 1 + store double %1, double* undef, align 8 + ret void + +; CHECK-LABEL: @Compute_Lateral +} + +attributes #0 = { nounwind } + From 0b37b7bf52b92f9e1e7ffaff4063fd1184d2eec3 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 13 Feb 2015 03:19:15 +0000 Subject: [PATCH 13/17] Merging r229029, minus the test which didn't work on the branch: ------------------------------------------------------------------------ r229029 | chandlerc | 2015-02-12 18:30:01 -0800 (Thu, 12 Feb 2015) | 16 lines [IC] Fix a bug with the instcombine canonicalizing of loads and propagating of metadata. We were propagating !nonnull metadata even when the newly formed load is no longer of a pointer type. This is clearly broken and results in LLVM failing the verifier and aborting. This patch just restricts the propagation of !nonnull metadata to when we actually have a pointer type. This bug report and the initial version of this patch was provided by Charles Davis! Many thanks for finding this! We still need to add logic to round-trip the metadata correctly if we combine from pointer types to integer types and then back by using range metadata for the integer type loads. But this is the minimal and safe version of the patch, which is important so we can backport it into 3.6. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@229036 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 11 +++++++++-- test/Transforms/InstCombine/loadstore-metadata.ll | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index af1694d3453c..6230c000cf57 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -330,11 +330,17 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: case LLVMContext::MD_mem_parallel_loop_access: - case LLVMContext::MD_nonnull: // All of these directly apply. NewLoad->setMetadata(ID, N); break; + case LLVMContext::MD_nonnull: + // FIXME: We should translate this into range metadata for integer types + // and vice versa. + if (NewTy->isPointerTy()) + NewLoad->setMetadata(ID, N); + break; + case LLVMContext::MD_range: // FIXME: It would be nice to propagate this in some way, but the type // conversions make it hard. @@ -548,13 +554,14 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) { case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: case LLVMContext::MD_mem_parallel_loop_access: - case LLVMContext::MD_nonnull: // All of these directly apply. NewStore->setMetadata(ID, N); break; case LLVMContext::MD_invariant_load: + case LLVMContext::MD_nonnull: case LLVMContext::MD_range: + // These don't apply for stores. 
break; } } diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll index ad6a11cf6eb1..3d18ac0e3344 100644 --- a/test/Transforms/InstCombine/loadstore-metadata.ll +++ b/test/Transforms/InstCombine/loadstore-metadata.ll @@ -1,5 +1,7 @@ ; RUN: opt -instcombine -S < %s | FileCheck %s +target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" + define i32 @test_load_cast_combine_tbaa(float* %ptr) { ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA. ; CHECK-LABEL: @test_load_cast_combine_tbaa( From 1ee84b8db2f4c1a1cc7423a17a3157474260c81e Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 25 Jun 2014 20:33:49 -0700 Subject: [PATCH 14/17] Add a NullCheckElimination pass This pass is not Rust-specific, in that all of its transformations are intended to be correct for arbitrary LLVM IR, but it targets idioms found in IR generated by `rustc`, e.g. heavy use of `inbounds` GEPs. --- include/llvm/InitializePasses.h | 3 + include/llvm/LinkAllPasses.h | 3 + include/llvm/Transforms/Scalar.h | 7 + lib/Transforms/Scalar/CMakeLists.txt | 1 + .../Scalar/NullCheckElimination.cpp | 273 ++++++++++++++++++ lib/Transforms/Scalar/Scalar.cpp | 1 + test/Transforms/NullCheckElimination/basic.ll | 165 +++++++++++ 7 files changed, 453 insertions(+) create mode 100644 lib/Transforms/Scalar/NullCheckElimination.cpp create mode 100644 test/Transforms/NullCheckElimination/basic.ll diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 30280033ee20..b9ea8489aeb2 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -287,6 +287,9 @@ void initializeStackMapLivenessPass(PassRegistry&); void initializeMachineCombinerPass(PassRegistry &); void initializeLoadCombinePass(PassRegistry&); void initializeRewriteSymbolsPass(PassRegistry&); + +// Specific to the rust-lang llvm branch: +void initializeNullCheckEliminationPass(PassRegistry&); } #endif diff 
--git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 2e8feab6d29d..1764652e37e7 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -167,6 +167,9 @@ namespace { (void) llvm::createSeparateConstOffsetFromGEPPass(); (void) llvm::createRewriteSymbolsPass(); + // Specific to the rust-lang llvm branch: + (void) llvm::createNullCheckEliminationPass(); + (void)new llvm::IntervalPartition(); (void)new llvm::ScalarEvolution(); ((llvm::Function*)nullptr)->viewCFGOnly(); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 5dcd89948759..12fd59b57919 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -405,6 +405,13 @@ createSeparateConstOffsetFromGEPPass(const TargetMachine *TM = nullptr, // BasicBlockPass *createLoadCombinePass(); +// Specific to the rust-lang llvm branch: +//===----------------------------------------------------------------------===// +// +// NullCheckElimination - Eliminate null checks. +// +FunctionPass *createNullCheckEliminationPass(); + } // End llvm namespace #endif diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b3ee11ed67cd..4218a5bbbbf8 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_library(LLVMScalarOpts LowerAtomic.cpp MemCpyOptimizer.cpp MergedLoadStoreMotion.cpp + NullCheckElimination.cpp PartiallyInlineLibCalls.cpp Reassociate.cpp Reg2Mem.cpp diff --git a/lib/Transforms/Scalar/NullCheckElimination.cpp b/lib/Transforms/Scalar/NullCheckElimination.cpp new file mode 100644 index 000000000000..4f04bd65c204 --- /dev/null +++ b/lib/Transforms/Scalar/NullCheckElimination.cpp @@ -0,0 +1,273 @@ +//===-- NullCheckElimination.cpp - Null Check Elimination Pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "null-check-elimination" + +namespace { + struct NullCheckElimination : public FunctionPass { + static char ID; + NullCheckElimination() : FunctionPass(ID) { + initializeNullCheckEliminationPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + + private: + static const unsigned kPhiLimit = 16; + typedef SmallPtrSet SmallPhiSet; + enum NullCheckResult { + NotNullCheck, + NullCheckEq, + NullCheckNe, + }; + + bool isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, PHINode*); + + NullCheckResult isCmpNullCheck(ICmpInst*); + std::pair findNullCheck(Use*); + + bool blockContainsLoadDerivedFrom(BasicBlock*, Value*); + + DenseSet NonNullOrPoisonValues; + }; +} + +char NullCheckElimination::ID = 0; +INITIALIZE_PASS_BEGIN(NullCheckElimination, + "null-check-elimination", + "Null Check Elimination", + false, false) +INITIALIZE_PASS_END(NullCheckElimination, + "null-check-elimination", + "Null Check Elimination", + false, false) + +FunctionPass *llvm::createNullCheckEliminationPass() { + return new NullCheckElimination(); +} + +bool NullCheckElimination::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + bool Changed = false; + + // Collect argumetns with the `nonnull` attribute. + for (auto &Arg : F.args()) { + if (Arg.hasNonNullAttr()) + NonNullOrPoisonValues.insert(&Arg); + } + + // Collect instructions that definitely produce nonnull-or-poison values. + // At the moment, this is restricted to inbounds GEPs. 
It would be slightly + // more difficult to include uses of values dominated by a null check, since + // then we would have to consider uses instead of mere values. + for (auto &BB : F) { + for (auto &I : BB) { + if (auto *GEP = dyn_cast(&I)) { + if (GEP->isInBounds()) { + NonNullOrPoisonValues.insert(GEP); + } + } + } + } + + // Find phis that are derived entirely from nonnull-or-poison values, + // including other phis that are themselves derived entirely from these + // values. + for (auto &BB : F) { + for (auto &I : BB) { + auto *PN = dyn_cast(&I); + if (!PN) + break; + + SmallPhiSet VisitedPHIs; + if (isNonNullOrPoisonPhi(&VisitedPHIs, PN)) + NonNullOrPoisonValues.insert(PN); + } + } + + for (auto &BB : F) { + // This could also be extended to handle SwitchInst, but using a SwitchInst + // for a null check seems unlikely. + auto *BI = dyn_cast(BB.getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + + // The first operand of a conditional branch is the condition. + auto result = findNullCheck(&BI->getOperandUse(0)); + if (!result.first) + continue; + assert((result.second == NullCheckEq || result.second == NullCheckNe) && + "valid null check kind expected if ICmpInst was found"); + + BasicBlock *NonNullBB; + if (result.second == NullCheckEq) { + // If the comparison instruction is checking for equaliity with null, + // then the pointer is nonnull on the `false` branch. + NonNullBB = BI->getSuccessor(1); + } else { + // Otherwise, if the comparison instruction is checking for inequality + // with null, the pointer is nonnull on the `true` branch. + NonNullBB = BI->getSuccessor(0); + } + + Use *U = result.first; + ICmpInst *CI = cast(U->get()); + unsigned nonConstantIndex; + if (isa(CI->getOperand(0))) + nonConstantIndex = 1; + else + nonConstantIndex = 0; + + // Due to the semantics of poison values in LLVM, we have to check that + // there is actually some externally visible side effect that is dependent + // on the poison value. 
Since poison values are otherwise treated as undef, + // and a load of undef is undefined behavior (which is externally visible), + // it suffices to look for a load of the nonnull-or-poison value. + // + // This could be extended to any block control-dependent on this branch of + // the null check, it's unclear if that will actually catch more cases in + // real code. + Value *PtrV = CI->getOperand(nonConstantIndex); + if (blockContainsLoadDerivedFrom(NonNullBB, PtrV)) { + Type *BoolTy = CI->getType(); + Value *NewV = ConstantInt::get(BoolTy, result.second == NullCheckNe); + U->set(NewV); + } + } + + NonNullOrPoisonValues.clear(); + + return Changed; +} + +/// Checks whether a phi is derived from known nonnnull-or-poison values, +/// including other phis that are derived from the same. May return `false` +/// conservatively in some cases, e.g. if exploring a large cycle of phis. +bool +NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, + PHINode *PN) { + // If we've already seen this phi, return `true`, even though it may not be + // nonnull, since some other operand in a cycle of phis may invalidate the + // optimistic assumption that the entire cycle is nonnull, including this phi. + if (!VisitedPhis->insert(PN).second) + return true; + + // Use a sensible limit to avoid iterating over long chains of phis that are + // unlikely to be nonnull. + if (VisitedPhis->size() >= kPhiLimit) + return false; + + unsigned numOperands = PN->getNumOperands(); + for (unsigned i = 0; i < numOperands; ++i) { + Value *SrcValue = PN->getOperand(i); + if (NonNullOrPoisonValues.count(SrcValue)) { + continue; + } else if (auto *SrcPN = dyn_cast(SrcValue)) { + if (!isNonNullOrPoisonPhi(VisitedPhis, SrcPN)) + return false; + } else { + return false; + } + } + + return true; +} + +/// Determines whether an ICmpInst is a null check of a known nonnull-or-poison +/// value. 
+NullCheckElimination::NullCheckResult +NullCheckElimination::isCmpNullCheck(ICmpInst *CI) { + if (!CI->isEquality()) + return NotNullCheck; + + unsigned constantIndex; + if (NonNullOrPoisonValues.count(CI->getOperand(0))) + constantIndex = 1; + else if (NonNullOrPoisonValues.count(CI->getOperand(1))) + constantIndex = 0; + else + return NotNullCheck; + + auto *C = dyn_cast(CI->getOperand(constantIndex)); + if (!C || !C->isZeroValue()) + return NotNullCheck; + + return + CI->getPredicate() == llvm::CmpInst::ICMP_EQ ? NullCheckEq : NullCheckNe; +} + +/// Finds the Use, if any, of an ICmpInst null check of a nonnull-or-poison +/// value. +std::pair +NullCheckElimination::findNullCheck(Use *U) { + auto *I = dyn_cast(U->get()); + if (!I) + return std::make_pair(nullptr, NotNullCheck); + + if (auto *CI = dyn_cast(I)) { + NullCheckResult result = isCmpNullCheck(CI); + if (result == NotNullCheck) + return std::make_pair(nullptr, NotNullCheck); + else + return std::make_pair(U, result); + } + + unsigned opcode = I->getOpcode(); + if (opcode == Instruction::Or || opcode == Instruction::And) { + auto result = findNullCheck(&I->getOperandUse(0)); + if (result.second == NotNullCheck) + return findNullCheck(&I->getOperandUse(1)); + else + return result; + } + + return std::make_pair(nullptr, NotNullCheck); +} + +/// Determines whether `BB` contains a load from `PtrV`, or any inbounds GEP +/// derived from `PtrV`. 
+bool +NullCheckElimination::blockContainsLoadDerivedFrom(BasicBlock *BB, + Value *PtrV) { + for (auto &I : *BB) { + auto *LI = dyn_cast(&I); + if (!LI) + continue; + + Value *V = LI->getPointerOperand(); + while (NonNullOrPoisonValues.count(V)) { + if (V == PtrV) + return true; + + auto *GEP = dyn_cast(V); + if (!GEP) + break; + + V = GEP->getOperand(0); + } + } + + return false; +} + diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index a16e9e29a1f1..8dcd6720597d 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -69,6 +69,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); initializeLoadCombinePass(Registry); + initializeNullCheckEliminationPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { diff --git a/test/Transforms/NullCheckElimination/basic.ll b/test/Transforms/NullCheckElimination/basic.ll new file mode 100644 index 000000000000..f176fb32c3fd --- /dev/null +++ b/test/Transforms/NullCheckElimination/basic.ll @@ -0,0 +1,165 @@ +; RUN: opt < %s -null-check-elimination -instsimplify -S | FileCheck %s + +define i64 @test_arg_simple(i64* nonnull %p) { +entry: + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %b0 = icmp eq i64* %p0, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_arg_simple +; CHECK-NOT: , null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_arg_simple_fail(i64* %p) { +entry: + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %b0 = icmp eq i64* %p0, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_arg_simple_fail +; CHECK: null + +match_else: + %i0 = load i64* %p0 + %p1 
= getelementptr inbounds i64* %p0, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_simple(i64* %p) { +entry: + %p0 = getelementptr inbounds i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %b0 = icmp eq i64* %p1, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_simple +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_simple_fail(i64* %p) { +entry: + %p0 = getelementptr i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %b0 = icmp eq i64* %p1, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_simple_fail +; CHECK: null + +match_else: + %i0 = load i64* %p1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_or(i64* %p, i64* %q) { +entry: + %p0 = getelementptr inbounds i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %b0 = icmp eq i64* %p1, %q + %b1 = icmp eq i64* %p1, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_or +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_and(i64* %p, i64* %q) { +entry: + %p0 = getelementptr inbounds i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %b0 = icmp eq i64* %p1, %q + %b1 = icmp eq i64* %p1, null + %b2 = and i1 
%b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_and +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_derived_load(i64* %p) { +entry: + %p0 = getelementptr inbounds i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %b0 = icmp eq i64* %p1, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_derived_load +; CHECK-NOT: null + +match_else: + %p2 = getelementptr inbounds i64* %p1, i64 1 + %i0 = load i64* %p2 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + From 26495818bf5c05152836af87019c2a75560d9b7d Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Thu, 26 Jun 2014 23:41:06 -0700 Subject: [PATCH 15/17] Add the NullCheckElimination pass to the default pass list Since the NullCheckElimination pass has a similar intent to the CorrelatedValuePropagation pass, I decided to run it right after the both places that the latter runs. --- lib/Transforms/IPO/PassManagerBuilder.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 0414caa61fca..906d56fa6dcc 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -220,6 +220,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createEarlyCSEPass()); // Catch trivial redundancies MPM.add(createJumpThreadingPass()); // Thread jumps. 
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + // Specific to the rust-lang llvm branch: + MPM.add(createNullCheckEliminationPass()); // Eliminate null checks MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); @@ -255,6 +257,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_Peephole, MPM); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); + // Specific to the rust-lang llvm branch: + MPM.add(createNullCheckEliminationPass()); // Eliminate null checks MPM.add(createDeadStoreEliminationPass()); // Delete dead stores addExtensionsToPM(EP_ScalarOptimizerLate, MPM); From 34683aff21703409a163b2ab17e925a139d9fc52 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 29 Jun 2014 11:18:05 -0700 Subject: [PATCH 16/17] Improve the NullCheckElimination pass Teach the NullCheckElimination pass about recurrences involving inbounds GEPs. This allows the pass to optimize null checks from most uses of single slice and vector iterators. This still isn't sufficient to eliminate null checks from uses of multiple iterators with zip(). 
--- .../Scalar/NullCheckElimination.cpp | 373 ++++++++++++------ test/Transforms/NullCheckElimination/basic.ll | 120 ++++++ .../NullCheckElimination/complex.ll | 316 +++++++++++++++ 3 files changed, 688 insertions(+), 121 deletions(-) create mode 100644 test/Transforms/NullCheckElimination/complex.ll diff --git a/lib/Transforms/Scalar/NullCheckElimination.cpp b/lib/Transforms/Scalar/NullCheckElimination.cpp index 4f04bd65c204..f0fa3aed6ff9 100644 --- a/lib/Transforms/Scalar/NullCheckElimination.cpp +++ b/lib/Transforms/Scalar/NullCheckElimination.cpp @@ -34,78 +34,119 @@ namespace { private: static const unsigned kPhiLimit = 16; typedef SmallPtrSet SmallPhiSet; - enum NullCheckResult { - NotNullCheck, - NullCheckEq, - NullCheckNe, + + enum CmpKind { + /// A null check of an unconditionally nonnull-or-poison value. + NullCheckDefiniteCmp, + + /// A null check of the phi representing a nontrivial inbounds recurrence, + /// which is not known to be unconditionally nonnull-or-poison. + NullCheckRecurrenceCmp, + + /// A comparison of the phi representing a nontrivial inbounds recurrence + /// with an inbounds GEP derived from the base of the recurrence, which + /// will typically represent a bound on the recurrence. 
+ RecurrencePhiBoundCmp, + }; + + enum CmpPred { + CmpEq, + CmpNe, + }; + + struct CmpDesc { + CmpDesc(CmpKind k, CmpPred p, Use *u, Value *v) + : kind(k), pred(p), use(u), ptrValue(v) { } + + CmpKind kind; + CmpPred pred; + Use *use; + Value *ptrValue; }; + typedef SmallVector CmpDescVec; + bool isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, PHINode*); + Value *isNontrivialInBoundsRecurrence(PHINode*); - NullCheckResult isCmpNullCheck(ICmpInst*); - std::pair findNullCheck(Use*); + bool classifyCmp(CmpDescVec*, Use*); + bool findRelevantCmps(CmpDescVec*, Use*); bool blockContainsLoadDerivedFrom(BasicBlock*, Value*); + /// Tracks values that are unconditionally nonnull-or-poison by definition, + /// but not values that are known nonnull-or-poison in a given context by + /// their uses, e.g. in recurrences. DenseSet NonNullOrPoisonValues; + + /// Tracks values that are bases of nontrivial inbounds recurrences. + DenseSet InBoundsRecurrenceBases; + + /// Maps phis that correspond to nontrivial inbounds recurrences to their + /// base values. 
+ DenseMap InBoundsRecurrenceBaseMap; }; } char NullCheckElimination::ID = 0; INITIALIZE_PASS_BEGIN(NullCheckElimination, - "null-check-elimination", - "Null Check Elimination", - false, false) + "null-check-elimination", + "Null Check Elimination", + false, false) INITIALIZE_PASS_END(NullCheckElimination, - "null-check-elimination", - "Null Check Elimination", - false, false) + "null-check-elimination", + "Null Check Elimination", + false, false) FunctionPass *llvm::createNullCheckEliminationPass() { return new NullCheckElimination(); } +static GetElementPtrInst *castToInBoundsGEP(Value *V) { + auto *GEP = dyn_cast(V); + if (!GEP || !GEP->isInBounds()) + return nullptr; + return GEP; +} + +static bool isZeroConstant(Value *V) { + auto *C = dyn_cast(V); + return C && C->isZeroValue(); +} + bool NullCheckElimination::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; bool Changed = false; - // Collect argumetns with the `nonnull` attribute. + // Collect arguments with the `nonnull` attribute. for (auto &Arg : F.args()) { if (Arg.hasNonNullAttr()) NonNullOrPoisonValues.insert(&Arg); } - // Collect instructions that definitely produce nonnull-or-poison values. - // At the moment, this is restricted to inbounds GEPs. It would be slightly - // more difficult to include uses of values dominated by a null check, since - // then we would have to consider uses instead of mere values. + // Collect instructions that definitely produce nonnull-or-poison values. At + // the moment, this is restricted to inbounds GEPs, and phis that are derived + // entirely from nonnull-or-poison-values (including other phis that are + // themselves derived from the same). 
for (auto &BB : F) { for (auto &I : BB) { - if (auto *GEP = dyn_cast(&I)) { - if (GEP->isInBounds()) { - NonNullOrPoisonValues.insert(GEP); - } + if (auto *GEP = castToInBoundsGEP(&I)) { + NonNullOrPoisonValues.insert(GEP); + } else if (auto *PN = dyn_cast(&I)) { + SmallPhiSet VisitedPHIs; + if (isNonNullOrPoisonPhi(&VisitedPHIs, PN)) + NonNullOrPoisonValues.insert(PN); + + if (auto *BaseV = isNontrivialInBoundsRecurrence(PN)) { + InBoundsRecurrenceBases.insert(BaseV); + InBoundsRecurrenceBaseMap[PN] = BaseV; + } } } } - // Find phis that are derived entirely from nonnull-or-poison values, - // including other phis that are themselves derived entirely from these - // values. - for (auto &BB : F) { - for (auto &I : BB) { - auto *PN = dyn_cast(&I); - if (!PN) - break; - - SmallPhiSet VisitedPHIs; - if (isNonNullOrPoisonPhi(&VisitedPHIs, PN)) - NonNullOrPoisonValues.insert(PN); - } - } - for (auto &BB : F) { // This could also be extended to handle SwitchInst, but using a SwitchInst // for a null check seems unlikely. @@ -114,49 +155,75 @@ bool NullCheckElimination::runOnFunction(Function &F) { continue; // The first operand of a conditional branch is the condition. - auto result = findNullCheck(&BI->getOperandUse(0)); - if (!result.first) + CmpDescVec Cmps; + if (!findRelevantCmps(&Cmps, &BI->getOperandUse(0))) continue; - assert((result.second == NullCheckEq || result.second == NullCheckNe) && - "valid null check kind expected if ICmpInst was found"); - - BasicBlock *NonNullBB; - if (result.second == NullCheckEq) { - // If the comparison instruction is checking for equaliity with null, - // then the pointer is nonnull on the `false` branch. - NonNullBB = BI->getSuccessor(1); - } else { - // Otherwise, if the comparison instruction is checking for inequality - // with null, the pointer is nonnull on the `true` branch. 
- NonNullBB = BI->getSuccessor(0); - } - Use *U = result.first; - ICmpInst *CI = cast(U->get()); - unsigned nonConstantIndex; - if (isa(CI->getOperand(0))) - nonConstantIndex = 1; - else - nonConstantIndex = 0; - - // Due to the semantics of poison values in LLVM, we have to check that - // there is actually some externally visible side effect that is dependent - // on the poison value. Since poison values are otherwise treated as undef, - // and a load of undef is undefined behavior (which is externally visible), - // it suffices to look for a load of the nonnull-or-poison value. - // - // This could be extended to any block control-dependent on this branch of - // the null check, it's unclear if that will actually catch more cases in - // real code. - Value *PtrV = CI->getOperand(nonConstantIndex); - if (blockContainsLoadDerivedFrom(NonNullBB, PtrV)) { - Type *BoolTy = CI->getType(); - Value *NewV = ConstantInt::get(BoolTy, result.second == NullCheckNe); - U->set(NewV); + for (auto &Cmp : Cmps) { + // We are only tracking comparisons of inbounds recurrence phis with their + // bounds so that we can eliminate null checks based on them, which are of + // kind NullCheckRecurrenceCmp. We can't use a lone RecurrencePhiBoundCmp + // to perform any optimizations. + if (Cmp.kind == RecurrencePhiBoundCmp) + continue; + + if (Cmp.kind == NullCheckRecurrenceCmp) { + // Look for a matching RecurrencePhiBoundCmp. If one exists, then we can + // be sure that this branch condition depends on the recurrence. Since + // both the bounds and the recurrence successor value are inbounds, and + // they are both derived from the base, the base being null would imply + // that the bounds and recurrence successor values are poison. 
+ bool FoundMatchingCmp = false; + for (auto &OtherCmp : Cmps) { + if (OtherCmp.kind == RecurrencePhiBoundCmp && + OtherCmp.ptrValue == Cmp.ptrValue) { + FoundMatchingCmp = true; + break; + } + } + if (!FoundMatchingCmp) + continue; + } + + BasicBlock *NonNullBB; + if (Cmp.pred == CmpEq) { + // If the comparison instruction is checking for equality with null then + // the pointer is nonnull on the `false` branch. + NonNullBB = BI->getSuccessor(1); + } else { + // Otherwise, if the comparison instruction is checking for inequality + // with null, the pointer is nonnull on the `true` branch. + NonNullBB = BI->getSuccessor(0); + } + + // This is a crude approximation of control dependence: if the branch + // target has a single predecessor edge, then it must be control- + // dependent on the branch. + if (!NonNullBB->getSinglePredecessor()) + continue; + + // Due to the semantics of poison values in LLVM, we have to check that + // there is actually some externally visible side effect that is dependent + // on the poison value. Since poison values are otherwise treated as + // undef, and a load of undef is undefined behavior (which is externally + // visible), it suffices to look for a load of the nonnull-or-poison + // value. + // + // This could be extended to any block control-dependent on this branch of + // the null check, it's unclear if that will actually catch more cases in + // real code. + if (blockContainsLoadDerivedFrom(NonNullBB, Cmp.ptrValue)) { + Type *BoolTy = Type::getInt1Ty(F.getContext()); + Value *NewV = ConstantInt::get(BoolTy, Cmp.pred == CmpNe); + Cmp.use->set(NewV); + Changed = true; + } } } NonNullOrPoisonValues.clear(); + InBoundsRecurrenceBases.clear(); + InBoundsRecurrenceBaseMap.clear(); return Changed; } @@ -164,9 +231,12 @@ bool NullCheckElimination::runOnFunction(Function &F) { /// Checks whether a phi is derived from known nonnnull-or-poison values, /// including other phis that are derived from the same. 
May return `false` /// conservatively in some cases, e.g. if exploring a large cycle of phis. +/// +/// This function may also insert any inbounds GEPs that it finds into +/// NonNullOrPoisonValues. bool NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, - PHINode *PN) { + PHINode *PN) { // If we've already seen this phi, return `true`, even though it may not be // nonnull, since some other operand in a cycle of phis may invalidate the // optimistic assumption that the entire cycle is nonnull, including this phi. @@ -183,9 +253,11 @@ NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, Value *SrcValue = PN->getOperand(i); if (NonNullOrPoisonValues.count(SrcValue)) { continue; + } else if (auto *GEP = castToInBoundsGEP(SrcValue)) { + NonNullOrPoisonValues.insert(GEP); } else if (auto *SrcPN = dyn_cast(SrcValue)) { if (!isNonNullOrPoisonPhi(VisitedPhis, SrcPN)) - return false; + return false; } else { return false; } @@ -194,75 +266,134 @@ NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, return true; } -/// Determines whether an ICmpInst is a null check of a known nonnull-or-poison -/// value. -NullCheckElimination::NullCheckResult -NullCheckElimination::isCmpNullCheck(ICmpInst *CI) { +/// Determines whether a phi corresponds to an inbounds recurrence where the +/// base is not a known nonnull-or-poison value. Returns the base value, or +/// null if the phi doesn't correspond to such a recurrence. 
+Value *NullCheckElimination::isNontrivialInBoundsRecurrence(PHINode *PN) { + if (PN->getNumOperands() != 2) + return nullptr; + + Value *BaseV; + GetElementPtrInst *SuccessorI; + if (auto *GEP = castToInBoundsGEP(PN->getOperand(0))) { + BaseV = PN->getOperand(1); + SuccessorI = GEP; + } else if (auto *GEP = castToInBoundsGEP(PN->getOperand(1))) { + BaseV = PN->getOperand(0); + SuccessorI = GEP; + } else { + return nullptr; + } + + if (NonNullOrPoisonValues.count(BaseV) || SuccessorI->getOperand(0) != PN) + return nullptr; + + return BaseV; +} + +/// Determines whether an ICmpInst is one of the forms that is relevant to +/// null check elimination, and then adds a CmpDesc to Cmps when applicable. +/// The ICmpInst is passed as a Use so this Use can be placed into the CmpDesc, +/// but the Use parameter must be a Use of an ICmpInst. +bool NullCheckElimination::classifyCmp(CmpDescVec *Cmps, Use *U) { + auto *CI = cast(U); if (!CI->isEquality()) - return NotNullCheck; - - unsigned constantIndex; - if (NonNullOrPoisonValues.count(CI->getOperand(0))) - constantIndex = 1; - else if (NonNullOrPoisonValues.count(CI->getOperand(1))) - constantIndex = 0; - else - return NotNullCheck; - - auto *C = dyn_cast(CI->getOperand(constantIndex)); - if (!C || !C->isZeroValue()) - return NotNullCheck; - - return - CI->getPredicate() == llvm::CmpInst::ICMP_EQ ? NullCheckEq : NullCheckNe; + return false; + + CmpPred Pred = (CI->getPredicate() == llvm::CmpInst::ICMP_EQ) ? 
CmpEq : CmpNe; + Value *Op0 = CI->getOperand(0); + Value *Op1 = CI->getOperand(1); + + if (NonNullOrPoisonValues.count(Op0)) { + if (isZeroConstant(Op1)) { + Cmps->push_back(CmpDesc(NullCheckDefiniteCmp, Pred, U, Op0)); + return true; + } + + auto it = InBoundsRecurrenceBaseMap.find(Op1); + if (it == InBoundsRecurrenceBaseMap.end()) + return false; + + auto *GEP = castToInBoundsGEP(Op0); + if (!GEP) + return false; + + auto *BaseV = it->second; + if (GEP->getOperand(0) != BaseV) + return false; + + Cmps->push_back(CmpDesc(RecurrencePhiBoundCmp, Pred, U, Op1)); + return true; + } + + // Since InstCombine or InstSimplify should have canonicalized a comparison + // with `null` to have the `null` in the second operand, we don't need to + // handle the case where Op0 is `null` like we did with Op1 above. + if (NonNullOrPoisonValues.count(Op1)) { + auto it = InBoundsRecurrenceBaseMap.find(Op0); + if (it == InBoundsRecurrenceBaseMap.end()) + return false; + + auto *GEP = castToInBoundsGEP(Op1); + if (!GEP) + return false; + + auto *BaseV = it->second; + if (GEP->getOperand(0) != BaseV) + return false; + + Cmps->push_back(CmpDesc(RecurrencePhiBoundCmp, Pred, U, Op0)); + return true; + } + + if (InBoundsRecurrenceBaseMap.count(Op0)) { + if (isZeroConstant(Op1)) { + Cmps->push_back(CmpDesc(NullCheckRecurrenceCmp, Pred, U, Op0)); + return true; + } + } + + return false; } -/// Finds the Use, if any, of an ICmpInst null check of a nonnull-or-poison -/// value. -std::pair -NullCheckElimination::findNullCheck(Use *U) { +/// Classifies the comparisons that are relevant to null check elimination, +/// starting from a Use. The CmpDescs of the comparisons are collected in Cmps. 
+bool NullCheckElimination::findRelevantCmps(CmpDescVec *Cmps, Use *U) { auto *I = dyn_cast(U->get()); if (!I) - return std::make_pair(nullptr, NotNullCheck); - - if (auto *CI = dyn_cast(I)) { - NullCheckResult result = isCmpNullCheck(CI); - if (result == NotNullCheck) - return std::make_pair(nullptr, NotNullCheck); - else - return std::make_pair(U, result); - } + return false; - unsigned opcode = I->getOpcode(); - if (opcode == Instruction::Or || opcode == Instruction::And) { - auto result = findNullCheck(&I->getOperandUse(0)); - if (result.second == NotNullCheck) - return findNullCheck(&I->getOperandUse(1)); - else - return result; + if (isa(I)) + return classifyCmp(Cmps, U); + + unsigned Opcode = I->getOpcode(); + if (Opcode == Instruction::Or || Opcode == Instruction::And) { + bool FoundCmps = findRelevantCmps(Cmps, &I->getOperandUse(0)); + FoundCmps |= findRelevantCmps(Cmps, &I->getOperandUse(1)); + return FoundCmps; } - return std::make_pair(nullptr, NotNullCheck); + return false; } /// Determines whether `BB` contains a load from `PtrV`, or any inbounds GEP /// derived from `PtrV`. 
bool NullCheckElimination::blockContainsLoadDerivedFrom(BasicBlock *BB, - Value *PtrV) { + Value *PtrV) { for (auto &I : *BB) { auto *LI = dyn_cast(&I); if (!LI) continue; Value *V = LI->getPointerOperand(); - while (NonNullOrPoisonValues.count(V)) { + while (1) { if (V == PtrV) - return true; + return true; - auto *GEP = dyn_cast(V); + auto *GEP = castToInBoundsGEP(V); if (!GEP) - break; + break; V = GEP->getOperand(0); } diff --git a/test/Transforms/NullCheckElimination/basic.ll b/test/Transforms/NullCheckElimination/basic.ll index f176fb32c3fd..8ea5851f627a 100644 --- a/test/Transforms/NullCheckElimination/basic.ll +++ b/test/Transforms/NullCheckElimination/basic.ll @@ -22,6 +22,28 @@ return: ret i64 0 } +define i64 @test_arg_simple_ne(i64* nonnull %p) { +entry: + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %b0 = icmp ne i64* %p0, null + br i1 %b0, label %match_else, label %return + +; CHECK-LABEL: @test_arg_simple +; CHECK-NOT: , null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + define i64 @test_arg_simple_fail(i64* %p) { entry: br label %loop_body @@ -44,6 +66,31 @@ return: ret i64 0 } +define i64 @test_arg_simple_fail_control_dep(i64* nonnull %p) { +entry: + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + br i1 undef, label %loop_body2, label %match_else + +loop_body2: + %b0 = icmp eq i64* %p0, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_arg_simple_fail_control_dep +; CHECK: null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + define i64 @test_inbounds_simple(i64* %p) { entry: %p0 = getelementptr inbounds i64* %p, i64 0 @@ -67,6 +114,29 @@ return: ret i64 0 } +define 
i64 @test_inbounds_simple_ne(i64* %p) { +entry: + %p0 = getelementptr inbounds i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %b0 = icmp ne i64* %p1, null + br i1 %b0, label %match_else, label %return + +; CHECK-LABEL: @test_inbounds_simple +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + define i64 @test_inbounds_simple_fail(i64* %p) { entry: %p0 = getelementptr i64* %p, i64 0 @@ -90,6 +160,32 @@ return: ret i64 0 } +define i64 @test_inbounds_simple_fail_control_dep(i64* %p) { +entry: + %p0 = getelementptr i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + br i1 undef, label %loop_body2, label %match_else + +loop_body2: + %b0 = icmp eq i64* %p1, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_simple_fail_control_dep +; CHECK: null + +match_else: + %i0 = load i64* %p1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + define i64 @test_inbounds_or(i64* %p, i64* %q) { entry: %p0 = getelementptr inbounds i64* %p, i64 0 @@ -163,3 +259,27 @@ return: ret i64 0 } +define i64 @test_inbounds_derived_load_fail(i64* %p) { +entry: + %p0 = getelementptr inbounds i64* %p, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %b0 = icmp eq i64* %p1, null + br i1 %b0, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_derived_load_fail +; CHECK: icmp eq i64* %p1, null + +match_else: + %p2 = getelementptr inbounds i64* %p1, i64 1 + %p3 = getelementptr i64* %p1, i64 1 + %i0 = load i64* %p3 + %b1 = icmp ugt i64 %i0, 10 + br i1 %b1, label %return, label %loop_body + +return: + ret i64 0 +} + diff --git 
a/test/Transforms/NullCheckElimination/complex.ll b/test/Transforms/NullCheckElimination/complex.ll new file mode 100644 index 000000000000..3059d02ad8dc --- /dev/null +++ b/test/Transforms/NullCheckElimination/complex.ll @@ -0,0 +1,316 @@ +; RUN: opt < %s -null-check-elimination -instsimplify -S | FileCheck %s + +define i64 @test_arg_multiple(i64* nonnull %p, i64* nonnull %q) { +entry: + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %q0 = phi i64* [ %q, %entry ], [ %q1, %match_else ] + %b0 = icmp eq i64* %p0, null + %b1 = icmp eq i64* %q0, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_arg_multiple +; CHECK-NOT: , null + +match_else: + %i0 = load i64* %p0 + %i1 = load i64* %q0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %q1 = getelementptr inbounds i64* %q0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_arg_multiple_fail(i64* nonnull %p, i64* %q) { +entry: + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %q0 = phi i64* [ %q, %entry ], [ %q1, %match_else ] + %b0 = icmp eq i64* %p0, null + %b1 = icmp eq i64* %q0, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_arg_multiple_fail +; CHECK-NOT: icmp eq i64* %p0, null +; CHECK: icmp eq i64* %q0, null + +match_else: + %i0 = load i64* %p0 + %i1 = load i64* %q0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %q1 = getelementptr inbounds i64* %q0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_multiple(i64* %p, i64* %q) { +entry: + %p0 = getelementptr inbounds i64* %p, i64 0 + %q0 = getelementptr inbounds i64* %q, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %q1 = phi i64* [ %q0, %entry ], [ %q2, %match_else ] + %b0 
= icmp eq i64* %p1, null + %b1 = icmp eq i64* %q1, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_multiple +; CHECK-NOT: , null + +match_else: + %i0 = load i64* %p1 + %i1 = load i64* %q1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %q2 = getelementptr inbounds i64* %q1, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_multiple_fail(i64* %p, i64* %q) { +entry: + %p0 = getelementptr i64* %p, i64 0 + %q0 = getelementptr inbounds i64* %q, i64 0 + br label %loop_body + +loop_body: + %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ] + %q1 = phi i64* [ %q0, %entry ], [ %q2, %match_else ] + %b0 = icmp eq i64* %p1, null + %b1 = icmp eq i64* %q1, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_multiple_fail +; CHECK: icmp eq i64* %p1, null +; CHECK-NOT: icmp eq i64* %q1, null + +match_else: + %i0 = load i64* %p1 + %i1 = load i64* %q1 + %p2 = getelementptr inbounds i64* %p1, i64 1 + %q2 = getelementptr inbounds i64* %q1, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_or(i64* %p, i64 %len) { +entry: + %q = getelementptr inbounds i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %b0 = icmp eq i64* %p0, %q + %b1 = icmp eq i64* %p0, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_recurrence_or +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_or_fail1(i64* %p, i64 %len) { +entry: + %q = getelementptr i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi 
i64* [ %p, %entry ], [ %p1, %match_else ] + %b0 = icmp eq i64* %p0, %q + %b1 = icmp eq i64* %p0, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_recurrence_or_fail1 +; CHECK: icmp eq i64* %p0, null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_or_fail2(i64* %p, i64 %len) { +entry: + %q = getelementptr inbounds i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %b0 = icmp eq i64* %p0, %q + %b1 = icmp eq i64* %p0, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_recurrence_or_fail2 +; CHECK: icmp eq i64* %p0, null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_and(i64* %p, i64 %len) { +entry: + %q = getelementptr inbounds i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ] + %b0 = icmp eq i64* %p0, %q + %b1 = icmp eq i64* %p0, null + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_recurrence_and +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_or_rev(i64* %p, i64 %len) { +entry: + %q = getelementptr inbounds i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ] + %b0 = icmp eq i64* %p0, null + %b1 = icmp eq i64* %q, %p0 + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: 
@test_inbounds_recurrence_or_rev +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_or_rev_fail1(i64* %p, i64 %len) { +entry: + %q = getelementptr i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ] + %b0 = icmp eq i64* %p0, null + %b1 = icmp eq i64* %q, %p0 + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_recurrence_or_rev_fail1 +; CHECK: icmp eq i64* %p0, null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_or_rev_fail2(i64* %p, i64 %len) { +entry: + %q = getelementptr inbounds i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ] + %b0 = icmp eq i64* %p0, null + %b1 = icmp eq i64* %q, %p0 + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_recurrence_or_rev_fail2 +; CHECK: icmp eq i64* %p0, null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 %b3, label %return, label %loop_body + +return: + ret i64 0 +} + +define i64 @test_inbounds_recurrence_and_rev(i64* %p, i64 %len) { +entry: + %q = getelementptr inbounds i64* %p, i64 %len + br label %loop_body + +loop_body: + %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ] + %b0 = icmp eq i64* %p0, null + %b1 = icmp eq i64* %q, %p0 + %b2 = or i1 %b0, %b1 + br i1 %b2, label %return, label %match_else + +; CHECK-LABEL: @test_inbounds_recurrence_and_rev +; CHECK-NOT: null + +match_else: + %i0 = load i64* %p0 + %p1 = getelementptr inbounds i64* %p0, i64 1 + %b3 = icmp ugt i64 %i0, 10 + br i1 
%b3, label %return, label %loop_body + +return: + ret i64 0 +} + From 2089cab13e7f92b487ba0dc1df9f6c05116b004a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Steinbrink?= Date: Thu, 12 Feb 2015 21:24:36 +0100 Subject: [PATCH 17/17] Disable the PassInfo cache assertions to make the cache effective in builds with assertions enabled Since the PassInfo cache does a regular, uncached, slow lookup for the asserted condition, it's not very effective *cough* when assertions are enabled. Since disabling these assertions gives quite a nice perf boost and it's not really worse than the patch we had previously, let's just do that. --- lib/IR/LegacyPassManager.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index fa8d50ec160c..6bf6060c47b7 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -733,9 +733,6 @@ const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const { const PassInfo *&PI = AnalysisPassInfos[AID]; if (!PI) PI = PassRegistry::getPassRegistry()->getPassInfo(AID); - else - assert(PI == PassRegistry::getPassRegistry()->getPassInfo(AID) && - "The pass info pointer changed for an analysis ID!"); return PI; }