From 7ade561d7f475d3cfb3c94d1668425299e5b431d Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Wed, 11 Nov 2015 16:19:39 +0000 Subject: [PATCH 0001/1132] [CMake] Make llvm_install_library_symlink respect LLVM_LIBDIR_SUFFIX. This is required to support multilib install targets, and addresses a regression introduced in r252093. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252749 91177308-0d34-0410-b5e6-96231b3b80d8 --- cmake/modules/AddLLVM.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 16514d6a1cd..cee383353ad 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1069,7 +1069,7 @@ function(llvm_install_library_symlink name dest type) set(full_name ${CMAKE_${type}_LIBRARY_PREFIX}${name}${CMAKE_${type}_LIBRARY_SUFFIX}) set(full_dest ${CMAKE_${type}_LIBRARY_PREFIX}${dest}${CMAKE_${type}_LIBRARY_SUFFIX}) - set(output_dir lib) + set(output_dir lib${LLVM_LIBDIR_SUFFIX}) if(WIN32 AND "${type}" STREQUAL "SHARED") set(output_dir bin) endif() From 46be9ff861fc267b4d1cfe64889a18ddf1564a4f Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Fri, 13 Nov 2015 10:29:12 -0800 Subject: [PATCH 0002/1132] Annoint John McCall as the owner of Swift language-specific changes in LLVM --- CODE_OWNERS.TXT | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT index 0a6f057341e..b5e25077d85 100644 --- a/CODE_OWNERS.TXT +++ b/CODE_OWNERS.TXT @@ -118,6 +118,10 @@ N: David Majnemer E: david.majnemer@gmail.com D: IR Constant Folder, InstCombine +N: John McCall +E: rjmccall@apple.com +D: Swift language-specific changes + N: Dylan McKay E: dylanmckay34@gmail.com D: AVR Backend From 7bae82deaac4030fd6547571e838e97eb3bc045e Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Sat, 5 Dec 2015 11:49:46 -0800 Subject: [PATCH 0003/1132] Add legal notice for pull requests and reference to contribution guidelines. 
GitHub will look for this file when a new pull request is opened and offer it to the user. --- CONTRIBUTING.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..a0c1644fc66 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,14 @@ +By submitting a pull request, you represent that you have the right to license +your contribution to Apple and the community, and agree by submitting the patch +that your contributions are licensed under the [Swift +license](https://swift.org/LICENSE.txt). + +--- + +Changes to this repository follow special considerations as described on +Swift.org under "[LLVM and Swift](https://swift.org/contributing/#llvm-and-swift)". +Please make sure your change is appropriate for this repository. + +Before submitting a pull request, please make sure you have tested your +changes and that they follow the Swift project [guidelines for contributing +code](https://swift.org/contributing/#contributing-code). From 78090fcc93f3a9e8062fb19f3fa10abfb9c7e58b Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 9 Dec 2015 01:19:50 +0000 Subject: [PATCH 0004/1132] [AArch64][ARM] Don't base interleaved op legality on type alloc size. Otherwise, we think that most types that look like they'd fit in a legal vector type are legal (so, basically, *any* vector type with a size between 33 and 128 bits, I think, since we use pow2 alignment; e.g., v2i25, v3f32, ...). DataLayout::getTypeAllocSize rounds up based on alignment. When checking for target intrinsic legality, that's not what we want: if rounding makes a difference, the type isn't legal, and the target intrinsics shouldn't be used, as they are always assumed legal. One could make the argument that alloc size is ultimately the most relevant here, since we're dealing with LD/ST intrinsics. That's only true if we did legalize them though; that's a problem for another day. 
Use DataLayout::getTypeSizeInBits instead of getTypeAllocSizeInBits. Type::getSizeInBits can't be used because that'd gratuitously break pointer vector support. Some of these uses are currently fine, because we only hit them when the type is already known legal (e.g., r114454). Update them for consistency. It's faster to avoid the rounding anyway! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255089 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++-- .../AArch64/AArch64TargetTransformInfo.cpp | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 12 +++--- lib/Target/ARM/ARMTargetTransformInfo.cpp | 4 +- .../AArch64/aarch64-interleaved-accesses.ll | 40 ++++++++++++++++++- test/CodeGen/ARM/arm-interleaved-accesses.ll | 40 +++++++++++++++++++ 6 files changed, 91 insertions(+), 15 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 14d2f6fb61a..4e986890741 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6705,7 +6705,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. 
- uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; @@ -6731,7 +6731,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); @@ -6974,7 +6974,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad( const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); + unsigned VecSize = DL.getTypeSizeInBits(VecTy); // Skip if we do not have NEON and skip illegal vector types. if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128)) @@ -7060,7 +7060,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // Skip if we do not have NEON and skip illegal vector types. 
if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128)) diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ab17bb810d4..ff3a8b1c4de 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -447,7 +447,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // ldN/stN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e24f2f316ba..cdcb253d957 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -11470,7 +11470,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. 
auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -11496,7 +11496,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -11831,8 +11831,8 @@ bool ARMTargetLowering::lowerInterleavedLoad( Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; // Skip if we do not have NEON and skip illegal vector types and vector types // with i64/f64 elements (vldN doesn't support i64/f64 elements). @@ -11921,8 +11921,8 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; // Skip if we do not have NEON and skip illegal vector types and vector types // with i64/f64 elements (vstN doesn't support i64/f64 elements). 
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 45a45a7013c..9f0e4c3dcae 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -481,12 +481,12 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, assert(isa(VecTy) && "Expect a vector type"); // vldN/vstN doesn't support vector types of i64/f64 element. - bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64; if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // vldN/vstN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll index 545aeda8860..1bc2a3ccb1c 100644 --- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll +++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON -; RUN: llc -march=aarch64 -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON ; NEON-LABEL: load_factor2: ; NEON: ld2 { v0.8b, v1.8b }, [x0] @@ -232,3 +232,39 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < store <16 x i32> %interleaved.vec, <16 
x i32>* %base, align 4 ret void } + +; Check that we do something sane with illegal types. + +; NEON-LABEL: load_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: ldr q[[V:[0-9]+]], [x0] +; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s +; NEON-NEXT: ret +; NONEON-LABEL: load_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: ldr s0, [x0] +; NONEON-NEXT: ldr s1, [x0, #8] +; NONEON-NEXT: ret +define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind { + %tmp1 = load <3 x float>, <3 x float>* %p, align 16 + %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> + ret <3 x float> %tmp2 +} + +; NEON-LABEL: store_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s +; NEON-NEXT: st1 { v0.d }[0], [x0] +; NEON-NEXT: ret +; NONEON-LABEL: store_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2 +; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0 +; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32 +; NONEON-NEXT: str x[[RES]], [x0] +; NONEON-NEXT: ret +define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind { + %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> + store <3 x float> %tmp1, <3 x float>* %p, align 16 + ret void +} diff --git a/test/CodeGen/ARM/arm-interleaved-accesses.ll b/test/CodeGen/ARM/arm-interleaved-accesses.ll index c3aa2d6b4da..002e71f6d9b 100644 --- a/test/CodeGen/ARM/arm-interleaved-accesses.ll +++ b/test/CodeGen/ARM/arm-interleaved-accesses.ll @@ -264,3 +264,43 @@ define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspa store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C ret void } + +; Check that we do something sane with illegal types. 
+ +; NEON-LABEL: load_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: vld1.64 {d16, d17}, [r0:128] +; NEON-NEXT: vuzp.32 q8, {{.*}} +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, {{.*}} +; NEON-NEXT: mov pc, lr +; NONEON-LABEL: load_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0] +; NONEON-NEXT: ldr r1, [r0, #8] +; NONEON-NEXT: mov r0, [[ELT0]] +; NONEON-NEXT: mov pc, lr +define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind { + %tmp1 = load <3 x float>, <3 x float>* %p, align 16 + %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> + ret <3 x float> %tmp2 +} + +; This lowering isn't great, but it's at least correct. + +; NEON-LABEL: store_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: vldr d17, [sp] +; NEON-NEXT: vmov d16, r2, r3 +; NEON-NEXT: vuzp.32 q8, {{.*}} +; NEON-NEXT: vstr d16, [r0] +; NEON-NEXT: mov pc, lr +; NONEON-LABEL: store_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: stm r0, {r1, r3} +; NONEON-NEXT: mov pc, lr +define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind { + %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> + store <3 x float> %tmp1, <3 x float>* %p, align 16 + ret void +} From cce515504daf19f040560b871f6ab3fc84154168 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Sat, 14 Nov 2015 05:51:41 +0000 Subject: [PATCH 0005/1132] Don't recompute LCSSA after loop-unrolling when possible. Summary: Currently we always recompute LCSSA for outer loops after unrolling an inner loop. That leads to compile time problem when we have big loop nests, and we can solve it by avoiding unnecessary work. For instance, if w eonly do partial unrolling, we don't break LCSSA, so we don't need to rebuild it. Also, if all exits from the inner loop are inside the enclosing loop, then complete unrolling won't break LCSSA either. 
I replaced unconditional LCSSA recomputation with conditional recomputation + unconditional assert and added several tests, which were failing when I experimented with it. Soon I plan to follow up with a similar patch for recalculation of dominators tree. Reviewers: hfinkel, dexonsmith, bogner, joker.eph, chandlerc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D14526 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253126 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LoopUnroll.cpp | 12 +- test/Transforms/LoopUnroll/rebuild_lcssa.ll | 119 ++++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/LoopUnroll/rebuild_lcssa.ll diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index ad4c388e406..3999989ec89 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -221,6 +221,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? 
bool CompletelyUnroll = Count == TripCount; + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + Loop *ParentL = L->getParentLoop(); + bool AllExitsAreInsideParentLoop = !ParentL || + std::all_of(ExitBlocks.begin(), ExitBlocks.end(), + [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -554,7 +560,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - formLCSSARecursively(*OuterL, *DT, LI, SE); + if (CompletelyUnroll && !AllExitsAreInsideParentLoop) + formLCSSARecursively(*OuterL, *DT, LI, SE); + else + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } } diff --git a/test/Transforms/LoopUnroll/rebuild_lcssa.ll b/test/Transforms/LoopUnroll/rebuild_lcssa.ll new file mode 100644 index 00000000000..49498492344 --- /dev/null +++ b/test/Transforms/LoopUnroll/rebuild_lcssa.ll @@ -0,0 +1,119 @@ +; RUN: opt < %s -loop-unroll -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; This test shows how unrolling an inner loop could break LCSSA for an outer +; loop, and there is no cheap way to recover it. +; +; In this case the inner loop, L3, is being unrolled. It only runs one +; iteration, so unrolling basically means replacing +; br i1 true, label %exit, label %L3_header +; with +; br label %exit +; +; However, this change messes up the loops structure: for instance, block +; L3_body no longer belongs to L2. It becomes an exit block for L2, so LCSSA +; phis for definitions in L2 should now be placed there. In particular, we need +; to insert such a definition for %y1. 
+ +; CHECK-LABEL: @foo1 +define void @foo1() { +entry: + br label %L1_header + +L1_header: + br label %L2_header + +L2_header: + %y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ] + br label %L3_header + +L3_header: + %y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ] + %x = add i64 undef, -1 + br i1 true, label %L2_latch, label %L3_body + +L2_latch: + %x.lcssa = phi i64 [ %x, %L3_header ] + br label %L2_header + +; CHECK: L3_body: +; CHECK-NEXT: %y1.lcssa = phi i64 [ %y1, %L3_header ] +L3_body: + store i64 %y1, i64* undef + br i1 false, label %L3_latch, label %L1_latch + +L3_latch: + br i1 true, label %exit, label %L3_header + +L1_latch: + %y.lcssa = phi i64 [ %y2, %L3_body ] + br label %L1_header + +exit: + ret void +} + +; Additional tests for some corner cases. +; +; CHECK-LABEL: @foo2 +define void @foo2() { +entry: + br label %L1_header + +L1_header: + br label %L2_header + +L2_header: + %a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ] + br label %L3_header + +L3_header: + %b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ] + %dec_us = add i64 undef, -1 + br i1 true, label %L2_header, label %L3_break_to_L1 + +; CHECK: L3_break_to_L1: +; CHECK-NEXT: %a.lcssa = phi i64 [ %a, %L3_header ] +L3_break_to_L1: + br i1 false, label %L3_latch, label %L1_latch + +L1_latch: + %b_lcssa = phi i64 [ %b, %L3_break_to_L1 ] + br label %L1_header + +L3_latch: + br i1 true, label %Exit, label %L3_header + +Exit: + ret void +} + +; CHECK-LABEL: @foo3 +define void @foo3() { +entry: + br label %L1_header + +L1_header: + %a = phi i8* [ %b, %L1_latch ], [ null, %entry ] + br i1 undef, label %L2_header, label %L1_latch + +L2_header: + br i1 undef, label %L2_latch, label %L1_latch + +; CHECK: L2_latch: +; CHECK-NEXT: %a.lcssa = phi i8* [ %a, %L2_header ] +L2_latch: + br i1 true, label %L2_exit, label %L2_header + +L1_latch: + %b = phi i8* [ undef, %L1_header ], [ null, %L2_header ] + br label %L1_header + +L2_exit: + %a_lcssa1 = phi i8* [ %a, %L2_latch ] + br label 
%Exit + +Exit: + %a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ] + ret void +} From 74aaeb9e09a8f1a27af8437b2f3770ad547c9786 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Mon, 16 Nov 2015 21:17:26 +0000 Subject: [PATCH 0006/1132] [PR25538]: Fix a failure caused by r253126. In r253126 we stopped to recompute LCSSA after loop unrolling in all cases, except the unrolling is full and at least one of the loop exits is outside the parent loop. In other cases the transformation should not break LCSSA, but it turned out, that we also call SimplifyLoop on the parent loop, which might break LCSSA by itself. This fix just triggers LCSSA recomputation in this case as well. I'm committing it without a test case for now, but I'll try to invent one. It's a bit tricky because in an isolated test LoopSimplify would be scheduled before LoopUnroll, and thus will change the test and hide the problem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253253 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LoopUnroll.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 3999989ec89..44dde1b51cf 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -550,7 +550,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - simplifyLoop(OuterL, DT, LI, PP, SE, AC); + bool Simplified = simplifyLoop(OuterL, DT, LI, PP, SE, AC); // LCSSA must be performed on the outermost affected loop. 
The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after @@ -560,7 +560,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - if (CompletelyUnroll && !AllExitsAreInsideParentLoop) + if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) formLCSSARecursively(*OuterL, *DT, LI, SE); else assert(OuterL->isLCSSAForm(*DT) && From ef2471f98e7f89cf66ec30b8d6a61eb5e638c6c9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 00:13:12 +0000 Subject: [PATCH 0007/1132] Add Instruction::getFunction; NFC Will be used in a upcoming patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254975 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Instruction.h | 7 +++++++ lib/IR/Instruction.cpp | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 581ac09cf0c..c356c4fb5ad 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -65,6 +65,13 @@ class Instruction : public User, public ilist_node { const Module *getModule() const; Module *getModule(); + /// \brief Return the function this instruction belongs to. + /// + /// Note: it is undefined behavior to call this on an instruction not + /// currently inserted into a function. + const Function *getFunction() const; + Function *getFunction(); + /// removeFromParent - This method unlinks 'this' from the containing basic /// block, but does not delete it. 
/// diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index b5a30a4969b..7bd50328b12 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -62,6 +62,11 @@ Module *Instruction::getModule() { return getParent()->getModule(); } +Function *Instruction::getFunction() { return getParent()->getParent(); } + +const Function *Instruction::getFunction() const { + return getParent()->getParent(); +} void Instruction::removeFromParent() { getParent()->getInstList().remove(getIterator()); From 28416c564303a7dbcb7edd3728fdc862b07bac03 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 00:13:17 +0000 Subject: [PATCH 0008/1132] [SCEVExpander] Have hoistIVInc preserve LCSSA Summary: (Note: the problematic invocation of hoistIVInc that caused PR24804 came from IndVarSimplify, not from SCEVExpander itself) Fixes PR24804. Test case by David Majnemer. Reviewers: hfinkel, majnemer, atrick, mzolotukhin Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15058 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254976 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/LoopInfo.h | 73 +++++++++++++++++++++++ lib/Analysis/ScalarEvolutionExpander.cpp | 3 + test/Transforms/IndVarSimplify/pr24804.ll | 25 ++++++++ 3 files changed, 101 insertions(+) create mode 100644 test/Transforms/IndVarSimplify/pr24804.ll diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 19ebabf6d40..84425daf6ed 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -37,6 +37,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include @@ -683,6 +684,78 @@ class LoopInfo : public LoopInfoBase { // it as a replacement will not break LCSSA form. 
return ToLoop->contains(getLoopFor(From->getParent())); } + + /// \brief Checks if moving a specific instruction can break LCSSA in any + /// loop. + /// + /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, + /// assuming that the function containing \p Inst and \p NewLoc is currently + /// in LCSSA form. + bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) { + assert(Inst->getFunction() == NewLoc->getFunction() && + "Can't reason about IPO!"); + + auto *OldBB = Inst->getParent(); + auto *NewBB = NewLoc->getParent(); + + // Movement within the same loop does not break LCSSA (the equality check is + // to avoid doing a hashtable lookup in case of intra-block movement). + if (OldBB == NewBB) + return true; + + auto *OldLoop = getLoopFor(OldBB); + auto *NewLoop = getLoopFor(NewBB); + + if (OldLoop == NewLoop) + return true; + + // Check if Outer contains Inner; with the null loop counting as the + // "outermost" loop. + auto Contains = [](const Loop *Outer, const Loop *Inner) { + return !Outer || Outer->contains(Inner); + }; + + // To check that the movement of Inst to before NewLoc does not break LCSSA, + // we need to check two sets of uses for possible LCSSA violations at + // NewLoc: the users of NewInst, and the operands of NewInst. + + // If we know we're hoisting Inst out of an inner loop to an outer loop, + // then the uses *of* Inst don't need to be checked. + + if (!Contains(NewLoop, OldLoop)) { + for (Use &U : Inst->uses()) { + auto *UI = cast(U.getUser()); + auto *UBB = isa(UI) ? cast(UI)->getIncomingBlock(U) + : UI->getParent(); + if (UBB != NewBB && getLoopFor(UBB) != NewLoop) + return false; + } + } + + // If we know we're sinking Inst from an outer loop into an inner loop, then + // the *operands* of Inst don't need to be checked. + + if (!Contains(OldLoop, NewLoop)) { + // See below on why we can't handle phi nodes here. 
+ if (isa(Inst)) + return false; + + for (Use &U : Inst->operands()) { + auto *DefI = dyn_cast(U.get()); + if (!DefI) + return false; + + // This would need adjustment if we allow Inst to be a phi node -- the + // new use block won't simply be NewBB. + + auto *DefBlock = DefI->getParent(); + if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop) + return false; + } + } + + return true; + } }; // Allow clients to walk the list of nested loops... diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index d763ca4c132..01df859d037 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -944,6 +944,9 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) return false; + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) + return false; + // Check that the chain of IV operands leading back to Phi can be hoisted. 
SmallVector IVIncs; for(;;) { diff --git a/test/Transforms/IndVarSimplify/pr24804.ll b/test/Transforms/IndVarSimplify/pr24804.ll new file mode 100644 index 00000000000..6f89481853a --- /dev/null +++ b/test/Transforms/IndVarSimplify/pr24804.ll @@ -0,0 +1,25 @@ +; RUN: opt -indvars -loop-idiom -loop-deletion -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Checking for a crash + +define void @f(i32* %a) { +; CHECK-LABEL: @f( +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %for.cond, %entry + %iv = phi i32 [ 0, %entry ], [ %add, %for.inc ], [ %iv, %for.cond ] + %add = add nsw i32 %iv, 1 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom + br i1 undef, label %for.cond, label %for.inc + +for.inc: ; preds = %for.cond + br i1 undef, label %for.cond, label %for.end + +for.end: ; preds = %for.inc + ret void +} From 01feb75c536aa97a69acf021a01fdd160a11475d Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 8 Dec 2015 00:13:21 +0000 Subject: [PATCH 0009/1132] [IndVars] Have getInsertPointForUses preserve LCSSA Summary: Also add a stricter post-condition for IndVarSimplify. Fixes PR25578. Test case by Michael Zolotukhin. 
Reviewers: hfinkel, atrick, mzolotukhin Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D15059 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254977 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/LoopInfo.h | 3 ++ lib/Analysis/LoopInfo.cpp | 9 +++++ lib/Transforms/Scalar/IndVarSimplify.cpp | 42 ++++++++++++++------- test/Transforms/IndVarSimplify/pr25578.ll | 45 +++++++++++++++++++++++ 4 files changed, 85 insertions(+), 14 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/pr25578.ll diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 84425daf6ed..8a17d5dce1a 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -402,6 +402,9 @@ class Loop : public LoopBase { /// isLCSSAForm - Return true if the Loop is in LCSSA form bool isLCSSAForm(DominatorTree &DT) const; + /// \brief Return true if this Loop and all inner subloops are in LCSSA form. + bool isRecursivelyLCSSAForm(DominatorTree &DT) const; + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 9ee72361c92..33b4d6794f8 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -193,6 +193,15 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { return true; } +bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const { + if (!isLCSSAForm(DT)) + return false; + + return std::all_of(begin(), end(), [&](const Loop *L) { + return L->isRecursivelyLCSSAForm(DT); + }); +} + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. 
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 3dd3cfd4187..b86fd7edc4d 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; @@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { /// loop. For PHI nodes, there may be multiple uses, so compute the nearest /// common dominator for the incoming blocks. static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT) { + DominatorTree *DT, LoopInfo *LI) { PHINode *PHI = dyn_cast(User); if (!PHI) return User; @@ -234,10 +235,21 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, InsertPt = InsertBB->getTerminator(); } assert(InsertPt && "Missing phi operand"); - assert((!isa(Def) || - DT->dominates(cast(Def), InsertPt)) && - "def does not dominate all uses"); - return InsertPt; + + auto *DefI = dyn_cast(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); } //===----------------------------------------------------------------------===// @@ -528,8 +540,8 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. 
void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Verify the input to the pass in already in LCSSA form. - assert(L->isLCSSAForm(*DT)); + // Check a pre-condition. + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -1177,10 +1189,11 @@ const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) { /// This IV user cannot be widen. Replace this use of the original narrow IV /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. -static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { +static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " << *DU.NarrowUse << "\n"); - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); } @@ -1217,7 +1230,8 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. @@ -1239,7 +1253,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // After SimplifyCFG most loop exit targets have a single predecessor. // Otherwise fall back to a truncate within the loop. 
if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); else { PHINode *WidePhi = PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", @@ -1307,7 +1321,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to @@ -2175,9 +2189,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader(), TLI); + // Check a post-condition. - assert(L->isLCSSAForm(*DT) && - "Indvars did not leave the loop in lcssa form!"); + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); // Verify that LFTR, and any other change have not interfered with SCEV's // ability to compute trip count. 
diff --git a/test/Transforms/IndVarSimplify/pr25578.ll b/test/Transforms/IndVarSimplify/pr25578.ll new file mode 100644 index 00000000000..bc648b517bb --- /dev/null +++ b/test/Transforms/IndVarSimplify/pr25578.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -indvars -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: @foo +define void @foo() { +entry: + br label %L1_header + +L1_header: + br label %L2_header + +; CHECK: L2_header: +; CHECK: %[[INDVAR:.*]] = phi i64 +; CHECK: %[[TRUNC:.*]] = trunc i64 %[[INDVAR]] to i32 +L2_header: + %i = phi i32 [ 0, %L1_header ], [ %i_next, %L2_latch ] + %i_prom = sext i32 %i to i64 + br label %L3_header + +L3_header: + br i1 undef, label %L3_latch, label %L2_exiting_1 + +L3_latch: + br i1 undef, label %L3_header, label %L2_exiting_2 + +L2_exiting_1: + br i1 undef, label %L2_latch, label %L1_latch + +L2_exiting_2: + br i1 undef, label %L2_latch, label %L1_latch + +L2_latch: + %i_next = add nsw i32 %i, 1 + br label %L2_header + +L1_latch: +; CHECK: L1_latch: +; CHECK: %i_lcssa = phi i32 [ %[[TRUNC]], %L2_exiting_1 ], [ %[[TRUNC]], %L2_exiting_2 ] + + %i_lcssa = phi i32 [ %i, %L2_exiting_1 ], [ %i, %L2_exiting_2 ] + br i1 undef, label %exit, label %L1_header + +exit: + ret void +} From be313a72b8499cdbae947336b26aad747705af34 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Mon, 14 Dec 2015 13:26:06 -0800 Subject: [PATCH 0010/1132] Revert "[IndVars] Have getInsertPointForUses preserve LCSSA" This reverts commit 01feb75c536aa97a69acf021a01fdd160a11475d. 
--- include/llvm/Analysis/LoopInfo.h | 3 -- lib/Analysis/LoopInfo.cpp | 9 ----- lib/Transforms/Scalar/IndVarSimplify.cpp | 42 +++++++-------------- test/Transforms/IndVarSimplify/pr25578.ll | 45 ----------------------- 4 files changed, 14 insertions(+), 85 deletions(-) delete mode 100644 test/Transforms/IndVarSimplify/pr25578.ll diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 8a17d5dce1a..84425daf6ed 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -402,9 +402,6 @@ class Loop : public LoopBase { /// isLCSSAForm - Return true if the Loop is in LCSSA form bool isLCSSAForm(DominatorTree &DT) const; - /// \brief Return true if this Loop and all inner subloops are in LCSSA form. - bool isRecursivelyLCSSAForm(DominatorTree &DT) const; - /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 33b4d6794f8..9ee72361c92 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -193,15 +193,6 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { return true; } -bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const { - if (!isLCSSAForm(DT)) - return false; - - return std::all_of(begin(), end(), [&](const Loop *L) { - return L->isRecursivelyLCSSAForm(DT); - }); -} - /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. 
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b86fd7edc4d..3dd3cfd4187 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -50,7 +50,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; @@ -216,7 +215,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { /// loop. For PHI nodes, there may be multiple uses, so compute the nearest /// common dominator for the incoming blocks. static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT, LoopInfo *LI) { + DominatorTree *DT) { PHINode *PHI = dyn_cast(User); if (!PHI) return User; @@ -235,21 +234,10 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, InsertPt = InsertBB->getTerminator(); } assert(InsertPt && "Missing phi operand"); - - auto *DefI = dyn_cast(Def); - if (!DefI) - return InsertPt; - - assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); - - auto *L = LI->getLoopFor(DefI->getParent()); - assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); - - for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) - if (LI->getLoopFor(DTN->getBlock()) == L) - return DTN->getBlock()->getTerminator(); - - llvm_unreachable("DefI dominates InsertPt!"); + assert((!isa(Def) || + DT->dominates(cast(Def), InsertPt)) && + "def does not dominate all uses"); + return InsertPt; } //===----------------------------------------------------------------------===// @@ -540,8 +528,8 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. 
void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Check a pre-condition. - assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); + // Verify the input to the pass in already in LCSSA form. + assert(L->isLCSSAForm(*DT)); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -1189,11 +1177,10 @@ const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) { /// This IV user cannot be widen. Replace this use of the original narrow IV /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. -static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { +static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " << *DU.NarrowUse << "\n"); - IRBuilder<> Builder( - getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); } @@ -1230,8 +1217,7 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. - IRBuilder<> Builder( - getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. @@ -1253,7 +1239,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // After SimplifyCFG most loop exit targets have a single predecessor. // Otherwise fall back to a truncate within the loop. 
if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT, LI); + truncateIVUse(DU, DT); else { PHINode *WidePhi = PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", @@ -1321,7 +1307,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT, LI); + truncateIVUse(DU, DT); return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to @@ -2189,9 +2175,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader(), TLI); - // Check a post-condition. - assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); + assert(L->isLCSSAForm(*DT) && + "Indvars did not leave the loop in lcssa form!"); // Verify that LFTR, and any other change have not interfered with SCEV's // ability to compute trip count. 
diff --git a/test/Transforms/IndVarSimplify/pr25578.ll b/test/Transforms/IndVarSimplify/pr25578.ll deleted file mode 100644 index bc648b517bb..00000000000 --- a/test/Transforms/IndVarSimplify/pr25578.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt < %s -indvars -S | FileCheck %s -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - -; CHECK-LABEL: @foo -define void @foo() { -entry: - br label %L1_header - -L1_header: - br label %L2_header - -; CHECK: L2_header: -; CHECK: %[[INDVAR:.*]] = phi i64 -; CHECK: %[[TRUNC:.*]] = trunc i64 %[[INDVAR]] to i32 -L2_header: - %i = phi i32 [ 0, %L1_header ], [ %i_next, %L2_latch ] - %i_prom = sext i32 %i to i64 - br label %L3_header - -L3_header: - br i1 undef, label %L3_latch, label %L2_exiting_1 - -L3_latch: - br i1 undef, label %L3_header, label %L2_exiting_2 - -L2_exiting_1: - br i1 undef, label %L2_latch, label %L1_latch - -L2_exiting_2: - br i1 undef, label %L2_latch, label %L1_latch - -L2_latch: - %i_next = add nsw i32 %i, 1 - br label %L2_header - -L1_latch: -; CHECK: L1_latch: -; CHECK: %i_lcssa = phi i32 [ %[[TRUNC]], %L2_exiting_1 ], [ %[[TRUNC]], %L2_exiting_2 ] - - %i_lcssa = phi i32 [ %i, %L2_exiting_1 ], [ %i, %L2_exiting_2 ] - br i1 undef, label %exit, label %L1_header - -exit: - ret void -} From 8a04a15978ac4e59c988baf34c6372faf333de62 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Mon, 14 Dec 2015 13:26:19 -0800 Subject: [PATCH 0011/1132] Revert "[SCEVExpander] Have hoistIVInc preserve LCSSA" This reverts commit 28416c564303a7dbcb7edd3728fdc862b07bac03. 
--- include/llvm/Analysis/LoopInfo.h | 73 ----------------------- lib/Analysis/ScalarEvolutionExpander.cpp | 3 - test/Transforms/IndVarSimplify/pr24804.ll | 25 -------- 3 files changed, 101 deletions(-) delete mode 100644 test/Transforms/IndVarSimplify/pr24804.ll diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 84425daf6ed..19ebabf6d40 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -37,7 +37,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include @@ -684,78 +683,6 @@ class LoopInfo : public LoopInfoBase { // it as a replacement will not break LCSSA form. return ToLoop->contains(getLoopFor(From->getParent())); } - - /// \brief Checks if moving a specific instruction can break LCSSA in any - /// loop. - /// - /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, - /// assuming that the function containing \p Inst and \p NewLoc is currently - /// in LCSSA form. - bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) { - assert(Inst->getFunction() == NewLoc->getFunction() && - "Can't reason about IPO!"); - - auto *OldBB = Inst->getParent(); - auto *NewBB = NewLoc->getParent(); - - // Movement within the same loop does not break LCSSA (the equality check is - // to avoid doing a hashtable lookup in case of intra-block movement). - if (OldBB == NewBB) - return true; - - auto *OldLoop = getLoopFor(OldBB); - auto *NewLoop = getLoopFor(NewBB); - - if (OldLoop == NewLoop) - return true; - - // Check if Outer contains Inner; with the null loop counting as the - // "outermost" loop. 
- auto Contains = [](const Loop *Outer, const Loop *Inner) { - return !Outer || Outer->contains(Inner); - }; - - // To check that the movement of Inst to before NewLoc does not break LCSSA, - // we need to check two sets of uses for possible LCSSA violations at - // NewLoc: the users of NewInst, and the operands of NewInst. - - // If we know we're hoisting Inst out of an inner loop to an outer loop, - // then the uses *of* Inst don't need to be checked. - - if (!Contains(NewLoop, OldLoop)) { - for (Use &U : Inst->uses()) { - auto *UI = cast(U.getUser()); - auto *UBB = isa(UI) ? cast(UI)->getIncomingBlock(U) - : UI->getParent(); - if (UBB != NewBB && getLoopFor(UBB) != NewLoop) - return false; - } - } - - // If we know we're sinking Inst from an outer loop into an inner loop, then - // the *operands* of Inst don't need to be checked. - - if (!Contains(OldLoop, NewLoop)) { - // See below on why we can't handle phi nodes here. - if (isa(Inst)) - return false; - - for (Use &U : Inst->operands()) { - auto *DefI = dyn_cast(U.get()); - if (!DefI) - return false; - - // This would need adjustment if we allow Inst to be a phi node -- the - // new use block won't simply be NewBB. - - auto *DefBlock = DefI->getParent(); - if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop) - return false; - } - } - - return true; - } }; // Allow clients to walk the list of nested loops... diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 01df859d037..d763ca4c132 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -944,9 +944,6 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) return false; - if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) - return false; - // Check that the chain of IV operands leading back to Phi can be hoisted. 
SmallVector IVIncs; for(;;) { diff --git a/test/Transforms/IndVarSimplify/pr24804.ll b/test/Transforms/IndVarSimplify/pr24804.ll deleted file mode 100644 index 6f89481853a..00000000000 --- a/test/Transforms/IndVarSimplify/pr24804.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt -indvars -loop-idiom -loop-deletion -S < %s | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Checking for a crash - -define void @f(i32* %a) { -; CHECK-LABEL: @f( -entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %for.cond, %entry - %iv = phi i32 [ 0, %entry ], [ %add, %for.inc ], [ %iv, %for.cond ] - %add = add nsw i32 %iv, 1 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom - br i1 undef, label %for.cond, label %for.inc - -for.inc: ; preds = %for.cond - br i1 undef, label %for.cond, label %for.end - -for.end: ; preds = %for.inc - ret void -} From 8416ea0250d14a15bb67417b6e5134a80440f033 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Mon, 14 Dec 2015 13:26:20 -0800 Subject: [PATCH 0012/1132] Revert "Add Instruction::getFunction; NFC" This reverts commit ef2471f98e7f89cf66ec30b8d6a61eb5e638c6c9. --- include/llvm/IR/Instruction.h | 7 ------- lib/IR/Instruction.cpp | 5 ----- 2 files changed, 12 deletions(-) diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index c356c4fb5ad..581ac09cf0c 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -65,13 +65,6 @@ class Instruction : public User, public ilist_node { const Module *getModule() const; Module *getModule(); - /// \brief Return the function this instruction belongs to. - /// - /// Note: it is undefined behavior to call this on an instruction not - /// currently inserted into a function. 
- const Function *getFunction() const; - Function *getFunction(); - /// removeFromParent - This method unlinks 'this' from the containing basic /// block, but does not delete it. /// diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 7bd50328b12..b5a30a4969b 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -62,11 +62,6 @@ Module *Instruction::getModule() { return getParent()->getModule(); } -Function *Instruction::getFunction() { return getParent()->getParent(); } - -const Function *Instruction::getFunction() const { - return getParent()->getParent(); -} void Instruction::removeFromParent() { getParent()->getInstList().remove(getIterator()); From a710e5e6bde669a4542045f97727cfd2d0acf332 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Mon, 14 Dec 2015 13:26:21 -0800 Subject: [PATCH 0013/1132] Revert "[PR25538]: Fix a failure caused by r253126." This reverts commit 74aaeb9e09a8f1a27af8437b2f3770ad547c9786. --- lib/Transforms/Utils/LoopUnroll.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 44dde1b51cf..3999989ec89 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -550,7 +550,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - bool Simplified = simplifyLoop(OuterL, DT, LI, PP, SE, AC); + simplifyLoop(OuterL, DT, LI, PP, SE, AC); // LCSSA must be performed on the outermost affected loop. 
The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after @@ -560,7 +560,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) + if (CompletelyUnroll && !AllExitsAreInsideParentLoop) formLCSSARecursively(*OuterL, *DT, LI, SE); else assert(OuterL->isLCSSAForm(*DT) && From 3ebdbb2c7e5ce577363994fd0aa0f8409bc68490 Mon Sep 17 00:00:00 2001 From: Michael Zolotukhin Date: Mon, 14 Dec 2015 13:26:23 -0800 Subject: [PATCH 0014/1132] Revert "Don't recompute LCSSA after loop-unrolling when possible." This reverts commit cce515504daf19f040560b871f6ab3fc84154168. --- lib/Transforms/Utils/LoopUnroll.cpp | 12 +- test/Transforms/LoopUnroll/rebuild_lcssa.ll | 119 -------------------- 2 files changed, 1 insertion(+), 130 deletions(-) delete mode 100644 test/Transforms/LoopUnroll/rebuild_lcssa.ll diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 3999989ec89..ad4c388e406 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -221,12 +221,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? 
bool CompletelyUnroll = Count == TripCount; - SmallVector ExitBlocks; - L->getExitBlocks(ExitBlocks); - Loop *ParentL = L->getParentLoop(); - bool AllExitsAreInsideParentLoop = !ParentL || - std::all_of(ExitBlocks.begin(), ExitBlocks.end(), - [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -560,11 +554,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - if (CompletelyUnroll && !AllExitsAreInsideParentLoop) - formLCSSARecursively(*OuterL, *DT, LI, SE); - else - assert(OuterL->isLCSSAForm(*DT) && - "Loops should be in LCSSA form after loop-unroll."); + formLCSSARecursively(*OuterL, *DT, LI, SE); } } diff --git a/test/Transforms/LoopUnroll/rebuild_lcssa.ll b/test/Transforms/LoopUnroll/rebuild_lcssa.ll deleted file mode 100644 index 49498492344..00000000000 --- a/test/Transforms/LoopUnroll/rebuild_lcssa.ll +++ /dev/null @@ -1,119 +0,0 @@ -; RUN: opt < %s -loop-unroll -S | FileCheck %s -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - -; This test shows how unrolling an inner loop could break LCSSA for an outer -; loop, and there is no cheap way to recover it. -; -; In this case the inner loop, L3, is being unrolled. It only runs one -; iteration, so unrolling basically means replacing -; br i1 true, label %exit, label %L3_header -; with -; br label %exit -; -; However, this change messes up the loops structure: for instance, block -; L3_body no longer belongs to L2. It becomes an exit block for L2, so LCSSA -; phis for definitions in L2 should now be placed there. In particular, we need -; to insert such a definition for %y1. 
- -; CHECK-LABEL: @foo1 -define void @foo1() { -entry: - br label %L1_header - -L1_header: - br label %L2_header - -L2_header: - %y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ] - br label %L3_header - -L3_header: - %y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ] - %x = add i64 undef, -1 - br i1 true, label %L2_latch, label %L3_body - -L2_latch: - %x.lcssa = phi i64 [ %x, %L3_header ] - br label %L2_header - -; CHECK: L3_body: -; CHECK-NEXT: %y1.lcssa = phi i64 [ %y1, %L3_header ] -L3_body: - store i64 %y1, i64* undef - br i1 false, label %L3_latch, label %L1_latch - -L3_latch: - br i1 true, label %exit, label %L3_header - -L1_latch: - %y.lcssa = phi i64 [ %y2, %L3_body ] - br label %L1_header - -exit: - ret void -} - -; Additional tests for some corner cases. -; -; CHECK-LABEL: @foo2 -define void @foo2() { -entry: - br label %L1_header - -L1_header: - br label %L2_header - -L2_header: - %a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ] - br label %L3_header - -L3_header: - %b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ] - %dec_us = add i64 undef, -1 - br i1 true, label %L2_header, label %L3_break_to_L1 - -; CHECK: L3_break_to_L1: -; CHECK-NEXT: %a.lcssa = phi i64 [ %a, %L3_header ] -L3_break_to_L1: - br i1 false, label %L3_latch, label %L1_latch - -L1_latch: - %b_lcssa = phi i64 [ %b, %L3_break_to_L1 ] - br label %L1_header - -L3_latch: - br i1 true, label %Exit, label %L3_header - -Exit: - ret void -} - -; CHECK-LABEL: @foo3 -define void @foo3() { -entry: - br label %L1_header - -L1_header: - %a = phi i8* [ %b, %L1_latch ], [ null, %entry ] - br i1 undef, label %L2_header, label %L1_latch - -L2_header: - br i1 undef, label %L2_latch, label %L1_latch - -; CHECK: L2_latch: -; CHECK-NEXT: %a.lcssa = phi i8* [ %a, %L2_header ] -L2_latch: - br i1 true, label %L2_exit, label %L2_header - -L1_latch: - %b = phi i8* [ undef, %L1_header ], [ null, %L2_header ] - br label %L1_header - -L2_exit: - %a_lcssa1 = phi i8* [ %a, %L2_latch ] - br label 
%Exit - -Exit: - %a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ] - ret void -} From f95d47afa74ff9cc94fb6a62158328cd4023a20a Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 20 Jan 2016 18:57:48 +0000 Subject: [PATCH 0015/1132] Accept subtractions involving a weak symbol. When a symbol S shows up in an expression in assembly there are two possible interpretations * The expression is referring to the value of S in this file. * The expression is referring to the value after symbol resolution. In the first case the assembler can reason about the value and try to produce a relocation. In the second case, that is only possible if the symbol cannot be preempted. Assemblers are not very consistent about which interpretation gets used. This changes MC to agree with GAS in the case of an expression of the form "Sym - WeakSym". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258329 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 4 ---- test/MC/ELF/relocation.s | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 230d3d775b1..7890c728818 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -649,10 +649,6 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, Asm.getContext().reportFatalError( Fixup.getLoc(), "Cannot represent a difference across sections"); - if (::isWeak(SymB)) - Asm.getContext().reportFatalError( - Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol"); - uint64_t SymBOffset = Layout.getSymbolOffset(SymB); uint64_t K = SymBOffset - FixupOffset; IsPCRel = true; diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s index 0fec7679281..e0313904563 100644 --- a/test/MC/ELF/relocation.s +++ b/test/MC/ELF/relocation.s @@ -63,6 +63,11 @@ pr24486: .code16 call pr23771 + .weak weak_sym +weak_sym: + .long pr23272-weak_sym + + // CHECK: Section { // CHECK: Name: .rela.text // CHECK: Relocations [ @@ 
-101,5 +106,6 @@ pr24486: // CHECK-NEXT: 0xDC R_X86_64_PLT32 foo 0x0 // CHECK-NEXT: 0xF0 R_X86_64_32 .text 0xF0 // CHECK-NEXT: 0xF5 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE +// CHECK-NEXT: 0xF7 R_X86_64_PC32 pr23272 0x0 // CHECK-NEXT: ] // CHECK-NEXT: } From b140d3b6fc9b18a2cc9091af1cedc048fdd56c98 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Sat, 14 Nov 2015 00:16:15 +0000 Subject: [PATCH 0016/1132] [RuntimeDyld] Fix indentation and whitespace; NFC Whitespace-only change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253105 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../RuntimeDyld/RuntimeDyldELF.cpp | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 99cec0edcd5..5358f58e622 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1638,37 +1638,38 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { - StubAddress = uintptr_t(Section.Address) + i->second; - DEBUG(dbgs() << " Stub function found\n"); + StubAddress = uintptr_t(Section.Address) + i->second; + DEBUG(dbgs() << " Stub function found\n"); } else { - // Create a new stub function (equivalent to a PLT entry). - DEBUG(dbgs() << " Create a new stub function\n"); + // Create a new stub function (equivalent to a PLT entry). 
+ DEBUG(dbgs() << " Create a new stub function\n"); - uintptr_t BaseAddress = uintptr_t(Section.Address); - uintptr_t StubAlignment = getStubAlignment(); - StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & - -StubAlignment; - unsigned StubOffset = StubAddress - BaseAddress; - Stubs[Value] = StubOffset; - createStubFunction((uint8_t *)StubAddress); + uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t StubAlignment = getStubAlignment(); + StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & + -StubAlignment; + unsigned StubOffset = StubAddress - BaseAddress; + Stubs[Value] = StubOffset; + createStubFunction((uint8_t *)StubAddress); - // Bump our stub offset counter - Section.StubOffset = StubOffset + getMaxStubSize(); + // Bump our stub offset counter + Section.StubOffset = StubOffset + getMaxStubSize(); - // Allocate a GOT Entry - uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); + // Allocate a GOT Entry + uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); - // The load of the GOT address has an addend of -4 - resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); + // The load of the GOT address has an addend of -4 + resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); - // Fill in the value of the symbol we're targeting into the GOT - addRelocationForSymbol(computeGOTOffsetRE(SectionID,GOTOffset,0,ELF::R_X86_64_64), - Value.SymbolName); + // Fill in the value of the symbol we're targeting into the GOT + addRelocationForSymbol( + computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64), + Value.SymbolName); } // Make the target call a call into the stub table. 
resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32, - Addend); + Addend); } else { RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend, Value.Offset); From 3712f0932616be38fa33253fa764585169c48e66 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 23 Nov 2015 21:47:41 +0000 Subject: [PATCH 0017/1132] [RuntimeDyld] Add accessors to `SectionEntry`; NFC Summary: Remove naked access to the data members in `SectionEntry` and route accesses through accessor functions. This makes it obvious how the instances of the class are used, and will also facilitate adding bounds checking to `advanceStubOffset` in a later change. Reviewers: lhames, loladiro, andrew.w.kaylor Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D14674 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253918 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../RuntimeDyld/RuntimeDyld.cpp | 39 ++- .../RuntimeDyld/RuntimeDyldChecker.cpp | 23 +- .../RuntimeDyld/RuntimeDyldELF.cpp | 259 ++++++++++-------- .../RuntimeDyld/RuntimeDyldImpl.h | 31 ++- .../RuntimeDyld/RuntimeDyldMachO.cpp | 33 ++- .../RuntimeDyld/RuntimeDyldMachO.h | 2 +- .../RuntimeDyld/Targets/RuntimeDyldCOFFI386.h | 16 +- .../Targets/RuntimeDyldCOFFX86_64.h | 12 +- .../Targets/RuntimeDyldMachOAArch64.h | 14 +- .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 30 +- .../Targets/RuntimeDyldMachOI386.h | 10 +- .../Targets/RuntimeDyldMachOX86_64.h | 20 +- 12 files changed, 266 insertions(+), 223 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index f24a49e8b29..adf24a87e41 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -41,20 +41,21 @@ void RuntimeDyldImpl::deregisterEHFrames() {} #ifndef NDEBUG static void dumpSectionMemory(const SectionEntry &S, StringRef State) { - dbgs() << "----- Contents of section " << S.Name << " " << State << " -----"; + 
dbgs() << "----- Contents of section " << S.getName() << " " << State + << " -----"; - if (S.Address == nullptr) { + if (S.getAddress() == nullptr) { dbgs() << "\n
\n"; return; } const unsigned ColsPerRow = 16; - uint8_t *DataAddr = S.Address; - uint64_t LoadAddr = S.LoadAddress; + uint8_t *DataAddr = S.getAddress(); + uint64_t LoadAddr = S.getLoadAddress(); unsigned StartPadding = LoadAddr & (ColsPerRow - 1); - unsigned BytesRemaining = S.Size; + unsigned BytesRemaining = S.getSize(); if (StartPadding) { dbgs() << "\n" << format("0x%016" PRIx64, @@ -97,7 +98,7 @@ void RuntimeDyldImpl::resolveRelocations() { // symbol for the relocation is located. The SectionID in the relocation // entry provides the section to which the relocation will be applied. int Idx = it->getFirst(); - uint64_t Addr = Sections[Idx].LoadAddress; + uint64_t Addr = Sections[Idx].getLoadAddress(); DEBUG(dbgs() << "Resolving relocations Section #" << Idx << "\t" << format("%p", (uintptr_t)Addr) << "\n"); resolveRelocationList(it->getSecond(), Addr); @@ -116,7 +117,7 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress) { MutexGuard locked(lock); for (unsigned i = 0, e = Sections.size(); i != e; ++i) { - if (Sections[i].Address == LocalAddress) { + if (Sections[i].getAddress() == LocalAddress) { reassignSectionAddress(i, TargetAddress); return; } @@ -778,11 +779,11 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, // Addr is a uint64_t because we can't assume the pointer width // of the target is the same as that of the host. Just use a generic // "big enough" type. 
- DEBUG(dbgs() << "Reassigning address for section " - << SectionID << " (" << Sections[SectionID].Name << "): " - << format("0x%016" PRIx64, Sections[SectionID].LoadAddress) << " -> " - << format("0x%016" PRIx64, Addr) << "\n"); - Sections[SectionID].LoadAddress = Addr; + DEBUG(dbgs() << "Reassigning address for section " << SectionID << " (" + << Sections[SectionID].getName() << "): " + << format("0x%016" PRIx64, Sections[SectionID].getLoadAddress()) + << " -> " << format("0x%016" PRIx64, Addr) << "\n"); + Sections[SectionID].setLoadAddress(Addr); } void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, @@ -790,7 +791,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const RelocationEntry &RE = Relocs[i]; // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address == nullptr) + if (Sections[RE.SectionID].getAddress() == nullptr) continue; resolveRelocation(RE, Value); } @@ -856,17 +857,9 @@ void RuntimeDyldImpl::resolveExternalSymbols() { uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress( const object::SectionRef &Sec) const { -// llvm::dbgs() << "Searching for " << Sec.getRawDataRefImpl() << " in:\n"; -// for (auto E : ObjSecToIDMap) -// llvm::dbgs() << "Added: " << E.first.getRawDataRefImpl() << " -> " << E.second << "\n"; - auto I = ObjSecToIDMap.find(Sec); - if (I != ObjSecToIDMap.end()) { -// llvm::dbgs() << "Found ID " << I->second << " for Sec: " << Sec.getRawDataRefImpl() << ", LoadAddress = " << RTDyld.Sections[I->second].LoadAddress << "\n"; - return RTDyld.Sections[I->second].LoadAddress; - } else { -// llvm::dbgs() << "Not found.\n"; - } + if (I != ObjSecToIDMap.end()) + return RTDyld.Sections[I->second].getLoadAddress(); return 0; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 18a555ae206..58ce88a68f2 100644 --- 
a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -799,11 +799,10 @@ std::pair RuntimeDyldCheckerImpl::getSectionAddr( unsigned SectionID = SectionInfo->SectionID; uint64_t Addr; if (IsInsideLoad) - Addr = - static_cast( - reinterpret_cast(getRTDyld().Sections[SectionID].Address)); + Addr = static_cast(reinterpret_cast( + getRTDyld().Sections[SectionID].getAddress())); else - Addr = getRTDyld().Sections[SectionID].LoadAddress; + Addr = getRTDyld().Sections[SectionID].getLoadAddress(); return std::make_pair(Addr, std::string("")); } @@ -835,11 +834,11 @@ std::pair RuntimeDyldCheckerImpl::getStubAddrFor( uint64_t Addr; if (IsInsideLoad) { - uintptr_t SectionBase = - reinterpret_cast(getRTDyld().Sections[SectionID].Address); + uintptr_t SectionBase = reinterpret_cast( + getRTDyld().Sections[SectionID].getAddress()); Addr = static_cast(SectionBase) + StubOffset; } else { - uint64_t SectionBase = getRTDyld().Sections[SectionID].LoadAddress; + uint64_t SectionBase = getRTDyld().Sections[SectionID].getLoadAddress(); Addr = SectionBase + StubOffset; } @@ -855,16 +854,16 @@ RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const { const auto &SymInfo = pos->second; uint8_t *SectionAddr = getRTDyld().getSectionAddress(SymInfo.getSectionID()); return StringRef(reinterpret_cast(SectionAddr) + - SymInfo.getOffset(), - getRTDyld().Sections[SymInfo.getSectionID()].Size - - SymInfo.getOffset()); + SymInfo.getOffset(), + getRTDyld().Sections[SymInfo.getSectionID()].getSize() - + SymInfo.getOffset()); } void RuntimeDyldCheckerImpl::registerSection( StringRef FilePath, unsigned SectionID) { StringRef FileName = sys::path::filename(FilePath); const SectionEntry &Section = getRTDyld().Sections[SectionID]; - StringRef SectionName = Section.Name; + StringRef SectionName = Section.getName(); Stubs[FileName][SectionName].SectionID = SectionID; } @@ -874,7 +873,7 @@ void 
RuntimeDyldCheckerImpl::registerStubMap( const RuntimeDyldImpl::StubMap &RTDyldStubs) { StringRef FileName = sys::path::filename(FilePath); const SectionEntry &Section = getRTDyld().Sections[SectionID]; - StringRef SectionName = Section.Name; + StringRef SectionName = Section.getName(); Stubs[FileName][SectionName].SectionID = SectionID; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 5358f58e622..00c91b506f1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -198,9 +198,9 @@ RuntimeDyldELF::~RuntimeDyldELF() {} void RuntimeDyldELF::registerEHFrames() { for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = UnregisteredEHFrameSections[i]; - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } @@ -210,9 +210,9 @@ void RuntimeDyldELF::registerEHFrames() { void RuntimeDyldELF::deregisterEHFrames() { for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = RegisteredEHFrameSections[i]; - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); } RegisteredEHFrameSections.clear(); @@ -232,9 +232,10 @@ 
void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, llvm_unreachable("Relocation type not implemented yet!"); break; case ELF::R_X86_64_64: { - support::ulittle64_t::ref(Section.Address + Offset) = Value + Addend; + support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = + Value + Addend; DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " - << format("%p\n", Section.Address + Offset)); + << format("%p\n", Section.getAddressWithOffset(Offset))); break; } case ELF::R_X86_64_32: @@ -244,31 +245,34 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, (Type == ELF::R_X86_64_32S && ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncatedAddr; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + TruncatedAddr; DEBUG(dbgs() << "Writing " << format("%p", TruncatedAddr) << " at " - << format("%p\n", Section.Address + Offset)); + << format("%p\n", Section.getAddressWithOffset(Offset))); break; } case ELF::R_X86_64_PC8: { - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; assert(isInt<8>(RealOffset)); int8_t TruncOffset = (RealOffset & 0xFF); - Section.Address[Offset] = TruncOffset; + Section.getAddress()[Offset] = TruncOffset; break; } case ELF::R_X86_64_PC32: { - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; assert(isInt<32>(RealOffset)); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + TruncOffset; break; } case ELF::R_X86_64_PC64: { - uint64_t FinalAddress = Section.LoadAddress + 
Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; - support::ulittle64_t::ref(Section.Address + Offset) = RealOffset; + support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = + RealOffset; break; } } @@ -279,13 +283,16 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, uint32_t Type, int32_t Addend) { switch (Type) { case ELF::R_386_32: { - support::ulittle32_t::ref(Section.Address + Offset) = Value + Addend; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + Value + Addend; break; } case ELF::R_386_PC32: { - uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); + uint32_t FinalAddress = + Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; uint32_t RealOffset = Value + Addend - FinalAddress; - support::ulittle32_t::ref(Section.Address + Offset) = RealOffset; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + RealOffset; break; } default: @@ -299,11 +306,12 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { - uint32_t *TargetPtr = reinterpret_cast(Section.Address + Offset); - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint32_t *TargetPtr = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x" - << format("%llx", Section.Address + Offset) + << format("%llx", Section.getAddressWithOffset(Offset)) << " FinalAddress: 0x" << format("%llx", FinalAddress) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) @@ -315,7 +323,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, break; case ELF::R_AARCH64_ABS64: 
{ uint64_t *TargetPtr = - reinterpret_cast(Section.Address + Offset); + reinterpret_cast(Section.getAddressWithOffset(Offset)); *TargetPtr = Value + Addend; break; } @@ -438,12 +446,13 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { // TODO: Add Thumb relocations. - uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset); - uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); + uint32_t *TargetPtr = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; Value += Addend; DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " - << Section.Address + Offset + << Section.getAddressWithOffset(Offset) << " FinalAddress: " << format("%p", FinalAddress) << " Value: " << format("%x", Value) << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); @@ -487,13 +496,14 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { - uint8_t *TargetPtr = Section.Address + Offset; + uint8_t *TargetPtr = Section.getAddressWithOffset(Offset); Value += Addend; DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: " - << Section.Address + Offset << " FinalAddress: " - << format("%p", Section.LoadAddress + Offset) << " Value: " - << format("%x", Value) << " Type: " << format("%x", Type) + << Section.getAddressWithOffset(Offset) << " FinalAddress: " + << format("%p", Section.getLoadAddressWithOffset(Offset)) + << " Value: " << format("%x", Value) + << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); uint32_t Insn = readBytesUnaligned(TargetPtr, 4); @@ -522,47 +532,47 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, writeBytesUnaligned(Insn, TargetPtr, 4); break; 
case ELF::R_MIPS_PC32: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4); break; } case ELF::R_MIPS_PC16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= ((Value - FinalAddress) >> 2) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC19_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xfff80000; Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC21_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffe00000; Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC26_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xfc000000; Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PCHI16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PCLO16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= (Value - FinalAddress) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); @@ -613,7 +623,8 @@ void RuntimeDyldELF::resolveMIPS64Relocation(const SectionEntry &Section, CalculatedValue, SymOffset, SectionID); } 
- applyMIPS64Relocation(Section.Address + Offset, CalculatedValue, RelType); + applyMIPS64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue, + RelType); } int64_t @@ -623,13 +634,12 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section, uint64_t SymOffset, SID SectionID) { DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x" - << format("%llx", Section.Address + Offset) + << format("%llx", Section.getAddressWithOffset(Offset)) << " FinalAddress: 0x" - << format("%llx", Section.LoadAddress + Offset) + << format("%llx", Section.getLoadAddressWithOffset(Offset)) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) - << " SymOffset: " << format("%x", SymOffset) - << "\n"); + << " SymOffset: " << format("%x", SymOffset) << "\n"); switch (Type) { default: @@ -682,35 +692,35 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section, return Value + Addend - (GOTAddr + 0x7ff0); } case ELF::R_MIPS_PC16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress) >> 2) & 0xffff; } case ELF::R_MIPS_PC32: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return Value + Addend - FinalAddress; } case ELF::R_MIPS_PC18_S3: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - (FinalAddress & ~0x7)) >> 3) & 0x3ffff; } case ELF::R_MIPS_PC19_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; } case ELF::R_MIPS_PC21_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = 
Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff; } case ELF::R_MIPS_PC26_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff; } case ELF::R_MIPS_PCHI16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff; } case ELF::R_MIPS_PCLO16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return (Value + Addend - FinalAddress) & 0xffff; } } @@ -898,7 +908,7 @@ static inline uint16_t applyPPChighesta (uint64_t value) { void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: llvm_unreachable("Relocation type not implemented yet!"); @@ -918,7 +928,7 @@ void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section, void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: llvm_unreachable("Relocation type not implemented yet!"); @@ -960,17 +970,17 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; case ELF::R_PPC64_REL16_LO: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; 
writeInt16BE(LocalAddress, applyPPClo(Delta)); } break; case ELF::R_PPC64_REL16_HI: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPChi(Delta)); } break; case ELF::R_PPC64_REL16_HA: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPCha(Delta)); } break; @@ -981,7 +991,7 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt32BE(LocalAddress, Result); } break; case ELF::R_PPC64_REL24: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int32_t delta = static_cast(Value - FinalAddress + Addend); if (SignExtend32<24>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL24 overflow"); @@ -989,14 +999,14 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC)); } break; case ELF::R_PPC64_REL32: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int32_t delta = static_cast(Value - FinalAddress + Addend); if (SignExtend32<32>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL32 overflow"); writeInt32BE(LocalAddress, delta); } break; case ELF::R_PPC64_REL64: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt64BE(LocalAddress, Delta); } break; @@ -1009,27 +1019,27 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t 
Type, int64_t Addend) { - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: llvm_unreachable("Relocation type not implemented yet!"); break; case ELF::R_390_PC16DBL: case ELF::R_390_PLT16DBL: { - int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow"); writeInt16BE(LocalAddress, Delta / 2); break; } case ELF::R_390_PC32DBL: case ELF::R_390_PLT32DBL: { - int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow"); writeInt32BE(LocalAddress, Delta / 2); break; } case ELF::R_390_PC32: { - int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int32_t(Delta) == Delta && "R_390_PC32 overflow"); writeInt32BE(LocalAddress, Delta); break; @@ -1119,7 +1129,7 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, } void *RuntimeDyldELF::computePlaceholderAddress(unsigned SectionID, uint64_t Offset) const { - return (void*)(Sections[SectionID].ObjAddress + Offset); + return (void *)(Sections[SectionID].getObjAddress() + Offset); } void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value) { @@ -1234,24 +1244,28 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Look for an existing stub. 
StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, + resolveRelocation(Section, Offset, + (uint64_t)Section.getAddressWithOffset(i->second), RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); - RelocationEntry REmovz_g3(SectionID, StubTargetAddr - Section.Address, + RelocationEntry REmovz_g3(SectionID, + StubTargetAddr - Section.getAddress(), ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); - RelocationEntry REmovk_g2(SectionID, StubTargetAddr - Section.Address + 4, + RelocationEntry REmovk_g2(SectionID, StubTargetAddr - + Section.getAddressWithOffset(4), ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); - RelocationEntry REmovk_g1(SectionID, StubTargetAddr - Section.Address + 8, + RelocationEntry REmovk_g1(SectionID, StubTargetAddr - + Section.getAddressWithOffset(8), ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); - RelocationEntry REmovk_g0(SectionID, - StubTargetAddr - Section.Address + 12, + RelocationEntry REmovk_g0(SectionID, StubTargetAddr - + Section.getAddressWithOffset(12), ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); if (Value.SymbolName) { @@ -1266,9 +1280,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(REmovk_g0, Value.SectionID); } resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, RelType, - 0); - Section.StubOffset += getMaxStubSize(); + reinterpret_cast(Section.getAddressWithOffset( + Section.getStubOffset())), + RelType, 0); + Section.advanceStubOffset(getMaxStubSize()); } } else if (Arch == Triple::arm) { if (RelType == 
ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL || @@ -1280,26 +1295,29 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, - RelType, 0); + resolveRelocation( + Section, Offset, + reinterpret_cast(Section.getAddressWithOffset(i->second)), + RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, - ELF::R_ARM_ABS32, Value.Addend); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); + RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_ARM_ABS32, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, RelType, - 0); - Section.StubOffset += getMaxStubSize(); + resolveRelocation(Section, Offset, reinterpret_cast( + Section.getAddressWithOffset( + Section.getStubOffset())), + RelType, 0); + Section.advanceStubOffset(getMaxStubSize()); } } else { uint32_t *Placeholder = @@ -1338,15 +1356,16 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( } else { // Create a new stub function. 
DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); // Creating Hi and Lo relocations for the filled stub instructions. - RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address, - ELF::R_MIPS_HI16, Value.Addend); - RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4, - ELF::R_MIPS_LO16, Value.Addend); + RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_MIPS_HI16, Value.Addend); + RelocationEntry RELo(SectionID, + StubTargetAddr - Section.getAddressWithOffset(4), + ELF::R_MIPS_LO16, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REHi, Value.SymbolName); @@ -1357,9 +1376,9 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(RELo, Value.SectionID); } - RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset); + RelocationEntry RE(SectionID, Offset, RelType, Section.getStubOffset()); addRelocationForSection(RE, SectionID); - Section.StubOffset += getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } } else if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) { int64_t Addend = (Opcode & 0x0000ffff) << 16; @@ -1427,7 +1446,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // an external symbol (Symbol::ST_Unknown) or if the target address // is not within the signed 24-bits branch address. 
SectionEntry &Section = Sections[SectionID]; - uint8_t *Target = Section.Address + Offset; + uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; if (SymType != SymbolRef::ST_Unknown) { if (AbiVariant != 2) { @@ -1441,7 +1460,8 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( uint8_t SymOther = Symbol->getOther(); Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); } - uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend; + uint8_t *RelocTarget = + Sections[Value.SectionID].getAddressWithOffset(Value.Addend); int32_t delta = static_cast(Target - RelocTarget); // If it is within 24-bits branch range, just set the branch target if (SignExtend32<24>(delta) == delta) { @@ -1461,23 +1481,25 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it resolveRelocation(Section, Offset, - (uint64_t)Section.Address + i->second, RelType, 0); + reinterpret_cast( + Section.getAddressWithOffset(i->second)), + RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset, - AbiVariant); - RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset()), + AbiVariant); + RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section // 4.5.1 in PPC64 ELF ABI. Note that the relocations need to // apply to the low part of the instructions, so we have to update // the offset according to the target endianness. 
- uint64_t StubRelocOffset = StubTargetAddr - Section.Address; + uint64_t StubRelocOffset = StubTargetAddr - Section.getAddress(); if (!IsTargetLittleEndian) StubRelocOffset += 2; @@ -1502,10 +1524,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(REl, Value.SectionID); } - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, + resolveRelocation(Section, Offset, reinterpret_cast( + Section.getAddressWithOffset( + Section.getStubOffset())), RelType, 0); - Section.StubOffset += getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } if (SymType == SymbolRef::ST_Unknown) { // Restore the TOC for external calls @@ -1585,16 +1608,17 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { - StubAddress = uintptr_t(Section.Address) + i->second; + StubAddress = uintptr_t(Section.getAddressWithOffset(i->second)); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
DEBUG(dbgs() << " Create a new stub function\n"); - uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); - StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & - -StubAlignment; + StubAddress = + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & + -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; @@ -1605,7 +1629,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - Section.StubOffset = StubOffset + getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } if (RelType == ELF::R_390_GOTENT) @@ -1638,22 +1662,23 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { - StubAddress = uintptr_t(Section.Address) + i->second; + StubAddress = uintptr_t(Section.getAddress()) + i->second; DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function (equivalent to a PLT entry). 
DEBUG(dbgs() << " Create a new stub function\n"); - uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); - StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & - -StubAlignment; + StubAddress = + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & + -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; createStubFunction((uint8_t *)StubAddress); // Bump our stub offset counter - Section.StubOffset = StubOffset + getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); // Allocate a GOT Entry uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index c0a16174d31..d005099535f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -50,7 +50,6 @@ class Twine; /// SectionEntry - represents a section emitted into memory by the dynamic /// linker. class SectionEntry { -public: /// Name - section name. std::string Name; @@ -74,11 +73,37 @@ class SectionEntry { /// for calculating relocations in some object formats (like MachO). uintptr_t ObjAddress; +public: SectionEntry(StringRef name, uint8_t *address, size_t size, uintptr_t objAddress) : Name(name), Address(address), Size(size), LoadAddress(reinterpret_cast(address)), StubOffset(size), ObjAddress(objAddress) {} + + StringRef getName() const { return Name; } + + uint8_t *getAddress() const { return Address; } + + /// \brief Return the address of this section with an offset. 
+ uint8_t *getAddressWithOffset(unsigned OffsetBytes) const { + return Address + OffsetBytes; + } + + size_t getSize() const { return Size; } + + uint64_t getLoadAddress() const { return LoadAddress; } + void setLoadAddress(uint64_t LA) { LoadAddress = LA; } + + /// \brief Return the load address of this section with an offset. + uint64_t getLoadAddressWithOffset(unsigned OffsetBytes) const { + return LoadAddress + OffsetBytes; + } + + uintptr_t getStubOffset() const { return StubOffset; } + + void advanceStubOffset(unsigned StubSize) { StubOffset += StubSize; } + + uintptr_t getObjAddress() const { return ObjAddress; } }; /// RelocationEntry - used to represent relocations internally in the dynamic @@ -271,11 +296,11 @@ class RuntimeDyldImpl { } uint64_t getSectionLoadAddress(unsigned SectionID) const { - return Sections[SectionID].LoadAddress; + return Sections[SectionID].getLoadAddress(); } uint8_t *getSectionAddress(unsigned SectionID) const { - return (uint8_t *)Sections[SectionID].Address; + return Sections[SectionID].getAddress(); } void writeInt16BE(uint8_t *Addr, uint16_t Value) { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 7601ba26f90..739e8d65dbf 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -45,7 +45,7 @@ namespace llvm { int64_t RuntimeDyldMachO::memcpyAddend(const RelocationEntry &RE) const { unsigned NumBytes = 1 << RE.Size; - uint8_t *Src = Sections[RE.SectionID].Address + RE.Offset; + uint8_t *Src = Sections[RE.SectionID].getAddress() + RE.Offset; return static_cast(readBytesUnaligned(Src, NumBytes)); } @@ -64,7 +64,7 @@ relocation_iterator RuntimeDyldMachO::processScatteredVANILLA( bool IsPCRel = Obj.getAnyRelocationPCRel(RE); unsigned Size = Obj.getAnyRelocationLength(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress 
= Section.getAddressWithOffset(Offset); unsigned NumBytes = 1 << Size; int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); @@ -135,8 +135,8 @@ void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value, void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE, uint64_t Value) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint8_t *LocalAddress = Section.getAddress() + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddress() + RE.Offset; dbgs() << "resolveRelocation Section: " << RE.SectionID << " LocalAddress: " << format("%p", LocalAddress) @@ -183,10 +183,9 @@ void RuntimeDyldMachO::populateIndirectSymbolPointersSection( "Pointers section does not contain a whole number of stubs?"); DEBUG(dbgs() << "Populating pointer table section " - << Sections[PTSectionID].Name - << ", Section ID " << PTSectionID << ", " - << NumPTEntries << " entries, " << PTEntrySize - << " bytes each:\n"); + << Sections[PTSectionID].getName() << ", Section ID " + << PTSectionID << ", " << NumPTEntries << " entries, " + << PTEntrySize << " bytes each:\n"); for (unsigned i = 0; i < NumPTEntries; ++i) { unsigned SymbolIndex = @@ -240,7 +239,7 @@ void RuntimeDyldMachOCRTPBase::finalizeLoad(const ObjectFile &Obj, } template -unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, +unsigned char *RuntimeDyldMachOCRTPBase::processFDE(uint8_t *P, int64_t DeltaForText, int64_t DeltaForEH) { typedef typename Impl::TargetPtrT TargetPtrT; @@ -249,7 +248,7 @@ unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, << ", Delta for EH: " << DeltaForEH << "\n"); uint32_t Length = readBytesUnaligned(P, 4); P += 4; - unsigned char *Ret = P + Length; + uint8_t *Ret = P + Length; uint32_t Offset = readBytesUnaligned(P, 4); if (Offset == 0) // is a CIE return Ret; @@ -276,9 +275,9 @@ unsigned char 
*RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, } static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { - int64_t ObjDistance = - static_cast(A->ObjAddress) - static_cast(B->ObjAddress); - int64_t MemDistance = A->LoadAddress - B->LoadAddress; + int64_t ObjDistance = static_cast(A->getObjAddress()) - + static_cast(B->getObjAddress()); + int64_t MemDistance = A->getLoadAddress() - B->getLoadAddress(); return ObjDistance - MemDistance; } @@ -301,14 +300,14 @@ void RuntimeDyldMachOCRTPBase::registerEHFrames() { if (ExceptTab) DeltaForEH = computeDelta(ExceptTab, EHFrame); - unsigned char *P = EHFrame->Address; - unsigned char *End = P + EHFrame->Size; + uint8_t *P = EHFrame->getAddress(); + uint8_t *End = P + EHFrame->getSize(); do { P = processFDE(P, DeltaForText, DeltaForEH); } while (P != End); - MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress, - EHFrame->Size); + MemMgr.registerEHFrames(EHFrame->getAddress(), EHFrame->getLoadAddress(), + EHFrame->getSize()); } UnregisteredEHFrameSections.clear(); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 2642ed2bd92..c8ae47b0db2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -146,7 +146,7 @@ class RuntimeDyldMachOCRTPBase : public RuntimeDyldMachO { Impl &impl() { return static_cast(*this); } const Impl &impl() const { return static_cast(*this); } - unsigned char *processFDE(unsigned char *P, int64_t DeltaForText, + unsigned char *processFDE(uint8_t *P, int64_t DeltaForText, int64_t DeltaForEH); public: diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h index f593c9ab15d..fbfbb328523 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h @@ -105,7 +105,7 @@ class 
RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { const auto Section = Sections[RE.SectionID]; - uint8_t *Target = Section.Address + RE.Offset; + uint8_t *Target = Section.getAddressWithOffset(RE.Offset); switch (RE.RelType) { case COFF::IMAGE_REL_I386_ABSOLUTE: @@ -116,7 +116,8 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { uint64_t Result = RE.Sections.SectionA == static_cast(-1) ? Value - : Sections[RE.Sections.SectionA].LoadAddress + RE.Addend; + : Sections[RE.Sections.SectionA].getLoadAddressWithOffset( + RE.Addend); assert(static_cast(Result) <= INT32_MAX && "relocation overflow"); assert(static_cast(Result) >= INT32_MIN && @@ -130,9 +131,10 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { } case COFF::IMAGE_REL_I386_DIR32NB: { // The target's 32-bit RVA. - // NOTE: use Section[0].LoadAddress as an approximation of ImageBase - uint64_t Result = Sections[RE.Sections.SectionA].LoadAddress + RE.Addend - - Sections[0].LoadAddress; + // NOTE: use Section[0].getLoadAddress() as an approximation of ImageBase + uint64_t Result = + Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend) - + Sections[0].getLoadAddress(); assert(static_cast(Result) <= INT32_MAX && "relocation overflow"); assert(static_cast(Result) >= INT32_MIN && @@ -146,8 +148,8 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { } case COFF::IMAGE_REL_I386_REL32: { // 32-bit relative displacement to the target. 
- uint64_t Result = Sections[RE.Sections.SectionA].LoadAddress - - Section.LoadAddress + RE.Addend - 4 - RE.Offset; + uint64_t Result = Sections[RE.Sections.SectionA].getLoadAddress() - + Section.getLoadAddress() + RE.Addend - 4 - RE.Offset; assert(static_cast(Result) <= INT32_MAX && "relocation overflow"); assert(static_cast(Result) >= INT32_MIN && diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index 167708156dd..25f538d8f3d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -62,7 +62,7 @@ class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF { // symbol in the target address space. void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *Target = Section.Address + RE.Offset; + uint8_t *Target = Section.getAddressWithOffset(RE.Offset); switch (RE.RelType) { @@ -72,7 +72,7 @@ class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF { case COFF::IMAGE_REL_AMD64_REL32_3: case COFF::IMAGE_REL_AMD64_REL32_4: case COFF::IMAGE_REL_AMD64_REL32_5: { - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); // Delta is the distance from the start of the reloc to the end of the // instruction with the reloc. 
uint64_t Delta = 4 + (RE.RelType - COFF::IMAGE_REL_AMD64_REL32); @@ -125,7 +125,7 @@ class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF { uint64_t Offset = RelI->getOffset(); uint64_t Addend = 0; SectionEntry &Section = Sections[SectionID]; - uintptr_t ObjTarget = Section.ObjAddress + Offset; + uintptr_t ObjTarget = Section.getObjAddress() + Offset; switch (RelType) { @@ -178,9 +178,9 @@ class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF { unsigned getStubAlignment() override { return 1; } void registerEHFrames() override { for (auto const &EHFrameSID : UnregisteredEHFrameSections) { - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index 929639cecce..dbca37747ce 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -34,7 +34,7 @@ class RuntimeDyldMachOAArch64 /// Extract the addend encoded in the instruction / memory location. int64_t decodeAddend(const RelocationEntry &RE) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); unsigned NumBytes = 1 << RE.Size; int64_t Addend = 0; // Verify that the relocation has the correct size and alignment. 
@@ -304,7 +304,7 @@ class RuntimeDyldMachOAArch64 DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); MachO::RelocationInfoType RelType = static_cast(RE.RelType); @@ -324,7 +324,7 @@ class RuntimeDyldMachOAArch64 case MachO::ARM64_RELOC_BRANCH26: { assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_BRANCH26 not supported"); // Check if branch is in range. - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); int64_t PCRelVal = Value - FinalAddress + RE.Addend; encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal); break; @@ -333,7 +333,7 @@ class RuntimeDyldMachOAArch64 case MachO::ARM64_RELOC_PAGE21: { assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_PAGE21 not supported"); // Adjust for PC-relative relocation and offset. - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); int64_t PCRelVal = ((Value + RE.Addend) & (-4096)) - (FinalAddress & (-4096)); encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal); @@ -375,10 +375,10 @@ class RuntimeDyldMachOAArch64 else { // FIXME: There must be a better way to do this then to check and fix the // alignment every time!!! 
- uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); uintptr_t StubAddress = - (BaseAddress + Section.StubOffset + StubAlignment - 1) & + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; @@ -391,7 +391,7 @@ class RuntimeDyldMachOAArch64 addRelocationForSymbol(GOTRE, Value.SymbolName); else addRelocationForSection(GOTRE, Value.SectionID); - Section.StubOffset = StubOffset + getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); Offset = static_cast(StubOffset); } RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, Offset, diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 64b57484945..7731df09bd2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -35,7 +35,7 @@ class RuntimeDyldMachOARM int64_t decodeAddend(const RelocationEntry &RE) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); switch (RE.RelType) { default: @@ -94,12 +94,12 @@ class RuntimeDyldMachOARM void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); // If the relocation is PC-relative, the value to be encoded is the // pointer difference. 
if (RE.IsPCRel) { - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress; // ARM PCRel relocations have an effective-PC offset of two instructions // (four bytes in Thumb mode, 8 bytes in ARM mode). @@ -132,8 +132,8 @@ class RuntimeDyldMachOARM break; } case MachO::ARM_RELOC_HALF_SECTDIFF: { - uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; - uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); assert((Value == SectionABase || Value == SectionBBase) && "Unexpected HALFSECTDIFF relocation value."); Value = SectionABase - SectionBBase + RE.Addend; @@ -180,21 +180,21 @@ class RuntimeDyldMachOARM RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; if (i != Stubs.end()) { - Addr = Section.Address + i->second; + Addr = Section.getAddressWithOffset(i->second); } else { // Create a new stub function. 
- Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry StubRE(RE.SectionID, StubTargetAddr - Section.Address, - MachO::GENERIC_RELOC_VANILLA, Value.Offset, false, - 2); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); + RelocationEntry StubRE( + RE.SectionID, StubTargetAddr - Section.getAddress(), + MachO::GENERIC_RELOC_VANILLA, Value.Offset, false, 2); if (Value.SymbolName) addRelocationForSymbol(StubRE, Value.SymbolName); else addRelocationForSection(StubRE, Value.SectionID); - Addr = Section.Address + Section.StubOffset; - Section.StubOffset += getMaxStubSize(); + Addr = Section.getAddressWithOffset(Section.getStubOffset()); + Section.advanceStubOffset(getMaxStubSize()); } RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, 0, RE.IsPCRel, RE.Size); @@ -223,7 +223,7 @@ class RuntimeDyldMachOARM uint32_t RelocType = MachO.getAnyRelocationType(RE); bool IsPCRel = MachO.getAnyRelocationPCRel(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); int64_t Immediate = readBytesUnaligned(LocalAddress, 4); // Copy the whole instruction out. 
Immediate = ((Immediate >> 4) & 0xf000) | (Immediate & 0xfff); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index 87a248bdaa0..85059d70a3e 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -83,10 +83,10 @@ class RuntimeDyldMachOI386 DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); if (RE.IsPCRel) { - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation. } @@ -98,8 +98,8 @@ class RuntimeDyldMachOI386 break; case MachO::GENERIC_RELOC_SECTDIFF: case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: { - uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; - uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); assert((Value == SectionABase || Value == SectionBBase) && "Unexpected SECTDIFF relocation value."); Value = SectionABase - SectionBBase + RE.Addend; @@ -138,7 +138,7 @@ class RuntimeDyldMachOI386 bool IsPCRel = Obj.getAnyRelocationPCRel(RE); unsigned Size = Obj.getAnyRelocationLength(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); unsigned NumBytes = 1 << Size; uint64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h index 8b410972415..2242295bc1e 
100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h @@ -73,14 +73,14 @@ class RuntimeDyldMachOX86_64 void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (RE.IsPCRel) { // FIXME: It seems this value needs to be adjusted by 4 for an effective // PC address. Is that expected? Only for branches, perhaps? - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress + 4; } @@ -96,8 +96,8 @@ class RuntimeDyldMachOX86_64 writeBytesUnaligned(Value + RE.Addend, LocalAddress, 1 << RE.Size); break; case MachO::X86_64_RELOC_SUBTRACTOR: { - uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; - uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); assert((Value == SectionABase || Value == SectionBBase) && "Unexpected SUBTRACTOR relocation value."); Value = SectionABase - SectionBBase + RE.Addend; @@ -124,18 +124,18 @@ class RuntimeDyldMachOX86_64 RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; if (i != Stubs.end()) { - Addr = Section.Address + i->second; + Addr = Section.getAddressWithOffset(i->second); } else { - Stubs[Value] = Section.StubOffset; - uint8_t *GOTEntry = Section.Address + Section.StubOffset; - RelocationEntry GOTRE(RE.SectionID, Section.StubOffset, + Stubs[Value] = Section.getStubOffset(); + uint8_t *GOTEntry = 
Section.getAddressWithOffset(Section.getStubOffset()); + RelocationEntry GOTRE(RE.SectionID, Section.getStubOffset(), MachO::X86_64_RELOC_UNSIGNED, Value.Offset, false, 3); if (Value.SymbolName) addRelocationForSymbol(GOTRE, Value.SymbolName); else addRelocationForSection(GOTRE, Value.SectionID); - Section.StubOffset += 8; + Section.advanceStubOffset(8); Addr = GOTEntry; } RelocationEntry TargetRE(RE.SectionID, RE.Offset, @@ -154,7 +154,7 @@ class RuntimeDyldMachOX86_64 unsigned Size = Obj.getAnyRelocationLength(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Sections[SectionID].Address + Offset; + uint8_t *LocalAddress = Sections[SectionID].getAddressWithOffset(Offset); unsigned NumBytes = 1 << Size; ErrorOr SubtrahendNameOrErr = RelI->getSymbol()->getName(); From 2498981c492518db55d3c0fb76dbc9dc3c436f5a Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 24 Nov 2015 20:37:01 +0000 Subject: [PATCH 0018/1132] [RuntimeDyld] Fix a class of arithmetic errors introduced in r253918 r253918 had refactored expressions like "A - B.Address + C" to "A - B.getAddressWithOffset(C)". This is incorrect, since the latter really computes "A - B.Address - C". None of the tests I can run locally on x86 broke due to this bug, but it is the current suspect for breakage on the AArch64 buildbots. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 00c91b506f1..1b838e45b80 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1259,13 +1259,13 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubTargetAddr - Section.getAddress(), ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); RelocationEntry REmovk_g2(SectionID, StubTargetAddr - - Section.getAddressWithOffset(4), + Section.getAddress() + 4, ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); RelocationEntry REmovk_g1(SectionID, StubTargetAddr - - Section.getAddressWithOffset(8), + Section.getAddress() + 8, ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); RelocationEntry REmovk_g0(SectionID, StubTargetAddr - - Section.getAddressWithOffset(12), + Section.getAddress() + 12, ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); if (Value.SymbolName) { @@ -1364,7 +1364,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_MIPS_HI16, Value.Addend); RelocationEntry RELo(SectionID, - StubTargetAddr - Section.getAddressWithOffset(4), + StubTargetAddr - Section.getAddress() + 4, ELF::R_MIPS_LO16, Value.Addend); if (Value.SymbolName) { From 524cfeb8003231ab5517166ee30319abc52c7424 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 21 Jan 2016 21:59:50 +0000 Subject: [PATCH 0019/1132] [RuntimeDyld][AArch64] Add support for the MachO ARM64_RELOC_SUBTRACTOR reloc. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258438 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Targets/RuntimeDyldMachOAArch64.h | 54 ++++++++++++++++++- .../AArch64/MachO_ARM64_relocations.s | 5 ++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index dbca37747ce..ea2a7a2953b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -270,6 +270,9 @@ class RuntimeDyldMachOAArch64 RelInfo = Obj.getRelocation(RelI->getRawDataRefImpl()); } + if (Obj.getAnyRelocationType(RelInfo) == MachO::ARM64_RELOC_SUBTRACTOR) + return processSubtractRelocation(SectionID, RelI, Obj, ObjSectionToID); + RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI)); RE.Addend = decodeAddend(RE); @@ -349,7 +352,15 @@ class RuntimeDyldMachOAArch64 encodeAddend(LocalAddress, /*Size=*/4, RelType, Value); break; } - case MachO::ARM64_RELOC_SUBTRACTOR: + case MachO::ARM64_RELOC_SUBTRACTOR: { + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); + assert((Value == SectionABase || Value == SectionBBase) && + "Unexpected SUBTRACTOR relocation value."); + Value = SectionABase - SectionBBase + RE.Addend; + writeBytesUnaligned(Value, LocalAddress, 1 << RE.Size); + break; + } case MachO::ARM64_RELOC_POINTER_TO_GOT: case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21: case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12: @@ -398,6 +409,47 @@ class RuntimeDyldMachOAArch64 RE.IsPCRel, RE.Size); addRelocationForSection(TargetRE, RE.SectionID); } + + relocation_iterator + processSubtractRelocation(unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile &Obj = + static_cast(BaseObjT); + 
MachO::any_relocation_info RE = + Obj.getRelocation(RelI->getRawDataRefImpl()); + + unsigned Size = Obj.getAnyRelocationLength(RE); + uint64_t Offset = RelI->getOffset(); + uint8_t *LocalAddress = Sections[SectionID].getAddressWithOffset(Offset); + unsigned NumBytes = 1 << Size; + + ErrorOr SubtrahendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = SubtrahendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto SubtrahendI = GlobalSymbolTable.find(*SubtrahendNameOrErr); + unsigned SectionBID = SubtrahendI->second.getSectionID(); + uint64_t SectionBOffset = SubtrahendI->second.getOffset(); + int64_t Addend = + SignExtend64(readBytesUnaligned(LocalAddress, NumBytes), NumBytes * 8); + + ++RelI; + ErrorOr MinuendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = MinuendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto MinuendI = GlobalSymbolTable.find(*MinuendNameOrErr); + unsigned SectionAID = MinuendI->second.getSectionID(); + uint64_t SectionAOffset = MinuendI->second.getOffset(); + + RelocationEntry R(SectionID, Offset, MachO::ARM64_RELOC_SUBTRACTOR, (uint64_t)Addend, + SectionAID, SectionAOffset, SectionBID, SectionBOffset, + false, Size); + + addRelocationForSection(R, SectionAID); + + return ++RelI; + } + }; } diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s index 0387b932f1c..b29418783d6 100644 --- a/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s +++ b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s @@ -77,3 +77,8 @@ tgt: .fill 4096, 1, 0 _ptr: .quad _foo + +# Test ARM64_RELOC_SUBTRACTOR. 
+# rtdyld-check: *{8}_subtractor_result = _test_branch_reloc - _foo +_subtractor_result: + .quad _test_branch_reloc - _foo From 427bd4e1f67d31cd9d46d9e1b68db7f79e3feb00 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 27 Jan 2016 19:32:29 +0000 Subject: [PATCH 0020/1132] ARMv7k: base ABI decision on v7k Arch rather than watchos OS. Various bits we want to use the new ABI actually compile with "-arch armv7k -miphoneos-version-min=9.0". Not ideal, but also not ridiculous given how slices work. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258975 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/Triple.h | 4 ++++ lib/MC/MCObjectFileInfo.cpp | 4 ++-- lib/Target/ARM/ARMISelLowering.cpp | 4 ++-- lib/Target/ARM/ARMSubtarget.cpp | 4 ++-- lib/Target/ARM/ARMSubtarget.h | 1 + lib/Target/ARM/ARMTargetMachine.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 2 +- test/CodeGen/ARM/eh-resume-darwin.ll | 7 ++++--- test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll | 6 +++++- 9 files changed, 22 insertions(+), 12 deletions(-) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index fd17f40d4e6..fb0b6b80993 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -430,6 +430,10 @@ class Triple { return getOS() == Triple::WatchOS; } + bool isWatchABI() const { + return getSubArch() == Triple::ARMSubArch_v7k; + } + /// isOSDarwin - Is this a "Darwin" OS (OS X, iOS, or watchOS). bool isOSDarwin() const { return isMacOSX() || isiOS() || isWatchOS(); diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index f86f7e40acb..0c86fa81c1b 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -30,7 +30,7 @@ static bool useCompactUnwind(const Triple &T) { return true; // armv7k always has it. - if (T.isWatchOS()) + if (T.isWatchABI()) return true; // Use it on newer version of OS X. 
@@ -58,7 +58,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { if (T.isOSDarwin() && T.getArch() == Triple::aarch64) SupportsCompactUnwindWithoutEHFrame = true; - if (T.isWatchOS()) + if (T.isWatchABI()) OmitDwarfIfHaveCompactUnwind = true; PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9b8d0704de5..c99423203d0 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -240,7 +240,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Set the correct calling convention for ARMv7k WatchOS. It's just // AAPCS_VFP for functions as simple as libcalls. - if (Subtarget->isTargetWatchOS()) { + if (Subtarget->isTargetWatchABI()) { for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP); } @@ -958,7 +958,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetWatchOS()) { + if (Subtarget->isTargetWatchABI()) { setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 0fdb1959b58..0c6f01dc667 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -162,7 +162,7 @@ void ARMSubtarget::initializeEnvironment() { // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this // directly from it, but we can try to make sure they're consistent when both // available. 
- UseSjLjEH = isTargetDarwin() && !isTargetWatchOS(); + UseSjLjEH = isTargetDarwin() && !isTargetWatchABI(); assert((!TM.getMCAsmInfo() || (TM.getMCAsmInfo()->getExceptionHandlingType() == ExceptionHandling::SjLj) == UseSjLjEH) && @@ -343,7 +343,7 @@ bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). But WatchOS uses a compact unwind // format which it's more important to get right. - return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize()); + return isTargetWatchABI() || (isSwift() && !MF.getFunction()->optForMinSize()); } bool ARMSubtarget::useMovt(const MachineFunction &MF) const { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index d5f0b59a54b..84e33f4288d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -386,6 +386,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } + bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index fca1901dc57..4c58f8b59e1 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -85,7 +85,7 @@ computeTargetABI(const Triple &TT, StringRef CPU, (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || CPU.startswith("cortex-m")) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else if (TT.isWatchOS()) { + } else if (TT.isWatchABI()) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16; } else 
{ TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index bda37f6616a..572d1c02f52 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -33,7 +33,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS() + ExceptionsType = (TheTriple.isOSDarwin() && !TheTriple.isWatchABI()) ? ExceptionHandling::SjLj : ExceptionHandling::DwarfCFI; diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll index d3a8481275f..6c2716bffa6 100644 --- a/test/CodeGen/ARM/eh-resume-darwin.ll +++ b/test/CodeGen/ARM/eh-resume-darwin.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=IOS -; RUN: llc < %s -mtriple=armv7k-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=IOS -; RUN: llc < %s -mtriple=armv7k-apple-watchos -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=WATCHOS +; RUN: llc < %s -mtriple=armv7-apple-watchos -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=IOS +; RUN: llc < %s -mtriple=armv7k-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=WATCHABI +; RUN: llc < %s -mtriple=armv7k-apple-watchos -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=WATCHABI declare void @func() @@ -21,4 +22,4 @@ lpad: } ; IOS: __Unwind_SjLj_Resume -; WATCHOS: __Unwind_Resume +; WATCHABI: __Unwind_Resume diff --git a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll index b44b447b3df..323d5037138 100644 --- a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll +++ b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll @@ -2,7 +2,8 @@ ; RUN: llc -mtriple=armv7-apple-ios -O1 < %s | FileCheck %s ; RUN: llc -mtriple=armv7-apple-ios 
-O2 < %s | FileCheck %s ; RUN: llc -mtriple=armv7-apple-ios -O3 < %s | FileCheck %s -; RUN: llc -mtriple=armv7k-apple-ios < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-watchos -O3 < %s | FileCheck %s +; RUN: llc -mtriple=armv7k-apple-ios < %s | FileCheck %s --check-prefix=CHECK-WATCH ; SjLjEHPrepare shouldn't crash when lowering empty structs. ; @@ -16,6 +17,9 @@ entry: ; CHECK: bl __Unwind_SjLj_Register ; CHECK-NEXT: {{[A-Z][a-zA-Z0-9]*}}: ; CHECK-NEXT: bl _bar + +; CHECK-WATCH-NOT: bl __Unwind_SjLj_Register + invoke void @bar () to label %unreachable unwind label %handler From e63e7e8f62e89dd20d2bd05ed5635a0c91ec21b3 Mon Sep 17 00:00:00 2001 From: Manuel Jacob Date: Tue, 19 Jan 2016 15:21:15 +0000 Subject: [PATCH 0021/1132] Rename Variable `Ptr` to `PtrTy`. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258130 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 95992a2e04ec0bf773bf73704b0370728a1135b9) --- lib/IR/ConstantFold.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index ce3fe03e2df..35855860129 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -2040,11 +2040,11 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, return C; if (isa(C)) { - PointerType *Ptr = cast(C->getType()); + PointerType *PtrTy = cast(C->getType()); Type *Ty = GetElementPtrInst::getIndexedType( - cast(Ptr->getScalarType())->getElementType(), Idxs); + cast(PtrTy->getScalarType())->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); - return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace())); + return UndefValue::get(PointerType::get(Ty, PtrTy->getAddressSpace())); } if (C->isNullValue()) { @@ -2055,12 +2055,12 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, break; } if (isNull) { - PointerType *Ptr = cast(C->getType()); + PointerType *PtrTy = cast(C->getType()); 
Type *Ty = GetElementPtrInst::getIndexedType( - cast(Ptr->getScalarType())->getElementType(), Idxs); + cast(PtrTy->getScalarType())->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); return ConstantPointerNull::get(PointerType::get(Ty, - Ptr->getAddressSpace())); + PtrTy->getAddressSpace())); } } From 578daec428160719dc6e4c05d319e390f066474c Mon Sep 17 00:00:00 2001 From: Manuel Jacob Date: Tue, 19 Jan 2016 16:34:31 +0000 Subject: [PATCH 0022/1132] Fix constant folding of constant vector GEPs with undef or null as pointer argument. Reviewers: eddyb Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D16321 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258134 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 5ad54e5885e8e2c546cf342610718e64f25fd27b) --- lib/IR/ConstantFold.cpp | 22 +++++++++++++--------- test/Assembler/ConstantExprFold.ll | 4 ++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 35855860129..7e73be124c3 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -2040,11 +2040,13 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, return C; if (isa(C)) { - PointerType *PtrTy = cast(C->getType()); - Type *Ty = GetElementPtrInst::getIndexedType( - cast(PtrTy->getScalarType())->getElementType(), Idxs); + PointerType *PtrTy = cast(C->getType()->getScalarType()); + Type *Ty = GetElementPtrInst::getIndexedType(PtrTy->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); - return UndefValue::get(PointerType::get(Ty, PtrTy->getAddressSpace())); + Type *GEPTy = PointerType::get(Ty, PtrTy->getAddressSpace()); + if (VectorType *VT = dyn_cast(C->getType())) + GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + return UndefValue::get(GEPTy); } if (C->isNullValue()) { @@ -2055,12 +2057,14 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, break; } if 
(isNull) { - PointerType *PtrTy = cast(C->getType()); - Type *Ty = GetElementPtrInst::getIndexedType( - cast(PtrTy->getScalarType())->getElementType(), Idxs); + PointerType *PtrTy = cast(C->getType()->getScalarType()); + Type *Ty = + GetElementPtrInst::getIndexedType(PtrTy->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); - return ConstantPointerNull::get(PointerType::get(Ty, - PtrTy->getAddressSpace())); + Type *GEPTy = PointerType::get(Ty, PtrTy->getAddressSpace()); + if (VectorType *VT = dyn_cast(C->getType())) + GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + return Constant::getNullValue(GEPTy); } } diff --git a/test/Assembler/ConstantExprFold.ll b/test/Assembler/ConstantExprFold.ll index 3314f8c1af8..94058230519 100644 --- a/test/Assembler/ConstantExprFold.ll +++ b/test/Assembler/ConstantExprFold.ll @@ -30,3 +30,7 @@ global i1 icmp slt (i32* getelementptr (%Ty, %Ty* @B, i64 0, i32 0), @cons = weak global i32 0, align 8 ; [#uses=1] global i64 and (i64 ptrtoint (i32* @cons to i64), i64 7) +global <2 x i8*> getelementptr(i8, <2 x i8*> undef, <2 x i64> ) +global <2 x i8*> getelementptr({ i8 }, <2 x { i8 }*> undef, <2 x i64> , <2 x i32> ) +global <2 x i8*> getelementptr(i8, <2 x i8*> zeroinitializer, <2 x i64> ) +global <2 x i8*> getelementptr({ i8 }, <2 x { i8 }*> zeroinitializer, <2 x i64> , <2 x i32> ) From 8b7f34d4a54e0c8a30ffbec0d7a970b304dd4e82 Mon Sep 17 00:00:00 2001 From: Geoff Berry Date: Tue, 19 Jan 2016 17:36:02 +0000 Subject: [PATCH 0023/1132] [cmake] Fix add_version_info_from_vcs git svn version bug. Summary: add_version_info_from_vcs was setting SVN_REVISION to the last fetched svn revision when using git svn instead of the svn revision corresponding to HEAD. This leads to conflicts with the definition of SVN_REVISION in SVNVersion.inc generated by GetSVN.cmake when HEAD is not the most recently fetched svn revision. 
Use 'git svn info' to determine SVN_REVISION when git svn is being used instead (as is done in GetSVN.cmake). Reviewers: beanz Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D16299 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258148 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 73c7b058de8838fcecbbda12dabc87edcaec5678) --- cmake/modules/VersionFromVCS.cmake | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake index 85cb8ead21a..6be4daa3166 100644 --- a/cmake/modules/VersionFromVCS.cmake +++ b/cmake/modules/VersionFromVCS.cmake @@ -27,16 +27,20 @@ function(add_version_info_from_vcs VERS) find_program(git_executable NAMES git git.exe git.cmd) if( git_executable ) set(is_git_svn_rev_exact false) - execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline + execute_process(COMMAND + ${git_executable} svn info WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} TIMEOUT 5 RESULT_VARIABLE git_result OUTPUT_VARIABLE git_output) if( git_result EQUAL 0 ) - string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output}) - string(LENGTH "${git_svn_rev}" rev_length) - math(EXPR rev_length "${rev_length}-1") - string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number) + string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output}) + if(svn_url) + set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE) + endif() + + string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*" + "\\2" git_svn_rev_number "${git_output}") set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE) set(git_svn_rev "-svn-${git_svn_rev}") @@ -69,18 +73,6 @@ function(add_version_info_from_vcs VERS) set(result "${result}${git_svn_rev}") endif() - execute_process(COMMAND - ${git_executable} svn info - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if( git_result EQUAL 0) - string(REGEX 
MATCH "URL: ([^ \n]*)" svn_url ${git_output}) - if(svn_url) - set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE) - endif() - endif() endif() endif() endif() From 55e6c8a815b2e63d6cc83999522898ad62e8fcad Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 19 Jan 2016 20:53:51 +0000 Subject: [PATCH 0024/1132] [SCEV] Fix PR26207 In some cases, the max backedge taken count can be more conservative than the exact backedge taken count (for instance, because ScalarEvolution::getRange is not control-flow sensitive whereas computeExitLimitFromICmp can be). In these cases, computeExitLimitFromCond (specifically the bit that deals with `and` and `or` instructions) can create an ExitLimit instance with a `SCEVCouldNotCompute` max backedge count expression, but a computable exact backedge count expression. This violates an implicit SCEV assumption: a computable exact BE count should imply a computable max BE count. This change - Makes the above implicit invariant explicit by adding an assert to ExitLimit's constructor - Changes `computeExitLimitFromCond` to be more robust around conservative max backedge counts git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258184 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 96068f979145efbd5ed743fb32f6734c7ece0349) --- include/llvm/Analysis/ScalarEvolution.h | 6 +++++- lib/Analysis/ScalarEvolution.cpp | 8 ++++++++ test/Transforms/IndVarSimplify/pr26207.ll | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/IndVarSimplify/pr26207.ll diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index c08335de3e7..ef930578884 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -412,7 +412,11 @@ namespace llvm { /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {} - ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) {} + ExitLimit(const SCEV *E, const SCEV *M) : 
Exact(E), Max(M) { + assert((isa(Exact) || + !isa(Max)) && + "Exact is not allowed to be less precise than Max"); + } /// Test whether this ExitLimit contains any computed information, or /// whether it's all SCEVCouldNotCompute values. diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 34074efd1ce..ef1bb3a36c8 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -5368,6 +5368,14 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, BECount = EL0.Exact; } + // There are cases (e.g. PR26207) where computeExitLimitFromCond is able + // to be more aggressive when computing BECount than when computing + // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact + // to match, but for EL0.Max and EL1.Max to not. + if (isa(MaxBECount) && + !isa(BECount)) + MaxBECount = BECount; + return ExitLimit(BECount, MaxBECount); } if (BO->getOpcode() == Instruction::Or) { diff --git a/test/Transforms/IndVarSimplify/pr26207.ll b/test/Transforms/IndVarSimplify/pr26207.ll new file mode 100644 index 00000000000..9d351e09857 --- /dev/null +++ b/test/Transforms/IndVarSimplify/pr26207.ll @@ -0,0 +1,20 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define void @main(i16 %in) { +; CHECK-LABEL: @main( + br label %bb2 + +bb2: ; preds = %bb1.i, %bb2, %0 + %_tmp44.i = icmp slt i16 %in, 2 + br i1 %_tmp44.i, label %bb1.i, label %bb2 + +bb1.i: ; preds = %bb1.i, %bb2 + %_tmp25.i = phi i16 [ %in, %bb2 ], [ %_tmp6.i, %bb1.i ] + %_tmp6.i = add nsw i16 %_tmp25.i, 1 + %_tmp10.i = icmp sge i16 %_tmp6.i, 2 + %exitcond.i = icmp eq i16 %_tmp6.i, 2 + %or.cond = and i1 %_tmp10.i, %exitcond.i + br i1 %or.cond, label %bb2, label %bb1.i +} From 5cea41550dbba1fa0fe3a1f4a2e58869fe92b67a Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 19 Jan 2016 21:06:38 +0000 Subject: [PATCH 0025/1132] [Orc] Refactor ObjectLinkingLayer::addObjectSet to defer loading objects until they're 
needed. Prior to this patch objects were loaded (via RuntimeDyld::loadObject) when they were added to the ObjectLinkingLayer, but were not relocated and finalized until a symbol address was requested. In the interim, another object could be loaded and finalized with the same memory manager, causing relocation/finalization of the first object to fail (as the first finalization call may have marked the allocated memory for the first object read-only). By deferring the loadObject call (and subsequent memory allocations) until an object file is needed we can avoid prematurely finalizing memory. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258185 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 1665b573d93b237bf81110a7981282a7ea343bf6) --- include/llvm/ExecutionEngine/JITSymbolFlags.h | 10 + .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 24 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 255 ++++++++++++------ lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 24 +- .../Orc/ObjectLinkingLayerTest.cpp | 72 +++++ 5 files changed, 278 insertions(+), 107 deletions(-) diff --git a/include/llvm/ExecutionEngine/JITSymbolFlags.h b/include/llvm/ExecutionEngine/JITSymbolFlags.h index 450e9481fa0..7e1d57dabc8 100644 --- a/include/llvm/ExecutionEngine/JITSymbolFlags.h +++ b/include/llvm/ExecutionEngine/JITSymbolFlags.h @@ -15,6 +15,7 @@ #define LLVM_EXECUTIONENGINE_JITSYMBOLFLAGS_H #include "llvm/IR/GlobalValue.h" +#include "llvm/Object/SymbolicFile.h" namespace llvm { @@ -69,7 +70,16 @@ class JITSymbolBase { if (!GV.hasLocalLinkage() && !GV.hasHiddenVisibility()) Flags |= JITSymbolFlags::Exported; return Flags; + } + static JITSymbolFlags + flagsFromObjectSymbol(const object::BasicSymbolRef &Symbol) { + JITSymbolFlags Flags = JITSymbolFlags::None; + if (Symbol.getFlags() & object::BasicSymbolRef::SF_Weak) + Flags |= JITSymbolFlags::Weak; + if (Symbol.getFlags() & object::BasicSymbolRef::SF_Exported) + Flags |= JITSymbolFlags::Exported; + return Flags; } 
private: diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index e4bed95fdab..23ce7e24ad3 100644 --- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -37,9 +37,6 @@ template class IRCompileLayer { private: typedef typename BaseLayerT::ObjSetHandleT ObjSetHandleT; - typedef std::vector> OwningObjectVec; - typedef std::vector> OwningBufferVec; - public: /// @brief Handle to a set of compiled modules. typedef ObjSetHandleT ModuleSetHandleT; @@ -62,28 +59,29 @@ template class IRCompileLayer { ModuleSetHandleT addModuleSet(ModuleSetT Ms, MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { - OwningObjectVec Objects; - OwningBufferVec Buffers; + std::vector>> + Objects; for (const auto &M : Ms) { - std::unique_ptr Object; - std::unique_ptr Buffer; + auto Object = + llvm::make_unique>(); if (ObjCache) - std::tie(Object, Buffer) = tryToLoadFromObjectCache(*M).takeBinary(); + *Object = tryToLoadFromObjectCache(*M); - if (!Object) { - std::tie(Object, Buffer) = Compile(*M).takeBinary(); + if (!Object->getBinary()) { + *Object = Compile(*M); if (ObjCache) - ObjCache->notifyObjectCompiled(&*M, Buffer->getMemBufferRef()); + ObjCache->notifyObjectCompiled(&*M, + Object->getBinary()->getMemoryBufferRef()); } Objects.push_back(std::move(Object)); - Buffers.push_back(std::move(Buffer)); } ModuleSetHandleT H = - BaseLayer.addObjectSet(Objects, std::move(MemMgr), std::move(Resolver)); + BaseLayer.addObjectSet(std::move(Objects), std::move(MemMgr), + std::move(Resolver)); return H; } diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 4dc48f11488..62cac6b1fda 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -26,7 +26,6 @@ namespace orc { class ObjectLinkingLayerBase { protected: - /// @brief Holds a 
set of objects to be allocated/linked as a unit in the JIT. /// /// An instance of this class will be created for each set of objects added @@ -38,38 +37,32 @@ class ObjectLinkingLayerBase { LinkedObjectSet(const LinkedObjectSet&) = delete; void operator=(const LinkedObjectSet&) = delete; public: - LinkedObjectSet(RuntimeDyld::MemoryManager &MemMgr, - RuntimeDyld::SymbolResolver &Resolver, - bool ProcessAllSections) - : RTDyld(llvm::make_unique(MemMgr, Resolver)), - State(Raw) { - RTDyld->setProcessAllSections(ProcessAllSections); - } - + LinkedObjectSet() = default; virtual ~LinkedObjectSet() {} - std::unique_ptr - addObject(const object::ObjectFile &Obj) { - return RTDyld->loadObject(Obj); + virtual void finalize() = 0; + + virtual JITSymbol::GetAddressFtor + getSymbolMaterializer(std::string Name) = 0; + + virtual void mapSectionAddress(const void *LocalAddress, + TargetAddress TargetAddr) const = 0; + + JITSymbol getSymbol(StringRef Name, bool ExportedSymbolsOnly) { + auto SymEntry = SymbolTable.find(Name); + if (SymEntry == SymbolTable.end()) + return nullptr; + if (!SymEntry->second.isExported() && ExportedSymbolsOnly) + return nullptr; + if (!Finalized) + return JITSymbol(getSymbolMaterializer(Name), + SymEntry->second.getFlags()); + return JITSymbol(SymEntry->second.getAddress(), + SymEntry->second.getFlags()); } - - RuntimeDyld::SymbolInfo getSymbol(StringRef Name) const { - return RTDyld->getSymbol(Name); - } - - bool NeedsFinalization() const { return (State == Raw); } - - virtual void Finalize() = 0; - - void mapSectionAddress(const void *LocalAddress, TargetAddress TargetAddr) { - assert((State != Finalized) && - "Attempting to remap sections for finalized objects."); - RTDyld->mapSectionAddress(LocalAddress, TargetAddr); - } - protected: - std::unique_ptr RTDyld; - enum { Raw, Finalizing, Finalized } State; + StringMap SymbolTable; + bool Finalized = false; }; typedef std::list> LinkedObjectSetListT; @@ -79,6 +72,7 @@ class ObjectLinkingLayerBase { 
typedef LinkedObjectSetListT::iterator ObjSetHandleT; }; + /// @brief Default (no-op) action to perform when loading objects. class DoNothingOnNotifyLoaded { public: @@ -95,34 +89,124 @@ class DoNothingOnNotifyLoaded { /// symbols. template class ObjectLinkingLayer : public ObjectLinkingLayerBase { +public: + + /// @brief Functor for receiving finalization notifications. + typedef std::function NotifyFinalizedFtor; + private: - template + template class ConcreteLinkedObjectSet : public LinkedObjectSet { public: - ConcreteLinkedObjectSet(MemoryManagerPtrT MemMgr, + ConcreteLinkedObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver, + FinalizerFtor Finalizer, bool ProcessAllSections) - : LinkedObjectSet(*MemMgr, *Resolver, ProcessAllSections), - MemMgr(std::move(MemMgr)), Resolver(std::move(Resolver)) { } + : MemMgr(std::move(MemMgr)), + PFC(make_unique(std::move(Objects), + std::move(Resolver), + std::move(Finalizer), + ProcessAllSections)) { + buildInitialSymbolTable(PFC->Objects); + } + + void setHandle(ObjSetHandleT H) { + PFC->Handle = H; + } - void Finalize() override { - State = Finalizing; - RTDyld->finalizeWithMemoryManagerLocking(); - State = Finalized; + void finalize() override { + assert(PFC && "mapSectionAddress called on finalized LinkedObjectSet"); + + RuntimeDyld RTDyld(*MemMgr, *PFC->Resolver); + RTDyld.setProcessAllSections(PFC->ProcessAllSections); + PFC->RTDyld = &RTDyld; + + PFC->Finalizer(PFC->Handle, RTDyld, std::move(PFC->Objects), + [&]() { + updateSymbolTable(RTDyld); + Finalized = true; + }); + + // Release resources. + PFC = nullptr; + } + + JITSymbol::GetAddressFtor getSymbolMaterializer(std::string Name) override { + return + [this, Name]() { + // The symbol may be materialized between the creation of this lambda + // and its execution, so we need to double check. 
+ if (!Finalized) + finalize(); + return getSymbol(Name, false).getAddress(); + }; + } + + void mapSectionAddress(const void *LocalAddress, + TargetAddress TargetAddr) const override { + assert(PFC && "mapSectionAddress called on finalized LinkedObjectSet"); + assert(PFC->RTDyld && "mapSectionAddress called on raw LinkedObjectSet"); + PFC->RTDyld->mapSectionAddress(LocalAddress, TargetAddr); } private: + + void buildInitialSymbolTable(const ObjSetT &Objects) { + for (const auto &Obj : Objects) + for (auto &Symbol : getObject(*Obj).symbols()) { + if (Symbol.getFlags() & object::SymbolRef::SF_Undefined) + continue; + ErrorOr SymbolName = Symbol.getName(); + // FIXME: Raise an error for bad symbols. + if (!SymbolName) + continue; + auto Flags = JITSymbol::flagsFromObjectSymbol(Symbol); + SymbolTable.insert( + std::make_pair(*SymbolName, RuntimeDyld::SymbolInfo(0, Flags))); + } + } + + void updateSymbolTable(const RuntimeDyld &RTDyld) { + for (auto &SymEntry : SymbolTable) + SymEntry.second = RTDyld.getSymbol(SymEntry.first()); + } + + // Contains the information needed prior to finalization: the object files, + // memory manager, resolver, and flags needed for RuntimeDyld. 
+ struct PreFinalizeContents { + PreFinalizeContents(ObjSetT Objects, SymbolResolverPtrT Resolver, + FinalizerFtor Finalizer, bool ProcessAllSections) + : Objects(std::move(Objects)), Resolver(std::move(Resolver)), + Finalizer(std::move(Finalizer)), + ProcessAllSections(ProcessAllSections) {} + + ObjSetT Objects; + SymbolResolverPtrT Resolver; + FinalizerFtor Finalizer; + bool ProcessAllSections; + ObjSetHandleT Handle; + RuntimeDyld *RTDyld; + }; + MemoryManagerPtrT MemMgr; - SymbolResolverPtrT Resolver; + std::unique_ptr PFC; }; - template - std::unique_ptr - createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver, + template + std::unique_ptr< + ConcreteLinkedObjectSet> + createLinkedObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr, + SymbolResolverPtrT Resolver, + FinalizerFtor Finalizer, bool ProcessAllSections) { - typedef ConcreteLinkedObjectSet LOS; - return llvm::make_unique(std::move(MemMgr), std::move(Resolver), + typedef ConcreteLinkedObjectSet LOS; + return llvm::make_unique(std::move(Objects), std::move(MemMgr), + std::move(Resolver), std::move(Finalizer), ProcessAllSections); } @@ -133,9 +217,6 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { typedef std::vector> LoadedObjInfoList; - /// @brief Functor for receiving finalization notifications. - typedef std::function NotifyFinalizedFtor; - /// @brief Construct an ObjectLinkingLayer with the given NotifyLoaded, /// and NotifyFinalized functors. 
ObjectLinkingLayer( @@ -169,22 +250,39 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { template - ObjSetHandleT addObjectSet(const ObjSetT &Objects, + ObjSetHandleT addObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { - ObjSetHandleT Handle = - LinkedObjSetList.insert( - LinkedObjSetList.end(), - createLinkedObjectSet(std::move(MemMgr), std::move(Resolver), - ProcessAllSections)); - LinkedObjectSet &LOS = **Handle; - LoadedObjInfoList LoadedObjInfos; + auto Finalizer = [&](ObjSetHandleT H, RuntimeDyld &RTDyld, + const ObjSetT &Objs, + std::function LOSHandleLoad) { + LoadedObjInfoList LoadedObjInfos; + + for (auto &Obj : Objs) + LoadedObjInfos.push_back(RTDyld.loadObject(getObject(*Obj))); - for (auto &Obj : Objects) - LoadedObjInfos.push_back(LOS.addObject(*Obj)); + LOSHandleLoad(); - NotifyLoaded(Handle, Objects, LoadedObjInfos); + NotifyLoaded(H, Objs, LoadedObjInfos); + + RTDyld.finalizeWithMemoryManagerLocking(); + + if (NotifyFinalized) + NotifyFinalized(H); + }; + + auto LOS = + createLinkedObjectSet(std::move(Objects), std::move(MemMgr), + std::move(Resolver), std::move(Finalizer), + ProcessAllSections); + // LOS is an owning-ptr. Keep a non-owning one so that we can set the handle + // below. + auto *LOSPtr = LOS.get(); + + ObjSetHandleT Handle = LinkedObjSetList.insert(LinkedObjSetList.end(), + std::move(LOS)); + LOSPtr->setHandle(Handle); return Handle; } @@ -224,33 +322,7 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { /// given object set. JITSymbol findSymbolIn(ObjSetHandleT H, StringRef Name, bool ExportedSymbolsOnly) { - if (auto Sym = (*H)->getSymbol(Name)) { - if (Sym.isExported() || !ExportedSymbolsOnly) { - auto Addr = Sym.getAddress(); - auto Flags = Sym.getFlags(); - if (!(*H)->NeedsFinalization()) { - // If this instance has already been finalized then we can just return - // the address. 
- return JITSymbol(Addr, Flags); - } else { - // If this instance needs finalization return a functor that will do - // it. The functor still needs to double-check whether finalization is - // required, in case someone else finalizes this set before the - // functor is called. - auto GetAddress = - [this, Addr, H]() { - if ((*H)->NeedsFinalization()) { - (*H)->Finalize(); - if (NotifyFinalized) - NotifyFinalized(H); - } - return Addr; - }; - return JITSymbol(std::move(GetAddress), Flags); - } - } - } - return nullptr; + return (*H)->getSymbol(Name, ExportedSymbolsOnly); } /// @brief Map section addresses for the objects associated with the handle H. @@ -263,12 +335,21 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { /// given handle. /// @param H Handle for object set to emit/finalize. void emitAndFinalize(ObjSetHandleT H) { - (*H)->Finalize(); - if (NotifyFinalized) - NotifyFinalized(H); + (*H)->finalize(); } private: + + static const object::ObjectFile& getObject(const object::ObjectFile &Obj) { + return Obj; + } + + template + static const object::ObjectFile& + getObject(const object::OwningBinary &Obj) { + return *Obj.getBinary(); + } + LinkedObjectSetListT LinkedObjSetList; NotifyLoadedFtor NotifyLoaded; NotifyFinalizedFtor NotifyFinalized; diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 2ab70a9fee8..896c184d440 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -178,11 +178,10 @@ class OrcMCJITReplacement : public ExecutionEngine { } void addObjectFile(object::OwningBinary O) override { - std::unique_ptr Obj; - std::unique_ptr Buf; - std::tie(Obj, Buf) = O.takeBinary(); - std::vector> Objs; - Objs.push_back(std::move(Obj)); + std::vector>> Objs; + Objs.push_back( + llvm::make_unique>( + std::move(O))); ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver); } @@ -284,12 +283,12 @@ class OrcMCJITReplacement : 
public ExecutionEngine { class NotifyObjectLoadedT { public: - typedef std::vector> ObjListT; typedef std::vector> LoadedObjInfoListT; NotifyObjectLoadedT(OrcMCJITReplacement &M) : M(M) {} + template void operator()(ObjectLinkingLayerBase::ObjSetHandleT H, const ObjListT &Objects, const LoadedObjInfoListT &Infos) const { @@ -298,10 +297,21 @@ class OrcMCJITReplacement : public ExecutionEngine { assert(Objects.size() == Infos.size() && "Incorrect number of Infos for Objects."); for (unsigned I = 0; I < Objects.size(); ++I) - M.MemMgr.notifyObjectLoaded(&M, *Objects[I]); + M.MemMgr.notifyObjectLoaded(&M, getObject(*Objects[I])); } private: + + static const object::ObjectFile& getObject(const object::ObjectFile &Obj) { + return Obj; + } + + template + static const object::ObjectFile& + getObject(const object::OwningBinary &Obj) { + return *Obj.getBinary(); + } + OrcMCJITReplacement &M; }; diff --git a/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp index c8c4cfb3634..f4267c95be0 100644 --- a/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp @@ -12,6 +12,7 @@ #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/Orc/NullResolver.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" @@ -29,6 +30,13 @@ class ObjectLinkingLayerExecutionTest : public testing::Test, class SectionMemoryManagerWrapper : public SectionMemoryManager { public: int FinalizationCount = 0; + int NeedsToReserveAllocationSpaceCount = 0; + + bool needsToReserveAllocationSpace() override { + ++NeedsToReserveAllocationSpaceCount; + return SectionMemoryManager::needsToReserveAllocationSpace(); + } + bool finalizeMemory(std::string *ErrMsg = 0) override { 
++FinalizationCount; return SectionMemoryManager::finalizeMemory(ErrMsg); @@ -178,4 +186,68 @@ TEST_F(ObjectLinkingLayerExecutionTest, NoDuplicateFinalization) { << "Extra call to finalize"; } +TEST_F(ObjectLinkingLayerExecutionTest, NoPrematureAllocation) { + + if (!TM) + return; + + ObjectLinkingLayer<> ObjLayer; + SimpleCompiler Compile(*TM); + + // Create a pair of unrelated modules: + // + // Module 1: + // int foo() { return 42; } + // Module 2: + // int bar() { return 7; } + // + // Both modules will share a memory manager. We want to verify that the + // second object is not loaded before the first one is finalized. To do this + // in a portable way, we abuse the + // RuntimeDyld::MemoryManager::needsToReserveAllocationSpace hook, which is + // called once per object before any sections are allocated. + + ModuleBuilder MB1(getGlobalContext(), "", "dummy"); + { + MB1.getModule()->setDataLayout(TM->createDataLayout()); + Function *BarImpl = MB1.createFunctionDecl("foo"); + BasicBlock *BarEntry = BasicBlock::Create(getGlobalContext(), "entry", + BarImpl); + IRBuilder<> Builder(BarEntry); + IntegerType *Int32Ty = IntegerType::get(getGlobalContext(), 32); + Value *FourtyTwo = ConstantInt::getSigned(Int32Ty, 42); + Builder.CreateRet(FourtyTwo); + } + + auto Obj1 = Compile(*MB1.getModule()); + std::vector Obj1Set; + Obj1Set.push_back(Obj1.getBinary()); + + ModuleBuilder MB2(getGlobalContext(), "", "dummy"); + { + MB2.getModule()->setDataLayout(TM->createDataLayout()); + Function *BarImpl = MB2.createFunctionDecl("bar"); + BasicBlock *BarEntry = BasicBlock::Create(getGlobalContext(), "entry", + BarImpl); + IRBuilder<> Builder(BarEntry); + IntegerType *Int32Ty = IntegerType::get(getGlobalContext(), 32); + Value *Seven = ConstantInt::getSigned(Int32Ty, 7); + Builder.CreateRet(Seven); + } + auto Obj2 = Compile(*MB2.getModule()); + std::vector Obj2Set; + Obj2Set.push_back(Obj2.getBinary()); + + SectionMemoryManagerWrapper SMMW; + NullResolver NR; + auto H = 
ObjLayer.addObjectSet(std::move(Obj1Set), &SMMW, &NR); + ObjLayer.addObjectSet(std::move(Obj2Set), &SMMW, &NR); + ObjLayer.emitAndFinalize(H); + + // Only one call to needsToReserveAllocationSpace should have been made. + EXPECT_EQ(SMMW.NeedsToReserveAllocationSpaceCount, 1) + << "More than one call to needsToReserveAllocationSpace " + "(multiple unrelated objects loaded prior to finalization)"; +} + } From 054b2c09886efb5be3bf5dbfb02fc4cfdc44f2ed Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 19 Jan 2016 21:13:54 +0000 Subject: [PATCH 0026/1132] [Orc] Fix a stale comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258187 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 08e6e6af3832a258bedbdffb064d8f07b7f07456) --- include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 62cac6b1fda..7e9474b2d3c 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -239,14 +239,8 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { /// @brief Add a set of objects (or archives) that will be treated as a unit /// for the purposes of symbol lookup and memory management. /// - /// @return A pair containing (1) A handle that can be used to free the memory - /// allocated for the objects, and (2) a LoadedObjInfoList containing - /// one LoadedObjInfo instance for each object at the corresponding - /// index in the Objects list. - /// - /// This version of this method allows the client to pass in an - /// RTDyldMemoryManager instance that will be used to allocate memory and look - /// up external symbol addresses for the given objects. + /// @return A handle that can be used to refer to the loaded objects (for + /// symbol searching, finalization, freeing memory, etc.). 
template From 0a6051eb2fe01a01223a9ba2898b85c5bdbfdf90 Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Tue, 19 Jan 2016 21:18:12 +0000 Subject: [PATCH 0027/1132] Fix a coverage reading bug function record pointer is not advanced when duplicate entry is found. Test case to be added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258188 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 306bcccc421284520dde5563e6942b961a5fa900) --- lib/ProfileData/CoverageMappingReader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp index da14ab23294..fa4d6d66d73 100644 --- a/lib/ProfileData/CoverageMappingReader.cpp +++ b/lib/ProfileData/CoverageMappingReader.cpp @@ -396,8 +396,10 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader { // function name. This is useful to ignore the redundant records for the // functions with ODR linkage. NameRefType NameRef = CFR->template getFuncNameRef(); - if (!UniqueFunctionMappingData.insert(NameRef).second) + if (!UniqueFunctionMappingData.insert(NameRef).second) { + CFR++; continue; + } StringRef FuncName; if (std::error_code EC = From 2694691a2dc78367f578bcc1ca179177195d5877 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 19 Jan 2016 22:20:21 +0000 Subject: [PATCH 0028/1132] [Orc] #undef a MACRO after I'm done with it. Suggested by Philip Reames in review of r257951. Thanks Philip! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258203 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 4ce7a6cb2567a4ae90a197f335b39d39252b35ad) --- .../Orc/OrcRemoteTargetRPCAPI.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp b/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp index 577c5185989..81e51a83021 100644 --- a/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp +++ b/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp @@ -9,14 +9,14 @@ #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" -#define PROCNAME(X) \ - case X ## Id: \ - return #X - namespace llvm { namespace orc { namespace remote { +#define PROCNAME(X) \ + case X ## Id: \ + return #X + const char *OrcRemoteTargetRPCAPI::getJITProcIdName(JITProcId Id) { switch (Id) { case InvalidId: @@ -55,6 +55,9 @@ const char *OrcRemoteTargetRPCAPI::getJITProcIdName(JITProcId Id) { }; return nullptr; } -} -} -} + +#undef PROCNAME + +} // end namespace remote +} // end namespace orc +} // end namespace llvm From cce526324622fdeda67bba758bdf0a293e6f2493 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 19 Jan 2016 22:22:43 +0000 Subject: [PATCH 0029/1132] [Orc] Qualify call to make_unique to avoid ambiguity with std::make_unique. This should fix some of the bot failures associated with r258185. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258204 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit f32ef420e6f5b773c79a98554cabf215dc40bc02) --- include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 7e9474b2d3c..10255be221b 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -105,10 +105,10 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { FinalizerFtor Finalizer, bool ProcessAllSections) : MemMgr(std::move(MemMgr)), - PFC(make_unique(std::move(Objects), - std::move(Resolver), - std::move(Finalizer), - ProcessAllSections)) { + PFC(llvm::make_unique(std::move(Objects), + std::move(Resolver), + std::move(Finalizer), + ProcessAllSections)) { buildInitialSymbolTable(PFC->Objects); } From 4410df2ece2090e9ec6eac1a0de2a839d5b9b0ac Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 19 Jan 2016 22:31:01 +0000 Subject: [PATCH 0030/1132] [Orc] Add missing capture to lambda. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258206 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b9be9b997a34ed8a783011d02eb25baefe1de95d) --- include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 10255be221b..3f7f0e78f69 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -135,7 +135,7 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { JITSymbol::GetAddressFtor getSymbolMaterializer(std::string Name) override { return - [this, Name]() { + [this, Name, &Finalized]() { // The symbol may be materialized between the creation of this lambda // and its execution, so we need to double check. if (!Finalized) From 6719cbed9fa65fce22d92d4f4caa9d97443aa7fb Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 19 Jan 2016 22:31:12 +0000 Subject: [PATCH 0031/1132] [MachineFunction] Constify getter. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258207 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 871b3df6d926c1e79b4fca532ac9c8e925722185) --- include/llvm/CodeGen/MachineFunction.h | 2 +- lib/CodeGen/MachineFunction.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 82c30d39afd..df7c951743c 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -295,7 +295,7 @@ class MachineFunction { } /// Should we be emitting segmented stack stuff for the function - bool shouldSplitStack(); + bool shouldSplitStack() const; /// getNumBlockIDs - Return the number of MBB ID's allocated. 
/// diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index ca4bb1c6ad4..f6604f38722 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -163,7 +163,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { } /// Should we be emitting segmented stack stuff for the function -bool MachineFunction::shouldSplitStack() { +bool MachineFunction::shouldSplitStack() const { return getFunction()->hasFnAttribute("split-stack"); } From 39013c31cf956c74e985b68ba92dfb97463319fc Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 19 Jan 2016 22:32:58 +0000 Subject: [PATCH 0032/1132] [Orc] Oops - lambda capture changed in r258206 was correct. Fully qualify reference to Finalized in the body of the lambda instead to work around GCC ICE. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258208 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b8b874987ec77a9d69689347fc8cb5bd88d94bc9) --- include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 3f7f0e78f69..affb16a1c26 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -135,10 +135,10 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { JITSymbol::GetAddressFtor getSymbolMaterializer(std::string Name) override { return - [this, Name, &Finalized]() { + [this, Name]() { // The symbol may be materialized between the creation of this lambda // and its execution, so we need to double check. 
- if (!Finalized) + if (!this->Finalized) finalize(); return getSymbol(Name, false).getAddress(); }; From 524969755b493fb4e99dfee01690dd875932b6d2 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 19 Jan 2016 23:29:03 +0000 Subject: [PATCH 0033/1132] [X86] Do not run shrink-wrapping on function with split-stack attribute or HiPE calling convention. The implementation of the related callbacks in the x86 backend for such functions are not ready to deal with a prologue block that is not the entry block of the function. This fixes PR26107, but the longer term solution would be to fix those callbacks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258221 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 5a15c7dc2ceed39b9719b905f6a65283a3a55e0a) --- lib/Target/X86/X86FrameLowering.cpp | 18 ++++- test/CodeGen/X86/x86-shrink-wrap-unwind.ll | 83 ++++++++++++++++++++-- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 4dac80369c4..2a587375afd 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -2031,6 +2031,10 @@ void X86FrameLowering::adjustForSegmentedStacks( unsigned TlsReg, TlsOffset; DebugLoc DL; + // To support shrink-wrapping we would need to insert the new blocks + // at the right place and update the branches to PrologueMBB. + assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); + unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && "Scratch register is live-in"); @@ -2271,6 +2275,11 @@ void X86FrameLowering::adjustForHiPEPrologue( MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); DebugLoc DL; + + // To support shrink-wrapping we would need to insert the new blocks + // at the right place and update the branches to PrologueMBB. 
+ assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); + // HiPE-specific values const unsigned HipeLeafWords = 24; const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; @@ -2584,7 +2593,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { // If we may need to emit frameless compact unwind information, give // up as this is currently broken: PR25614. - return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF); + return (MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) && + // The lowering of segmented stack and HiPE only support entry blocks + // as prologue blocks: PR26107. + // This limitation may be lifted if we fix: + // - adjustForSegmentedStacks + // - adjustForHiPEPrologue + MF.getFunction()->getCallingConv() != CallingConv::HiPE && + !MF.shouldSplitStack(); } MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( diff --git a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index 7c00f407b1e..eb87f7101d7 100644 --- a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -1,11 +1,5 @@ ; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK ; -; This test checks that we do not use shrink-wrapping when -; the function does not have any frame pointer and may unwind. -; This is a workaround for a limitation in the emission of -; the CFI directives, that are not correct in such case. -; PR25614 -; ; Note: This test cannot be merged with the shrink-wrapping tests ; because the booleans set on the command line take precedence on ; the target logic that disable shrink-wrapping. 
@@ -13,6 +7,12 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "x86_64-apple-macosx" +; This test checks that we do not use shrink-wrapping when +; the function does not have any frame pointer and may unwind. +; This is a workaround for a limitation in the emission of +; the CFI directives, that are not correct in such case. +; PR25614 +; ; No shrink-wrapping should occur here, until the CFI information are fixed. ; CHECK-LABEL: framelessUnwind: ; @@ -151,3 +151,74 @@ false: } attributes #2 = { "no-frame-pointer-elim"="false" nounwind } + + +; Check that we generate correct code for segmented stack. +; We used to emit the code at the entry point of the function +; instead of just before the prologue. +; For now, shrink-wrapping is disabled on segmented stack functions: PR26107. +; +; CHECK-LABEL: segmentedStack: +; CHECK: cmpq +; CHECK-NEXT: ja [[ENTRY_LABEL:LBB[0-9_]+]] +; +; CHECK: callq ___morestack +; CHECK-NEXT: retq +; +; CHECK: [[ENTRY_LABEL]]: +; Prologue +; CHECK: push +; +; In PR26107, we use to drop these two basic blocks, because +; the segmentedStack entry block was jumping directly to +; the place where the prologue is actually needed, which is +; the call to memcmp. +; Then, those two basic blocks did not have any predecessors +; anymore and were removed. 
+; +; Check if vk1 is null +; CHECK: testq %rdi, %rdi +; CHECK-NEXT: je [[STRINGS_EQUAL:LBB[0-9_]+]] +; +; Check if vk2 is null +; CHECK: testq %rsi, %rsi +; CHECK-NEXT: je [[STRINGS_EQUAL]] +; +; CHECK: [[STRINGS_EQUAL]] +; CHECK-NEXT: popq +define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 { +entry: + %cmp.i = icmp eq i8* %vk1, null + %cmp1.i = icmp eq i8* %vk2, null + %brmerge.i = or i1 %cmp.i, %cmp1.i + %cmp1.mux.i = and i1 %cmp.i, %cmp1.i + br i1 %brmerge.i, label %__go_ptr_strings_equal.exit, label %if.end4.i + +if.end4.i: ; preds = %entry + %tmp = getelementptr inbounds i8, i8* %vk1, i64 8 + %tmp1 = bitcast i8* %tmp to i64* + %tmp2 = load i64, i64* %tmp1, align 8 + %tmp3 = getelementptr inbounds i8, i8* %vk2, i64 8 + %tmp4 = bitcast i8* %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4, align 8 + %cmp.i.i = icmp eq i64 %tmp2, %tmp5 + br i1 %cmp.i.i, label %land.rhs.i.i, label %__go_ptr_strings_equal.exit + +land.rhs.i.i: ; preds = %if.end4.i + %tmp6 = bitcast i8* %vk2 to i8** + %tmp7 = load i8*, i8** %tmp6, align 8 + %tmp8 = bitcast i8* %vk1 to i8** + %tmp9 = load i8*, i8** %tmp8, align 8 + %call.i.i = tail call i32 @memcmp(i8* %tmp9, i8* %tmp7, i64 %tmp2) #5 + %cmp4.i.i = icmp eq i32 %call.i.i, 0 + br label %__go_ptr_strings_equal.exit + +__go_ptr_strings_equal.exit: ; preds = %land.rhs.i.i, %if.end4.i, %entry + %retval.0.i = phi i1 [ %cmp1.mux.i, %entry ], [ false, %if.end4.i ], [ %cmp4.i.i, %land.rhs.i.i ] + ret i1 %retval.0.i +} + +; Function Attrs: nounwind readonly +declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) #5 + +attributes #5 = { nounwind readonly ssp uwtable "split-stack" } From 2d93fbfdc9a195eaaa116deda4d5d9ce7fa3b78f Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 20 Jan 2016 00:23:21 +0000 Subject: [PATCH 0034/1132] LiveInterval: Add utility class to rename independent subregister usage This renaming is necessary to avoid a subregister aware scheduler accidentally creating liveness "holes" 
which are rejected by the MachineVerifier. Explanation as found in this patch: Helper class that can divide MachineOperands of a virtual register into equivalence classes of connected components. MachineOperands belong to the same equivalence class when they are part of the same SubRange segment or adjacent segments (adjacent in control flow); Different subranges affected by the same MachineOperand belong to the same equivalence class. Example: vreg0:sub0 = ... vreg0:sub1 = ... vreg0:sub2 = ... ... xxx = op vreg0:sub1 vreg0:sub1 = ... store vreg0:sub0_sub1 The example contains 3 different equivalence classes: - One for the (dead) vreg0:sub2 definition - One containing the first vreg0:sub1 definition and its use, but not the second definition! - The remaining class contains all other operands involving vreg0. We provide a utility function here to rename disjunct classes to different virtual registers. Differential Revision: http://reviews.llvm.org/D16126 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258257 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 1c6737efbcd0087aea2ecfcbbbb659dd99bd1e26) --- include/llvm/CodeGen/LiveInterval.h | 69 ++++++++ include/llvm/CodeGen/LiveIntervalAnalysis.h | 5 + lib/CodeGen/LiveInterval.cpp | 183 ++++++++++++++++++++ lib/CodeGen/LiveIntervalAnalysis.cpp | 16 ++ 4 files changed, 273 insertions(+) diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index edade3164a3..906ab9ff438 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -864,5 +864,74 @@ namespace llvm { MachineRegisterInfo &MRI); }; + /// Helper class that can divide MachineOperands of a virtual register into + /// equivalence classes of connected components. 
+ /// MachineOperands belong to the same equivalence class when they are part of + /// the same SubRange segment or adjacent segments (adjacent in control + /// flow); Different subranges affected by the same MachineOperand belong to + /// the same equivalence class. + /// + /// Example: + /// vreg0:sub0 = ... + /// vreg0:sub1 = ... + /// vreg0:sub2 = ... + /// ... + /// xxx = op vreg0:sub1 + /// vreg0:sub1 = ... + /// store vreg0:sub0_sub1 + /// + /// The example contains 3 different equivalence classes: + /// - One for the (dead) vreg0:sub2 definition + /// - One containing the first vreg0:sub1 definition and its use, + /// but not the second definition! + /// - The remaining class contains all other operands involving vreg0. + /// + /// We provide a utility function here to rename disjunct classes to different + /// virtual registers. + class ConnectedSubRegClasses { + LiveIntervals &LIS; + MachineRegisterInfo &MRI; + + public: + ConnectedSubRegClasses(LiveIntervals &LIS, MachineRegisterInfo &MRI) + : LIS(LIS), MRI(MRI) {} + + /// Split unrelated subregister components and rename them to new vregs. + void renameComponents(LiveInterval &LI) const; + + private: + struct SubRangeInfo { + ConnectedVNInfoEqClasses ConEQ; + LiveInterval::SubRange *SR; + unsigned Index; + + SubRangeInfo(LiveIntervals &LIS, LiveInterval::SubRange &SR, + unsigned Index) + : ConEQ(LIS), SR(&SR), Index(Index) {} + }; + + /// \brief Build a vector of SubRange infos and a union find set of + /// equivalence classes. + /// Returns true if more than 1 equivalence class was found. + bool findComponents(IntEqClasses &Classes, + SmallVectorImpl &SubRangeInfos, + LiveInterval &LI) const; + + /// \brief Distribute the LiveInterval segments into the new LiveIntervals + /// belonging to their class. + void distribute(const IntEqClasses &Classes, + const SmallVectorImpl &SubRangeInfos, + const SmallVectorImpl &Intervals) const; + + /// \brief Constructs main liverange and add missing undef+dead flags. 
+ void computeMainRangesFixFlags(const IntEqClasses &Classes, + const SmallVectorImpl &SubRangeInfos, + const SmallVectorImpl &Intervals) const; + + /// Rewrite Machine Operands to use the new vreg belonging to their class. + void rewriteOperands(const IntEqClasses &Classes, + const SmallVectorImpl &SubRangeInfos, + const SmallVectorImpl &Intervals) const; + }; } #endif diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 87421e2f83b..07ab1bba714 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -406,6 +406,11 @@ extern cl::opt UseSegmentSetForPhysRegs; void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl &SplitLIs); + /// Assure dead subregister definitions have their own vreg assigned. + /// This calls ConnectedSubRegClasses::splitSeparateSubRegComponent() + /// on each virtual register. + void renameDisconnectedComponents(); + private: /// Compute live intervals for all virtual registers. void computeVirtRegs(); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index bb3488348f2..5574a813c6a 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -1466,3 +1466,186 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], // Distribute main liverange. DistributeRange(LI, LIV, EqClass); } + +void ConnectedSubRegClasses::renameComponents(LiveInterval &LI) const { + // Shortcut: We cannot have split components with a single definition. + if (LI.valnos.size() < 2) + return; + + SmallVector SubRangeInfos; + IntEqClasses Classes; + if (!findComponents(Classes, SubRangeInfos, LI)) + return; + + // Create a new VReg for each class. 
+ unsigned Reg = LI.reg; + const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); + SmallVector Intervals; + Intervals.push_back(&LI); + for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses; + ++I) { + unsigned NewVReg = MRI.createVirtualRegister(RegClass); + LiveInterval &NewLI = LIS.createEmptyInterval(NewVReg); + Intervals.push_back(&NewLI); + } + + rewriteOperands(Classes, SubRangeInfos, Intervals); + distribute(Classes, SubRangeInfos, Intervals); + computeMainRangesFixFlags(Classes, SubRangeInfos, Intervals); +} + +bool ConnectedSubRegClasses::findComponents(IntEqClasses &Classes, + SmallVectorImpl &SubRangeInfos, + LiveInterval &LI) const { + // First step: Create connected components for the VNInfos inside the + // subranges and count the global number of such components. + unsigned NumComponents = 0; + for (LiveInterval::SubRange &SR : LI.subranges()) { + SubRangeInfos.push_back(SubRangeInfo(LIS, SR, NumComponents)); + ConnectedVNInfoEqClasses &ConEQ = SubRangeInfos.back().ConEQ; + + unsigned NumSubComponents = ConEQ.Classify(SR); + NumComponents += NumSubComponents; + } + // Shortcut: With only 1 subrange, the normal separate component tests are + // enough and we do not need to perform the union-find on the subregister + // segments. + if (SubRangeInfos.size() < 2) + return false; + + // Next step: Build union-find structure over all subranges and merge classes + // across subranges when they are affected by the same MachineOperand. 
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + Classes.grow(NumComponents); + unsigned Reg = LI.reg; + for (const MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) { + if (!MO.isDef() && !MO.readsReg()) + continue; + unsigned SubRegIdx = MO.getSubReg(); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx); + unsigned MergedID = ~0u; + for (auto &SRInfo : SubRangeInfos) { + const LiveInterval::SubRange &SR = *SRInfo.SR; + if ((SR.LaneMask & LaneMask) == 0) + continue; + SlotIndex Pos = LIS.getInstructionIndex(MO.getParent()); + Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber()) + : Pos.getBaseIndex(); + const VNInfo *VNI = SR.getVNInfoAt(Pos); + if (VNI == nullptr) + continue; + + // Map to local representant ID. + unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI); + // Global ID + unsigned ID = LocalID + SRInfo.Index; + // Merge other sets + MergedID = MergedID == ~0u ? ID : Classes.join(MergedID, ID); + } + } + + // Early exit if we ended up with a single equivalence class. 
+ Classes.compress(); + unsigned NumClasses = Classes.getNumClasses(); + return NumClasses > 1; +} + +void ConnectedSubRegClasses::rewriteOperands(const IntEqClasses &Classes, + const SmallVectorImpl &SubRangeInfos, + const SmallVectorImpl &Intervals) const { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned Reg = Intervals[0]->reg;; + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(Reg), + E = MRI.reg_nodbg_end(); I != E; ) { + MachineOperand &MO = *I++; + if (!MO.isDef() && !MO.readsReg()) + continue; + + MachineInstr &MI = *MO.getParent(); + + SlotIndex Pos = LIS.getInstructionIndex(&MI); + unsigned SubRegIdx = MO.getSubReg(); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx); + + unsigned ID = ~0u; + for (auto &SRInfo : SubRangeInfos) { + const LiveInterval::SubRange &SR = *SRInfo.SR; + if ((SR.LaneMask & LaneMask) == 0) + continue; + LiveRange::const_iterator I = SR.find(Pos); + if (I == SR.end()) + continue; + + const VNInfo &VNI = *I->valno; + // Map to local representant ID. 
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI); + // Global ID + ID = Classes[LocalID + SRInfo.Index]; + break; + } + + unsigned VReg = Intervals[ID]->reg; + MO.setReg(VReg); + } +} + +void ConnectedSubRegClasses::distribute(const IntEqClasses &Classes, + const SmallVectorImpl &SubRangeInfos, + const SmallVectorImpl &Intervals) const { + unsigned NumClasses = Classes.getNumClasses(); + SmallVector VNIMapping; + SmallVector SubRanges; + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + for (auto &SRInfo : SubRangeInfos) { + LiveInterval::SubRange &SR = *SRInfo.SR; + unsigned NumValNos = SR.valnos.size(); + VNIMapping.clear(); + VNIMapping.reserve(NumValNos); + SubRanges.clear(); + SubRanges.resize(NumClasses-1, nullptr); + for (unsigned I = 0; I < NumValNos; ++I) { + const VNInfo &VNI = *SR.valnos[I]; + unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI); + unsigned ID = Classes[LocalID + SRInfo.Index]; + VNIMapping.push_back(ID); + if (ID > 0 && SubRanges[ID-1] == nullptr) + SubRanges[ID-1] = Intervals[ID]->createSubRange(Allocator, SR.LaneMask); + } + DistributeRange(SR, SubRanges.data(), VNIMapping); + } +} + +void ConnectedSubRegClasses::computeMainRangesFixFlags( + const IntEqClasses &Classes, + const SmallVectorImpl &SubRangeInfos, + const SmallVectorImpl &Intervals) const { + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + for (size_t I = 0, E = Intervals.size(); I < E; ++I) { + LiveInterval *LI = Intervals[I]; + LI->removeEmptySubRanges(); + if (I == 0) + LI->clear(); + LI->constructMainRangeFromSubranges(*LIS.getSlotIndexes(), Allocator); + + for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) { + if (!MO.isDef()) + continue; + unsigned SubRegIdx = MO.getSubReg(); + if (SubRegIdx == 0) + continue; + // After assigning the new vreg we may not have any other sublanes living + // in and out of the instruction anymore. We need to add new dead and kill + // flags in these cases. 
+ if (!MO.isUndef()) { + SlotIndex Pos = LIS.getInstructionIndex(MO.getParent()); + if (!LI->liveAt(Pos.getBaseIndex())) + MO.setIsUndef(); + } + if (!MO.isDead()) { + SlotIndex Pos = LIS.getInstructionIndex(MO.getParent()); + if (!LI->liveAt(Pos.getDeadSlot())) + MO.setIsDead(); + } + } + } +} diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index a506e0571c0..a6dd48913dd 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1459,3 +1459,19 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, } ConEQ.Distribute(LI, SplitLIs.data(), *MRI); } + +void LiveIntervals::renameDisconnectedComponents() { + ConnectedSubRegClasses SubRegClasses(*this, *MRI); + + // Iterate over all vregs. Note that we query getNumVirtRegs() the newly + // created vregs end up with higher numbers but do not need to be visited as + // there can't be any further splitting. + for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + LiveInterval *LI = VirtRegIntervals[Reg]; + if (LI == nullptr || !LI->hasSubRanges()) + continue; + + SubRegClasses.renameComponents(*LI); + } +} From 0146a3b1f4eaaea2ef00b4dea78cb7ab2c2152d7 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 20 Jan 2016 00:23:26 +0000 Subject: [PATCH 0035/1132] RegisterPressure: Make liveness tracking subregister aware Differential Revision: http://reviews.llvm.org/D14968 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258258 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 051b30e8e2eb0e92951aca3bcce8819922ac2578) --- include/llvm/CodeGen/MachineScheduler.h | 2 +- include/llvm/CodeGen/RegisterPressure.h | 121 +++-- lib/CodeGen/MachineScheduler.cpp | 24 +- lib/CodeGen/RegisterPressure.cpp | 593 ++++++++++++++++------- lib/CodeGen/ScheduleDAGInstrs.cpp | 2 +- lib/Target/AMDGPU/SIMachineScheduler.cpp | 9 +- lib/Target/AMDGPU/SIMachineScheduler.h | 8 +- 7 
files changed, 540 insertions(+), 219 deletions(-) diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 358fd5a3732..ce9a327c8e5 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -462,7 +462,7 @@ class ScheduleDAGMILive : public ScheduleDAGMI { void initRegPressure(); - void updatePressureDiffs(ArrayRef LiveUses); + void updatePressureDiffs(ArrayRef LiveUses); void updateScheduledPressure(const SUnit *SU, const std::vector &NewMaxPressure); diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 9bbdf3e071b..9fdb73662cf 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -26,14 +26,22 @@ class LiveRange; class RegisterClassInfo; class MachineInstr; +struct RegisterMaskPair { + unsigned RegUnit; ///< Virtual register or register unit. + LaneBitmask LaneMask; + + RegisterMaskPair(unsigned RegUnit, LaneBitmask LaneMask) + : RegUnit(RegUnit), LaneMask(LaneMask) {} +}; + /// Base class for register pressure results. struct RegisterPressure { /// Map of max reg pressure indexed by pressure set ID, not class ID. std::vector MaxSetPressure; /// List of live in virtual registers or physical register units. - SmallVector LiveInRegs; - SmallVector LiveOutRegs; + SmallVector LiveInRegs; + SmallVector LiveOutRegs; void dump(const TargetRegisterInfo *TRI) const; }; @@ -144,23 +152,30 @@ class PressureDiff { /// List of registers defined and used by a machine instruction. class RegisterOperands { public: - /// List of virtual regiserts and register units read by the instruction. - SmallVector Uses; + /// List of virtual registers and register units read by the instruction. + SmallVector Uses; /// \brief List of virtual registers and register units defined by the /// instruction which are not dead. 
- SmallVector Defs; + SmallVector Defs; /// \brief List of virtual registers and register units defined by the /// instruction but dead. - SmallVector DeadDefs; + SmallVector DeadDefs; /// Analyze the given instruction \p MI and fill in the Uses, Defs and /// DeadDefs list based on the MachineOperand flags. void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, - const MachineRegisterInfo &MRI, bool IgnoreDead = false); + const MachineRegisterInfo &MRI, bool TrackLaneMasks, + bool IgnoreDead); /// Use liveness information to find dead defs not marked with a dead flag /// and move them to the DeadDefs vector. void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); + + /// Use liveness information to find out which uses/defs are partially + /// undefined/dead and adjust the RegisterMaskPairs accordingly. + void adjustLaneLiveness(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, SlotIndex Pos); + }; /// Array of PressureDiffs. @@ -225,7 +240,20 @@ struct RegPressureDelta { /// and virtual register indexes to an index usable by the sparse set. 
class LiveRegSet { private: - SparseSet Regs; + struct IndexMaskPair { + unsigned Index; + LaneBitmask LaneMask; + + IndexMaskPair(unsigned Index, LaneBitmask LaneMask) + : Index(Index), LaneMask(LaneMask) {} + + unsigned getSparseSetIndex() const { + return Index; + } + }; + + typedef SparseSet RegSet; + RegSet Regs; unsigned NumRegUnits; unsigned getSparseIndexFromReg(unsigned Reg) const { @@ -244,19 +272,37 @@ class LiveRegSet { void clear(); void init(const MachineRegisterInfo &MRI); - bool contains(unsigned Reg) const { + LaneBitmask contains(unsigned Reg) const { unsigned SparseIndex = getSparseIndexFromReg(Reg); - return Regs.count(SparseIndex); + RegSet::const_iterator I = Regs.find(SparseIndex); + if (I == Regs.end()) + return 0; + return I->LaneMask; } - bool insert(unsigned Reg) { - unsigned SparseIndex = getSparseIndexFromReg(Reg); - return Regs.insert(SparseIndex).second; + /// Mark the \p Pair.LaneMask lanes of \p Pair.Reg as live. + /// Returns the previously live lanes of \p Pair.Reg. + LaneBitmask insert(RegisterMaskPair Pair) { + unsigned SparseIndex = getSparseIndexFromReg(Pair.RegUnit); + auto InsertRes = Regs.insert(IndexMaskPair(SparseIndex, Pair.LaneMask)); + if (!InsertRes.second) { + unsigned PrevMask = InsertRes.first->LaneMask; + InsertRes.first->LaneMask |= Pair.LaneMask; + return PrevMask; + } + return 0; } - bool erase(unsigned Reg) { - unsigned SparseIndex = getSparseIndexFromReg(Reg); - return Regs.erase(SparseIndex); + /// Clears the \p Pair.LaneMask lanes of \p Pair.Reg (mark them as dead). + /// Returns the previously live lanes of \p Pair.Reg. 
+ LaneBitmask erase(RegisterMaskPair Pair) { + unsigned SparseIndex = getSparseIndexFromReg(Pair.RegUnit); + RegSet::iterator I = Regs.find(SparseIndex); + if (I == Regs.end()) + return 0; + unsigned PrevMask = I->LaneMask; + I->LaneMask &= ~Pair.LaneMask; + return PrevMask; } size_t size() const { @@ -265,9 +311,10 @@ class LiveRegSet { template void appendTo(ContainerT &To) const { - for (unsigned I : Regs) { - unsigned Reg = getRegFromSparseIndex(I); - To.push_back(Reg); + for (const IndexMaskPair &P : Regs) { + unsigned Reg = getRegFromSparseIndex(P.Index); + if (P.LaneMask != 0) + To.push_back(RegisterMaskPair(Reg, P.LaneMask)); } } }; @@ -308,6 +355,9 @@ class RegPressureTracker { /// True if UntiedDefs will be populated. bool TrackUntiedDefs; + /// True if lanemasks should be tracked. + bool TrackLaneMasks; + /// Register pressure corresponds to liveness before this instruction /// iterator. It may point to the end of the block or a DebugValue rather than /// an instruction. @@ -327,23 +377,23 @@ class RegPressureTracker { public: RegPressureTracker(IntervalPressure &rp) : MF(nullptr), TRI(nullptr), RCI(nullptr), LIS(nullptr), MBB(nullptr), P(rp), - RequireIntervals(true), TrackUntiedDefs(false) {} + RequireIntervals(true), TrackUntiedDefs(false), TrackLaneMasks(false) {} RegPressureTracker(RegionPressure &rp) : MF(nullptr), TRI(nullptr), RCI(nullptr), LIS(nullptr), MBB(nullptr), P(rp), - RequireIntervals(false), TrackUntiedDefs(false) {} + RequireIntervals(false), TrackUntiedDefs(false), TrackLaneMasks(false) {} void reset(); void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, - bool ShouldTrackUntiedDefs = false); + bool TrackLaneMasks, bool TrackUntiedDefs); /// Force liveness of virtual registers or physical register /// units. Particularly useful to initialize the livein/out state of the /// tracker before the first call to advance/recede. 
- void addLiveRegs(ArrayRef Regs); + void addLiveRegs(ArrayRef Regs); /// Get the MI position corresponding to this register pressure. MachineBasicBlock::const_iterator getPos() const { return CurrPos; } @@ -355,14 +405,14 @@ class RegPressureTracker { void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; } /// Recede across the previous instruction. - void recede(SmallVectorImpl *LiveUses = nullptr); + void recede(SmallVectorImpl *LiveUses = nullptr); /// Recede across the previous instruction. /// This "low-level" variant assumes that recedeSkipDebugValues() was /// called previously and takes precomputed RegisterOperands for the /// instruction. void recede(const RegisterOperands &RegOpers, - SmallVectorImpl *LiveUses = nullptr); + SmallVectorImpl *LiveUses = nullptr); /// Recede until we find an instruction which is not a DebugValue. void recedeSkipDebugValues(); @@ -469,18 +519,31 @@ class RegPressureTracker { void dump() const; protected: - void discoverLiveOut(unsigned Reg); - void discoverLiveIn(unsigned Reg); + /// Add Reg to the live out set and increase max pressure. + void discoverLiveOut(RegisterMaskPair Pair); + /// Add Reg to the live in set and increase max pressure. + void discoverLiveIn(RegisterMaskPair Pair); /// \brief Get the SlotIndex for the first nondebug instruction including or /// after the current position. 
SlotIndex getCurrSlot() const; - void increaseRegPressure(ArrayRef Regs); - void decreaseRegPressure(ArrayRef Regs); + void increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, + LaneBitmask NewMask); + void decreaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, + LaneBitmask NewMask); + + void bumpDeadDefs(ArrayRef DeadDefs); void bumpUpwardPressure(const MachineInstr *MI); void bumpDownwardPressure(const MachineInstr *MI); + + void discoverLiveInOrOut(RegisterMaskPair Pair, + SmallVectorImpl &LiveInOrOut); + + LaneBitmask getLastUsedLanes(unsigned RegUnit, SlotIndex Pos) const; + LaneBitmask getLiveLanesAt(unsigned RegUnit, SlotIndex Pos) const; + LaneBitmask getLiveThroughAt(unsigned RegUnit, SlotIndex Pos) const; }; void dumpRegSetPressure(ArrayRef SetPressure, diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index bcee15c7c75..fa8e5ba190b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -874,8 +874,8 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, // Setup the register pressure trackers for the top scheduled top and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { - TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); - BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); + BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, false, false); // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); @@ -905,7 +905,7 @@ void ScheduleDAGMILive::initRegPressure() { // Account for liveness generated by the region boundary. if (LiveRegionEnd != RegionEnd) { - SmallVector LiveUses; + SmallVector LiveUses; BotRPTracker.recede(&LiveUses); updatePressureDiffs(LiveUses); } @@ -969,10 +969,12 @@ updateScheduledPressure(const SUnit *SU, /// Update the PressureDiff array for liveness after scheduling this /// instruction. 
-void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { - for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { +void ScheduleDAGMILive::updatePressureDiffs( + ArrayRef LiveUses) { + for (const RegisterMaskPair &P : LiveUses) { /// FIXME: Currently assuming single-use physregs. - unsigned Reg = LiveUses[LUIdx]; + unsigned Reg = P.RegUnit; + assert(P.LaneMask != 0); DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); if (!TRI->isVirtualRegister(Reg)) continue; @@ -1111,7 +1113,7 @@ void ScheduleDAGMILive::buildDAGWithRegPressure() { // Initialize the register pressure tracker used by buildSchedGraph. RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, - /*TrackUntiedDefs=*/true); + false, /*TrackUntiedDefs=*/true); // Account for liveness generate by the region boundary. if (LiveRegionEnd != RegionEnd) @@ -1167,10 +1169,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { unsigned MaxCyclicLatency = 0; // Visit each live out vreg def to find def/use pairs that cross iterations. - ArrayRef LiveOuts = RPTracker.getPressure().LiveOutRegs; - for (ArrayRef::iterator RI = LiveOuts.begin(), RE = LiveOuts.end(); - RI != RE; ++RI) { - unsigned Reg = *RI; + for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) { + unsigned Reg = P.RegUnit; if (!TRI->isVirtualRegister(Reg)) continue; const LiveInterval &LI = LIS->getInterval(Reg); @@ -1265,7 +1265,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { } if (ShouldTrackPressure) { // Update bottom scheduled pressure. 
- SmallVector LiveUses; + SmallVector LiveUses; BotRPTracker.recede(&LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); DEBUG( diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index f33dc3e1049..78a766ed13b 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -24,7 +24,13 @@ using namespace llvm; /// Increase pressure for each pressure set provided by TargetRegisterInfo. static void increaseSetPressure(std::vector &CurrSetPressure, - PSetIterator PSetI) { + const MachineRegisterInfo &MRI, unsigned Reg, + LaneBitmask PrevMask, LaneBitmask NewMask) { + assert((PrevMask & ~NewMask) == 0 && "Must not remove bits"); + if (PrevMask != 0 || NewMask == 0) + return; + + PSetIterator PSetI = MRI.getPressureSets(Reg); unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) CurrSetPressure[*PSetI] += Weight; @@ -32,7 +38,13 @@ static void increaseSetPressure(std::vector &CurrSetPressure, /// Decrease pressure for each pressure set provided by TargetRegisterInfo. 
static void decreaseSetPressure(std::vector &CurrSetPressure, - PSetIterator PSetI) { + const MachineRegisterInfo &MRI, unsigned Reg, + LaneBitmask PrevMask, LaneBitmask NewMask) { + assert((NewMask & !PrevMask) == 0 && "Must not add bits"); + if (NewMask != 0 || PrevMask == 0) + return; + + PSetIterator PSetI = MRI.getPressureSets(Reg); unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow"); @@ -59,12 +71,20 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; - for (unsigned Reg : LiveInRegs) - dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; + for (const RegisterMaskPair &P : LiveInRegs) { + dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); + if (P.LaneMask != ~0u) + dbgs() << ':' << PrintLaneMask(P.LaneMask); + dbgs() << ' '; + } dbgs() << '\n'; dbgs() << "Live Out: "; - for (unsigned Reg : LiveOutRegs) - dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; + for (const RegisterMaskPair &P : LiveOutRegs) { + dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); + if (P.LaneMask != ~0u) + dbgs() << ':' << PrintLaneMask(P.LaneMask); + dbgs() << ' '; + } dbgs() << '\n'; } @@ -89,24 +109,25 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { dbgs() << '\n'; } -/// Increase the current pressure as impacted by these registers and bump -/// the high water mark if needed. 
-void RegPressureTracker::increaseRegPressure(ArrayRef RegUnits) { - for (unsigned RegUnit : RegUnits) { - PSetIterator PSetI = MRI->getPressureSets(RegUnit); - unsigned Weight = PSetI.getWeight(); - for (; PSetI.isValid(); ++PSetI) { - CurrSetPressure[*PSetI] += Weight; - P.MaxSetPressure[*PSetI] = - std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]); - } +void RegPressureTracker::increaseRegPressure(unsigned RegUnit, + LaneBitmask PreviousMask, + LaneBitmask NewMask) { + if (PreviousMask != 0 || NewMask == 0) + return; + + PSetIterator PSetI = MRI->getPressureSets(RegUnit); + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + CurrSetPressure[*PSetI] += Weight; + P.MaxSetPressure[*PSetI] = + std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]); } } -/// Simply decrease the current pressure as impacted by these registers. -void RegPressureTracker::decreaseRegPressure(ArrayRef RegUnits) { - for (unsigned RegUnit : RegUnits) - decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit)); +void RegPressureTracker::decreaseRegPressure(unsigned RegUnit, + LaneBitmask PreviousMask, + LaneBitmask NewMask) { + decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask); } /// Clear the result so it can be used for another round of pressure tracking. 
@@ -201,8 +222,7 @@ void RegPressureTracker::init(const MachineFunction *mf, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, - bool ShouldTrackUntiedDefs) -{ + bool TrackLaneMasks, bool TrackUntiedDefs) { reset(); MF = mf; @@ -210,7 +230,8 @@ void RegPressureTracker::init(const MachineFunction *mf, RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; - TrackUntiedDefs = ShouldTrackUntiedDefs; + this->TrackUntiedDefs = TrackUntiedDefs; + this->TrackLaneMasks = TrackLaneMasks; if (RequireIntervals) { assert(lis && "IntervalPressure requires LiveIntervals"); @@ -297,20 +318,92 @@ void RegPressureTracker::closeRegion() { void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); assert(isBottomClosed() && "need bottom-up tracking to intialize."); - for (unsigned Reg : P.LiveOutRegs) { - if (TargetRegisterInfo::isVirtualRegister(Reg) - && !RPTracker.hasUntiedDef(Reg)) { - increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); - } + for (const RegisterMaskPair &Pair : P.LiveOutRegs) { + unsigned RegUnit = Pair.RegUnit; + if (TargetRegisterInfo::isVirtualRegister(RegUnit) + && !RPTracker.hasUntiedDef(RegUnit)) + increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask); } } -/// \brief Convenient wrapper for checking membership in RegisterOperands. -/// (std::count() doesn't have an early exit). 
-static bool containsReg(ArrayRef RegUnits, unsigned RegUnit) { - return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); +static unsigned getRegLanes(ArrayRef RegUnits, + unsigned RegUnit) { + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I == RegUnits.end()) + return 0; + return I->LaneMask; +} + +static void addRegLanes(SmallVectorImpl &RegUnits, + RegisterMaskPair Pair) { + unsigned RegUnit = Pair.RegUnit; + assert(Pair.LaneMask != 0); + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I == RegUnits.end()) { + RegUnits.push_back(Pair); + } else { + I->LaneMask |= Pair.LaneMask; + } } +static void removeRegLanes(SmallVectorImpl &RegUnits, + RegisterMaskPair Pair) { + unsigned RegUnit = Pair.RegUnit; + assert(Pair.LaneMask != 0); + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I != RegUnits.end()) { + I->LaneMask &= ~Pair.LaneMask; + if (I->LaneMask == 0) + RegUnits.erase(I); + } +} + +static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, + SlotIndex Pos, + bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { + if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + const LiveInterval &LI = LIS.getInterval(RegUnit); + LaneBitmask Result = 0; + if (TrackLaneMasks && LI.hasSubRanges()) { + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if (Property(SR, Pos)) + Result |= SR.LaneMask; + } + } else if (Property(LI, Pos)) + Result = MRI.getMaxLaneMaskForVReg(RegUnit); + + return Result; + } else { + const LiveRange *LR = LIS.getCachedRegUnit(RegUnit); + // Be prepared for missing liveranges: We usually do not compute liveranges + // for physical 
registers on targets with many registers (GPUs). + if (LR == nullptr) + return 0; + return Property(*LR, Pos) ? ~0u : 0; + } +} + +static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + bool TrackLaneMasks, unsigned RegUnit, + SlotIndex Pos) { + return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, + [](const LiveRange &LR, SlotIndex Pos) { + return LR.liveAt(Pos); + }); +} + + namespace { /// Collect this instruction's unique uses and defs into SmallVectors for @@ -321,23 +414,23 @@ class RegisterOperandsCollector { RegisterOperands &RegOpers; const TargetRegisterInfo &TRI; const MachineRegisterInfo &MRI; + bool TrackLaneMasks; bool IgnoreDead; RegisterOperandsCollector(RegisterOperands &RegOpers, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, - bool IgnoreDead) - : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {} + bool TrackLaneMasks, bool IgnoreDead) + : RegOpers(RegOpers), TRI(TRI), MRI(MRI), + TrackLaneMasks(TrackLaneMasks), IgnoreDead(IgnoreDead) {} void collectInstr(const MachineInstr &MI) const { for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI) collectOperand(*OperI); // Remove redundant physreg dead defs. - SmallVectorImpl::iterator I = - std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), - std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); - RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); + for (const RegisterMaskPair &P : RegOpers.Defs) + removeRegLanes(RegOpers.DeadDefs, P); } /// Push this operand's register onto the correct vectors. 
@@ -345,28 +438,39 @@ class RegisterOperandsCollector { if (!MO.isReg() || !MO.getReg()) return; unsigned Reg = MO.getReg(); - if (MO.readsReg()) - pushRegUnits(Reg, RegOpers.Uses); - if (MO.isDef()) { + unsigned SubRegIdx = MO.getSubReg(); + if (MO.isUse()) { + if (!MO.isUndef() && !MO.isInternalRead()) + pushRegUnits(Reg, SubRegIdx, RegOpers.Uses); + } else { + assert(MO.isDef()); + if (MO.isUndef()) { + // Treat read-undef subreg defs as definitions of the whole register. + SubRegIdx = 0; + } else if (!TrackLaneMasks && SubRegIdx != 0 && !MO.isInternalRead()) { + // Interpret the subregister def as read-modify-store: A use+def of the + // full register. + pushRegUnits(Reg, SubRegIdx, RegOpers.Uses); + } + if (MO.isDead()) { if (!IgnoreDead) - pushRegUnits(Reg, RegOpers.DeadDefs); + pushRegUnits(Reg, SubRegIdx, RegOpers.DeadDefs); } else - pushRegUnits(Reg, RegOpers.Defs); + pushRegUnits(Reg, SubRegIdx, RegOpers.Defs); } } - void pushRegUnits(unsigned Reg, SmallVectorImpl &RegUnits) const { + void pushRegUnits(unsigned Reg, unsigned SubRegIdx, + SmallVectorImpl &RegUnits) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (containsReg(RegUnits, Reg)) - return; - RegUnits.push_back(Reg); + LaneBitmask LaneMask = TrackLaneMasks && SubRegIdx != 0 + ? 
TRI.getSubRegIndexLaneMask(SubRegIdx) + : MRI.getMaxLaneMaskForVReg(Reg); + addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask)); } else if (MRI.isAllocatable(Reg)) { - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { - if (containsReg(RegUnits, *Units)) - continue; - RegUnits.push_back(*Units); - } + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u)); } } @@ -378,24 +482,24 @@ class RegisterOperandsCollector { void RegisterOperands::collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, - bool IgnoreDead) { - RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead); + bool TrackLaneMasks, bool IgnoreDead) { + RegisterOperandsCollector Collector(*this, TRI, MRI, TrackLaneMasks, + IgnoreDead); Collector.collectInstr(MI); } void RegisterOperands::detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS) { SlotIndex SlotIdx = LIS.getInstructionIndex(&MI); - for (SmallVectorImpl::iterator RI = Defs.begin(); - RI != Defs.end(); /*empty*/) { - unsigned Reg = *RI; + for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) { + unsigned Reg = RI->RegUnit; const LiveRange *LR = getLiveRange(LIS, Reg); if (LR != nullptr) { LiveQueryResult LRQ = LR->Query(SlotIdx); if (LRQ.isDeadDef()) { // LiveIntervals knows this is a dead even though it's MachineOperand is // not flagged as such. 
- DeadDefs.push_back(Reg); + DeadDefs.push_back(*RI); RI = Defs.erase(RI); continue; } @@ -404,6 +508,38 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI, } } +void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + SlotIndex Pos) { + for (auto I = Defs.begin(); I != Defs.end(); ) { + LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit, + Pos.getDeadSlot()); +#if 0 + unsigned DeadDef = I->LaneMask & ~LiveAfter; + if (DeadDef != 0) + addRegLanes(DeadDefs, RegisterMaskPair(I->RegUnit, DeadDef)); +#endif + unsigned LaneMask = I->LaneMask & LiveAfter; + if (LaneMask == 0) + I = Defs.erase(I); + else { + I->LaneMask = LaneMask; + ++I; + } + } + for (auto I = Uses.begin(); I != Uses.end(); ) { + LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit, + Pos.getBaseIndex()); + unsigned LaneMask = I->LaneMask & LiveBefore; + if (LaneMask == 0) { + I = Uses.erase(I); + } else { + I->LaneMask = LaneMask; + ++I; + } + } +} + /// Initialize an array of N PressureDiffs. void PressureDiffs::init(unsigned N) { Size = N; @@ -421,11 +557,11 @@ void PressureDiffs::addInstruction(unsigned Idx, const MachineRegisterInfo &MRI) { PressureDiff &PDiff = (*this)[Idx]; assert(!PDiff.begin()->isValid() && "stale PDiff"); - for (unsigned Reg : RegOpers.Defs) - PDiff.addPressureChange(Reg, true, &MRI); + for (const RegisterMaskPair &P : RegOpers.Defs) + PDiff.addPressureChange(P.RegUnit, true, &MRI); - for (unsigned Reg : RegOpers.Uses) - PDiff.addPressureChange(Reg, false, &MRI); + for (const RegisterMaskPair &P : RegOpers.Uses) + PDiff.addPressureChange(P.RegUnit, false, &MRI); } /// Add a change in pressure to the pressure diff of a given instruction. @@ -465,33 +601,59 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, } /// Force liveness of registers. 
-void RegPressureTracker::addLiveRegs(ArrayRef Regs) { - for (unsigned Reg : Regs) { - if (LiveRegs.insert(Reg)) - increaseRegPressure(Reg); +void RegPressureTracker::addLiveRegs(ArrayRef Regs) { + for (const RegisterMaskPair &P : Regs) { + unsigned PrevMask = LiveRegs.insert(P); + unsigned NewMask = PrevMask | P.LaneMask; + increaseRegPressure(P.RegUnit, PrevMask, NewMask); } } -/// Add Reg to the live in set and increase max pressure. -void RegPressureTracker::discoverLiveIn(unsigned Reg) { - assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); - if (containsReg(P.LiveInRegs, Reg)) +void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair, + SmallVectorImpl &LiveInOrOut) { + if (Pair.LaneMask == 0) return; - // At live in discovery, unconditionally increase the high water mark. - P.LiveInRegs.push_back(Reg); - increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); + unsigned RegUnit = Pair.RegUnit; + auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(), + [RegUnit](const RegisterMaskPair &Other) { + return Other.RegUnit == RegUnit; + }); + LaneBitmask PrevMask; + LaneBitmask NewMask; + if (I == LiveInOrOut.end()) { + PrevMask = 0; + NewMask = Pair.LaneMask; + LiveInOrOut.push_back(Pair); + } else { + PrevMask = I->LaneMask; + NewMask = PrevMask | Pair.LaneMask; + I->LaneMask = NewMask; + } + increaseSetPressure(P.MaxSetPressure, *MRI, RegUnit, PrevMask, NewMask); } -/// Add Reg to the live out set and increase max pressure. -void RegPressureTracker::discoverLiveOut(unsigned Reg) { - assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); - if (containsReg(P.LiveOutRegs, Reg)) - return; +void RegPressureTracker::discoverLiveIn(RegisterMaskPair Pair) { + discoverLiveInOrOut(Pair, P.LiveInRegs); +} + +void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) { + discoverLiveInOrOut(Pair, P.LiveOutRegs); +} - // At live out discovery, unconditionally increase the high water mark. 
- P.LiveOutRegs.push_back(Reg); - increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); +void RegPressureTracker::bumpDeadDefs(ArrayRef DeadDefs) { + for (const RegisterMaskPair &P : DeadDefs) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask BumpedMask = LiveMask | P.LaneMask; + increaseRegPressure(Reg, LiveMask, BumpedMask); + } + for (const RegisterMaskPair &P : DeadDefs) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask BumpedMask = LiveMask | P.LaneMask; + decreaseRegPressure(Reg, BumpedMask, LiveMask); + } } /// Recede across the previous instruction. If LiveUses is provided, record any @@ -500,20 +662,29 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. void RegPressureTracker::recede(const RegisterOperands &RegOpers, - SmallVectorImpl *LiveUses) { + SmallVectorImpl *LiveUses) { assert(!CurrPos->isDebugValue()); // Boost pressure for all dead defs together. - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs); // Kill liveness at live defs. // TODO: consider earlyclobbers? - for (unsigned Reg : RegOpers.Defs) { - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + unsigned Reg = Def.RegUnit; + + LaneBitmask PreviousMask = LiveRegs.erase(Def); + LaneBitmask NewMask = PreviousMask & ~Def.LaneMask; + + LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask; + if (LiveOut != 0) { + discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); + // Retroactively model effects on pressure of the live out lanes. 
+ increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut); + PreviousMask = LiveOut; + } + + decreaseRegPressure(Reg, PreviousMask, NewMask); } SlotIndex SlotIdx; @@ -521,27 +692,34 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); // Generate liveness for uses. - for (unsigned Reg : RegOpers.Uses) { - if (!LiveRegs.contains(Reg)) { - // Adjust liveouts if LiveIntervals are available. - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(*LIS, Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - if (!LRQ.isKill() && !LRQ.valueDefined()) - discoverLiveOut(Reg); - } + for (const RegisterMaskPair &Use : RegOpers.Uses) { + unsigned Reg = Use.RegUnit; + assert(Use.LaneMask != 0); + LaneBitmask PreviousMask = LiveRegs.insert(Use); + LaneBitmask NewMask = PreviousMask | Use.LaneMask; + if (NewMask == PreviousMask) + continue; + + // Did the register just become live? + if (PreviousMask == 0) { + if (LiveUses != nullptr) { + unsigned NewLanes = NewMask & ~PreviousMask; + addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewLanes)); } - increaseRegPressure(Reg); - LiveRegs.insert(Reg); - if (LiveUses && !containsReg(*LiveUses, Reg)) - LiveUses->push_back(Reg); + + // Discover live outs if this may be the first occurance of this register. 
+ LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx); + discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); } + + increaseRegPressure(Reg, PreviousMask, NewMask); } if (TrackUntiedDefs) { - for (unsigned Reg : RegOpers.Defs) { - if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) - UntiedDefs.insert(Reg); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + unsigned RegUnit = Def.RegUnit; + if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0) + UntiedDefs.insert(RegUnit); } } } @@ -569,14 +747,18 @@ void RegPressureTracker::recedeSkipDebugValues() { static_cast(P).openTop(SlotIdx); } -void RegPressureTracker::recede(SmallVectorImpl *LiveUses) { +void RegPressureTracker::recede(SmallVectorImpl *LiveUses) { recedeSkipDebugValues(); const MachineInstr &MI = *CurrPos; RegisterOperands RegOpers; - RegOpers.collect(MI, *TRI, *MRI); - if (RequireIntervals) + RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false); + if (TrackLaneMasks) { + SlotIndex SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + } else if (RequireIntervals) { RegOpers.detectDeadDefs(MI, *LIS); + } recede(RegOpers, LiveUses); } @@ -602,38 +784,36 @@ void RegPressureTracker::advance() { } RegisterOperands RegOpers; - RegOpers.collect(*CurrPos, *TRI, *MRI); - - for (unsigned Reg : RegOpers.Uses) { - // Discover live-ins. 
- bool isLive = LiveRegs.contains(Reg); - if (!isLive) - discoverLiveIn(Reg); + RegOpers.collect(*CurrPos, *TRI, *MRI, TrackLaneMasks, false); + if (TrackLaneMasks) + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + + for (const RegisterMaskPair &Use : RegOpers.Uses) { + unsigned Reg = Use.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask LiveIn = Use.LaneMask & ~LiveMask; + if (LiveIn != 0) { + discoverLiveIn(RegisterMaskPair(Reg, LiveIn)); + increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn); + LiveRegs.insert(RegisterMaskPair(Reg, LiveIn)); + } // Kill liveness at last uses. - bool lastUse = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(*LIS, Reg); - lastUse = LR && LR->Query(SlotIdx).isKill(); - } else { - // Allocatable physregs are always single-use before register rewriting. - lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); + LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); + if (LastUseMask != 0) { + LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask)); + decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask); } - if (lastUse && isLive) { - LiveRegs.erase(Reg); - decreaseRegPressure(Reg); - } else if (!lastUse && !isLive) - increaseRegPressure(Reg); } // Generate liveness for defs. - for (unsigned Reg : RegOpers.Defs) { - if (LiveRegs.insert(Reg)) - increaseRegPressure(Reg); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + LaneBitmask PreviousMask = LiveRegs.insert(Def); + LaneBitmask NewMask = PreviousMask | Def.LaneMask; + increaseRegPressure(Def.RegUnit, PreviousMask, NewMask); } // Boost pressure for all dead defs together. - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs); // Find the next instruction. 
do @@ -728,22 +908,38 @@ static void computeMaxPressureDelta(ArrayRef OldMaxPressureVec, void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + // Account for register pressure similar to RegPressureTracker::recede(). RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true); + RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/true); assert(RegOpers.DeadDefs.size() == 0); - if (RequireIntervals) + if (TrackLaneMasks) + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + else if (RequireIntervals) RegOpers.detectDeadDefs(*MI, *LIS); + // Boost max pressure for all dead defs together. + // Since CurrSetPressure and MaxSetPressure + bumpDeadDefs(RegOpers.DeadDefs); + // Kill liveness at live defs. - for (unsigned Reg : RegOpers.Defs) { - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); + for (const RegisterMaskPair &P : RegOpers.Defs) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveLanes = LiveRegs.contains(Reg); + LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg); + LaneBitmask DefLanes = P.LaneMask; + LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes; + decreaseRegPressure(Reg, LiveLanes, LiveAfter); } // Generate liveness for uses. - for (unsigned Reg : RegOpers.Uses) { - if (!LiveRegs.contains(Reg)) - increaseRegPressure(Reg); + for (const RegisterMaskPair &P : RegOpers.Uses) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveLanes = LiveRegs.contains(Reg); + LaneBitmask LiveAfter = LiveLanes | P.LaneMask; + increaseRegPressure(Reg, LiveLanes, LiveAfter); } } @@ -888,15 +1084,64 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). 
-static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, - SlotIndex NextUseIdx, const MachineRegisterInfo &MRI, - const LiveIntervals *LIS) { - for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) { - SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot(); - if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) - return true; +/// The query starts with a lane bitmask which gets lanes/bits removed for every +/// use we find. +static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask, + SlotIndex PriorUseIdx, SlotIndex NextUseIdx, + const MachineRegisterInfo &MRI, + const LiveIntervals *LIS) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) { + if (MO.isUndef()) + continue; + const MachineInstr *MI = MO.getParent(); + SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) { + unsigned SubRegIdx = MO.getSubReg(); + LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx); + LastUseMask &= ~UseMask; + if (LastUseMask == 0) + return 0; + } } - return false; + return LastUseMask; +} + +LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit, + SlotIndex Pos) const { + if (!RequireIntervals) + return 0; + + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, + [](const LiveRange &LR, SlotIndex Pos) { + return LR.liveAt(Pos); + }); +} + +LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, + SlotIndex Pos) const { + if (!RequireIntervals) + return 0; + + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, + Pos.getBaseIndex(), + [](const LiveRange &LR, SlotIndex Pos) { + const LiveRange::Segment *S = LR.getSegmentContaining(Pos); + return S != nullptr && S->end == Pos.getRegSlot(); + }); +} + +LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit, + SlotIndex Pos) const { + if (!RequireIntervals) + return 0; + + return 
getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, + [](const LiveRange &LR, SlotIndex Pos) { + const LiveRange::Segment *S = LR.getSegmentContaining(Pos); + return S != nullptr && S->start < Pos.getRegSlot(true) && + S->end != Pos.getDeadSlot(); + }); } /// Record the downward impact of a single instruction on current register @@ -908,39 +1153,49 @@ static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); - // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, *MRI); - - // Kill liveness at last uses. Assume allocatable physregs are single-use - // rather than checking LiveIntervals. SlotIndex SlotIdx; if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); - for (unsigned Reg : RegOpers.Uses) { + // Account for register pressure similar to RegPressureTracker::recede(). + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, false); + if (TrackLaneMasks) + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + + for (const RegisterMaskPair &Use : RegOpers.Uses) { + unsigned Reg = Use.RegUnit; + LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); + if (LastUseMask == 0) + continue; if (RequireIntervals) { + // The LastUseMask is queried from the liveness information of instruction + // which may be further down the schedule. Some lanes may actually not be + // last uses for the current position. // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. 
SlotIndex CurrIdx = getCurrSlot(); - const LiveRange *LR = getLiveRange(*LIS, Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS)) - decreaseRegPressure(Reg); - } - } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // Allocatable physregs are always single-use before register rewriting. - decreaseRegPressure(Reg); + LastUseMask + = findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS); + if (LastUseMask == 0) + continue; } + + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask NewMask = LiveMask & ~LastUseMask; + decreaseRegPressure(Reg, LiveMask, NewMask); } // Generate liveness for defs. - increaseRegPressure(RegOpers.Defs); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + unsigned Reg = Def.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask NewMask = LiveMask | Def.LaneMask; + increaseRegPressure(Reg, LiveMask, NewMask); + } // Boost pressure for all dead defs together. 
- increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs); } /// Consider the pressure increase caused by traversing this instruction diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 11b246a8de2..130b8a66239 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -899,7 +899,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, collectVRegUses(SU); RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, MRI); + RegOpers.collect(*MI, *TRI, MRI, TrackLaneMasks, false); if (PDiffs != nullptr) PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index 1cfa9843002..f516c49a874 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -327,9 +327,9 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); // Do not Track Physical Registers, because it messes up. - for (unsigned Reg : RPTracker.getPressure().LiveInRegs) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - LiveInRegs.insert(Reg); + for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { + if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit)) + LiveInRegs.insert(RegMaskPair.RegUnit); } LiveOutRegs.clear(); // There is several possibilities to distinguish: @@ -354,7 +354,8 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7 // Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7 // The use of findDefBetween removes the case 4. 
- for (unsigned Reg : RPTracker.getPressure().LiveOutRegs) { + for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { + unsigned Reg = RegMaskPair.RegUnit; if (TargetRegisterInfo::isVirtualRegister(Reg) && isDefBetween(Reg, LIS->getInstructionIndex(BeginBlock).getRegSlot(), LIS->getInstructionIndex(EndBlock).getRegSlot(), diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h index b270136811c..5e7d7ed37b0 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.h +++ b/lib/Target/AMDGPU/SIMachineScheduler.h @@ -441,7 +441,7 @@ class SIScheduleDAGMI : public ScheduleDAGMILive { // To init Block's RPTracker. void initRPTracker(RegPressureTracker &RPTracker) { - RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); + RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); } MachineBasicBlock *getBB() { return BB; } @@ -460,8 +460,10 @@ class SIScheduleDAGMI : public ScheduleDAGMILive { unsigned &VgprUsage, unsigned &SgprUsage); std::set getInRegs() { - std::set InRegs (RPTracker.getPressure().LiveInRegs.begin(), - RPTracker.getPressure().LiveInRegs.end()); + std::set InRegs; + for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { + InRegs.insert(RegMaskPair.RegUnit); + } return InRegs; }; From 9ea1e7ff4f89fe4b39debeaa6fd09246d725444f Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 20 Jan 2016 00:23:32 +0000 Subject: [PATCH 0036/1132] MachineScheduler: Allow independent scheduling of sub register defs Note that this is disabled by default and still requires a patch to handleMove() which is not upstreamed yet. If the TrackLaneMasks policy/strategy is enabled the MachineScheduler will build a schedule graph where definitions of independent subregisters are no longer serialised. 
Implementation comments: - Without lane mask tracking a sub register def also counts as a use (except for the first one with the read-undef flag set), with lane mask tracking enabled this is no longer the case. - Pressure Diffs where previously maintained per definition of a vreg with the help of the SSA information contained in the LiveIntervals. With lanemask tracking enabled we cannot do this anymore and instead change the pressure diffs for all uses of the vreg as it becomes live/dead. For this changed style to work correctly we ignore uses of instructions that define the same register again: They won't affect register pressure. - With lanemask tracking we remove all read-undef flags from sub register defs when building the graph and re-add them later when all vreg lanes have become dead. Differential Revision: http://reviews.llvm.org/D14969 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258259 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit d267d377b8438b1a43eab76576a0796dfc61ada2) --- include/llvm/CodeGen/MachineScheduler.h | 26 +++- include/llvm/CodeGen/RegisterPressure.h | 11 +- include/llvm/CodeGen/ScheduleDAGInstrs.h | 1 + lib/CodeGen/MachineScheduler.cpp | 147 ++++++++++++++++------- lib/CodeGen/RegisterPressure.cpp | 83 +++++++++++-- lib/CodeGen/ScheduleDAGInstrs.cpp | 19 +++ 6 files changed, 226 insertions(+), 61 deletions(-) diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index ce9a327c8e5..c091fa9ede8 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -150,6 +150,9 @@ class ScheduleDAGMI; struct MachineSchedPolicy { // Allow the scheduler to disable register pressure tracking. bool ShouldTrackPressure; + /// Track LaneMasks to allow reordering of independent subregister writes + /// of the same vreg. 
\sa MachineSchedStrategy::shouldTrackLaneMasks() + bool ShouldTrackLaneMasks; // Allow the scheduler to force top-down or bottom-up scheduling. If neither // is true, the scheduler runs in both directions and converges. @@ -160,8 +163,8 @@ struct MachineSchedPolicy { // first. bool DisableLatencyHeuristic; - MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false), - OnlyBottomUp(false), DisableLatencyHeuristic(false) {} + MachineSchedPolicy(): ShouldTrackPressure(false), ShouldTrackLaneMasks(false), + OnlyTopDown(false), OnlyBottomUp(false), DisableLatencyHeuristic(false) {} }; /// MachineSchedStrategy - Interface to the scheduling algorithm used by @@ -185,6 +188,11 @@ class MachineSchedStrategy { /// initializing this strategy. Called after initPolicy. virtual bool shouldTrackPressure() const { return true; } + /// Returns true if lanemasks should be tracked. LaneMask tracking is + /// necessary to reorder independent subregister defs for the same vreg. + /// This has to be enabled in combination with shouldTrackPressure(). + virtual bool shouldTrackLaneMasks() const { return false; } + /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; @@ -371,6 +379,7 @@ class ScheduleDAGMILive : public ScheduleDAGMI { /// Register pressure in this region computed by initRegPressure. bool ShouldTrackPressure; + bool ShouldTrackLaneMasks; IntervalPressure RegPressure; RegPressureTracker RPTracker; @@ -387,13 +396,18 @@ class ScheduleDAGMILive : public ScheduleDAGMI { IntervalPressure BotPressure; RegPressureTracker BotRPTracker; + /// True if disconnected subregister components are already renamed. + /// The renaming is only done on demand if lane masks are tracked. 
+ bool DisconnectedComponentsRenamed; + public: ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S) : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), RegClassInfo(C->RegClassInfo), DFSResult(nullptr), - ShouldTrackPressure(false), RPTracker(RegPressure), - TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} + ShouldTrackPressure(false), ShouldTrackLaneMasks(false), + RPTracker(RegPressure), TopRPTracker(TopPressure), + BotRPTracker(BotPressure), DisconnectedComponentsRenamed(false) {} ~ScheduleDAGMILive() override; @@ -874,6 +888,10 @@ class GenericScheduler : public GenericSchedulerBase { return RegionPolicy.ShouldTrackPressure; } + bool shouldTrackLaneMasks() const override { + return RegionPolicy.ShouldTrackLaneMasks; + } + void initialize(ScheduleDAGMI *dag) override; SUnit *pickNode(bool &IsTopNode) override; diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 9fdb73662cf..aaddac40ca7 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -173,9 +173,11 @@ class RegisterOperands { /// Use liveness information to find out which uses/defs are partially /// undefined/dead and adjust the RegisterMaskPairs accordingly. + /// If \p AddFlagsMI is given then missing read-undef and dead flags will be + /// added to the instruction. void adjustLaneLiveness(const LiveIntervals &LIS, - const MachineRegisterInfo &MRI, SlotIndex Pos); - + const MachineRegisterInfo &MRI, SlotIndex Pos, + MachineInstr *AddFlagsMI = nullptr); }; /// Array of PressureDiffs. @@ -420,6 +422,11 @@ class RegPressureTracker { /// Advance across the current instruction. void advance(); + /// Advance across the current instruction. + /// This is a "low-level" variant of advance() which takes precomputed + /// RegisterOperands of the instruction. + void advance(const RegisterOperands &RegOpers); + /// Finalize the region boundaries and recored live ins and live outs. 
void closeRegion(); diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index c574df09491..70537d1895f 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -214,6 +214,7 @@ namespace llvm { void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = nullptr, PressureDiffs *PDiffs = nullptr, + LiveIntervals *LIS = nullptr, bool TrackLaneMasks = false); /// addSchedBarrierDeps - Add dependencies from instructions in the current diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index fa8e5ba190b..c266b746e60 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -869,13 +869,27 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, SUPressureDiffs.clear(); ShouldTrackPressure = SchedImpl->shouldTrackPressure(); + ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks(); + + if (ShouldTrackLaneMasks) { + if (!ShouldTrackPressure) + report_fatal_error("ShouldTrackLaneMasks requires ShouldTrackPressure"); + // Dead subregister defs have no users and therefore no dependencies, + // moving them around may cause liveintervals to degrade into multiple + // components. Change independent components to have their own vreg to avoid + // this. + if (!DisconnectedComponentsRenamed) + LIS->renameDisconnectedComponents(); + } } // Setup the register pressure trackers for the top scheduled top and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { - TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); - BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, false, false); + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, + ShouldTrackLaneMasks, false); + BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, + ShouldTrackLaneMasks, false); // Close the RPTracker to finalize live ins. 
RPTracker.closeRegion(); @@ -972,46 +986,71 @@ updateScheduledPressure(const SUnit *SU, void ScheduleDAGMILive::updatePressureDiffs( ArrayRef LiveUses) { for (const RegisterMaskPair &P : LiveUses) { - /// FIXME: Currently assuming single-use physregs. unsigned Reg = P.RegUnit; - assert(P.LaneMask != 0); - DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + /// FIXME: Currently assuming single-use physregs. if (!TRI->isVirtualRegister(Reg)) continue; - // This may be called before CurrentBottom has been initialized. However, - // BotRPTracker must have a valid position. We want the value live into the - // instruction or live out of the block, so ask for the previous - // instruction's live-out. - const LiveInterval &LI = LIS->getInterval(Reg); - VNInfo *VNI; - MachineBasicBlock::const_iterator I = - nextIfDebug(BotRPTracker.getPos(), BB->end()); - if (I == BB->end()) - VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); - else { - LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); - VNI = LRQ.valueIn(); - } - // RegisterPressureTracker guarantees that readsReg is true for LiveUses. - assert(VNI && "No live value at use."); - for (const VReg2SUnit &V2SU - : make_range(VRegUses.find(Reg), VRegUses.end())) { - SUnit *SU = V2SU.SU; - // If this use comes before the reaching def, it cannot be a last use, so - // descrease its pressure change. - if (!SU->isScheduled && SU != &ExitSU) { - LiveQueryResult LRQ - = LI.Query(LIS->getInstructionIndex(SU->getInstr())); - if (LRQ.valueIn() == VNI) { - PressureDiff &PDiff = getPressureDiff(SU); - PDiff.addPressureChange(Reg, true, &MRI); - DEBUG( - dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr(); - dbgs() << " to "; - PDiff.dump(*TRI); - ); + if (ShouldTrackLaneMasks) { + // If the register has just become live then other uses won't change + // this fact anymore => decrement pressure. 
+ // If the register has just become dead then other uses make it come + // back to life => increment pressure. + bool Decrement = P.LaneMask != 0; + + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit &SU = *V2SU.SU; + if (SU.isScheduled || &SU == &ExitSU) + continue; + + PressureDiff &PDiff = getPressureDiff(&SU); + PDiff.addPressureChange(Reg, Decrement, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") " + << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) + << ' ' << *SU.getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } + } else { + assert(P.LaneMask != 0); + DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + // This may be called before CurrentBottom has been initialized. However, + // BotRPTracker must have a valid position. We want the value live into the + // instruction or live out of the block, so ask for the previous + // instruction's live-out. + const LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI; + MachineBasicBlock::const_iterator I = + nextIfDebug(BotRPTracker.getPos(), BB->end()); + if (I == BB->end()) + VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + else { + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); + VNI = LRQ.valueIn(); + } + // RegisterPressureTracker guarantees that readsReg is true for LiveUses. + assert(VNI && "No live value at use."); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + // If this use comes before the reaching def, it cannot be a last use, + // so decrease its pressure change. 
+ if (!SU->isScheduled && SU != &ExitSU) { + LiveQueryResult LRQ + = LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } } } } @@ -1113,14 +1152,14 @@ void ScheduleDAGMILive::buildDAGWithRegPressure() { // Initialize the register pressure tracker used by buildSchedGraph. RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, - false, /*TrackUntiedDefs=*/true); + ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true); // Account for liveness generate by the region boundary. if (LiveRegionEnd != RegionEnd) RPTracker.recede(); // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker, &SUPressureDiffs); + buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -1239,7 +1278,18 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { if (ShouldTrackPressure) { // Update top scheduled pressure. - TopRPTracker.advance(); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); + if (ShouldTrackLaneMasks) { + // Adjust liveness and add missing dead+read-undef flags. + SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); + } else { + // Adjust for missing dead-def flags. + RegOpers.detectDeadDefs(*MI, *LIS); + } + + TopRPTracker.advance(RegOpers); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); DEBUG( dbgs() << "Top Pressure:\n"; @@ -1264,9 +1314,20 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { CurrentBottom = MI; } if (ShouldTrackPressure) { - // Update bottom scheduled pressure. 
+ RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); + if (ShouldTrackLaneMasks) { + // Adjust liveness and add missing dead+read-undef flags. + SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); + } else { + // Adjust for missing dead-def flags. + RegOpers.detectDeadDefs(*MI, *LIS); + } + + BotRPTracker.recedeSkipDebugValues(); SmallVector LiveUses; - BotRPTracker.recede(&LiveUses); + BotRPTracker.recede(RegOpers, &LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); DEBUG( dbgs() << "Bottom Pressure:\n"; diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 78a766ed13b..80979267e2b 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -352,6 +352,19 @@ static void addRegLanes(SmallVectorImpl &RegUnits, } } +static void setRegZero(SmallVectorImpl &RegUnits, + unsigned RegUnit) { + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I == RegUnits.end()) { + RegUnits.push_back(RegisterMaskPair(RegUnit, 0)); + } else { + I->LaneMask = 0; + } +} + static void removeRegLanes(SmallVectorImpl &RegUnits, RegisterMaskPair Pair) { unsigned RegUnit = Pair.RegUnit; @@ -510,7 +523,8 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI, void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, - SlotIndex Pos) { + SlotIndex Pos, + MachineInstr *AddFlagsMI) { for (auto I = Defs.begin(); I != Defs.end(); ) { LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getDeadSlot()); @@ -519,10 +533,20 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, if (DeadDef != 0) addRegLanes(DeadDefs, RegisterMaskPair(I->RegUnit, DeadDef)); #endif + // If the the def is all that is live after the instruction, then in 
case + // of a subregister def we need a read-undef flag. + unsigned RegUnit = I->RegUnit; + if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0) + AddFlagsMI->setRegisterDefReadUndef(RegUnit); + unsigned LaneMask = I->LaneMask & LiveAfter; - if (LaneMask == 0) + if (LaneMask == 0) { I = Defs.erase(I); - else { + // Make sure the operand is properly marked as Dead. + if (AddFlagsMI != nullptr) + AddFlagsMI->addRegisterDead(RegUnit, MRI.getTargetRegisterInfo()); + } else { I->LaneMask = LaneMask; ++I; } @@ -538,6 +562,15 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, ++I; } } + if (AddFlagsMI != nullptr) { + for (const RegisterMaskPair &P : DeadDefs) { + unsigned RegUnit = P.RegUnit; + LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, + Pos.getDeadSlot()); + if (LiveAfter == 0) + AddFlagsMI->setRegisterDefReadUndef(RegUnit); + } + } } /// Initialize an array of N PressureDiffs. @@ -684,6 +717,13 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, PreviousMask = LiveOut; } + if (NewMask == 0) { + // Add a 0 entry to LiveUses as a marker that the complete vreg has become + // dead. + if (TrackLaneMasks && LiveUses != nullptr) + setRegZero(*LiveUses, Reg); + } + decreaseRegPressure(Reg, PreviousMask, NewMask); } @@ -703,8 +743,22 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, // Did the register just become live? if (PreviousMask == 0) { if (LiveUses != nullptr) { - unsigned NewLanes = NewMask & ~PreviousMask; - addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewLanes)); + if (!TrackLaneMasks) { + addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } else { + auto I = std::find_if(LiveUses->begin(), LiveUses->end(), + [Reg](const RegisterMaskPair Other) { + return Other.RegUnit == Reg; + }); + bool IsRedef = I != LiveUses->end(); + if (IsRedef) { + // ignore re-defs here... 
+ assert(I->LaneMask == 0); + removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } else { + addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } + } } // Discover live outs if this may be the first occurance of this register. @@ -764,9 +818,8 @@ void RegPressureTracker::recede(SmallVectorImpl *LiveUses) { } /// Advance across the current instruction. -void RegPressureTracker::advance() { +void RegPressureTracker::advance(const RegisterOperands &RegOpers) { assert(!TrackUntiedDefs && "unsupported mode"); - assert(CurrPos != MBB->end()); if (!isTopClosed()) closeTop(); @@ -783,11 +836,6 @@ void RegPressureTracker::advance() { static_cast(P).openBottom(CurrPos); } - RegisterOperands RegOpers; - RegOpers.collect(*CurrPos, *TRI, *MRI, TrackLaneMasks, false); - if (TrackLaneMasks) - RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); - for (const RegisterMaskPair &Use : RegOpers.Uses) { unsigned Reg = Use.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); @@ -821,6 +869,17 @@ void RegPressureTracker::advance() { while (CurrPos != MBB->end() && CurrPos->isDebugValue()); } +void RegPressureTracker::advance() { + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false); + if (TrackLaneMasks) { + SlotIndex SlotIdx = getCurrSlot(); + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + } + advance(RegOpers); +} + /// Find the max change in excess pressure across all sets. 
static void computeExcessPressureDelta(ArrayRef OldPressureVec, ArrayRef NewPressureVec, diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 130b8a66239..e0aeb570c2f 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -808,6 +809,19 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + // Ignore re-defs. + if (TrackLaneMasks) { + bool FoundDef = false; + for (const MachineOperand &MO2 : MI->operands()) { + if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { + FoundDef = true; + break; + } + } + if (FoundDef) + continue; + } + // Record this local VReg use. VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); for (; UI != VRegUses.end(); ++UI) { @@ -825,6 +839,7 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs, + LiveIntervals *LIS, bool TrackLaneMasks) { const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? 
EnableAASchedMI @@ -900,6 +915,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegisterOperands RegOpers; RegOpers.collect(*MI, *TRI, MRI, TrackLaneMasks, false); + if (TrackLaneMasks) { + SlotIndex SlotIdx = LIS->getInstructionIndex(MI); + RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx); + } if (PDiffs != nullptr) PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); From aceb2eb49a9229f43ab099ba10ebb87134d7725d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 20 Jan 2016 03:12:40 +0000 Subject: [PATCH 0037/1132] [Orc] Qualify captured variable to work around GCC ICE. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258278 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 6e2f199988e7e89fb572a339247e6651d41272b4) --- include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index affb16a1c26..b1d1867339e 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -126,7 +126,7 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { PFC->Finalizer(PFC->Handle, RTDyld, std::move(PFC->Objects), [&]() { updateSymbolTable(RTDyld); - Finalized = true; + this->Finalized = true; }); // Release resources. From ba3cf67aad1ee3f390442d635d94d454afb4e098 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 20 Jan 2016 04:32:05 +0000 Subject: [PATCH 0038/1132] [Orc] More qualifications of lambda-captured member variables to fix GCC ICEs. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258286 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 61cac3cdc56912f0eee77267cd9dd45ea23ce123) --- include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index b1d1867339e..c77e648a4df 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -258,12 +258,12 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { LOSHandleLoad(); - NotifyLoaded(H, Objs, LoadedObjInfos); + this->NotifyLoaded(H, Objs, LoadedObjInfos); RTDyld.finalizeWithMemoryManagerLocking(); - if (NotifyFinalized) - NotifyFinalized(H); + if (this->NotifyFinalized) + this->NotifyFinalized(H); }; auto LOS = From 5f41954685e5df7c7d521d88c4900cba802017e7 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 20 Jan 2016 05:10:59 +0000 Subject: [PATCH 0039/1132] [Orc] 'this' qualify more lambda-captured members. More workaround attempts for GCC ICEs. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258288 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 801561e2bba12f2aa0285feb1105e110df443761) --- include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index c77e648a4df..85dfa849edc 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -125,7 +125,7 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { PFC->Finalizer(PFC->Handle, RTDyld, std::move(PFC->Objects), [&]() { - updateSymbolTable(RTDyld); + this->updateSymbolTable(RTDyld); this->Finalized = true; }); @@ -139,8 +139,8 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { // The symbol may be materialized between the creation of this lambda // and its execution, so we need to double check. if (!this->Finalized) - finalize(); - return getSymbol(Name, false).getAddress(); + this->finalize(); + return this->getSymbol(Name, false).getAddress(); }; } @@ -254,7 +254,7 @@ class ObjectLinkingLayer : public ObjectLinkingLayerBase { LoadedObjInfoList LoadedObjInfos; for (auto &Obj : Objs) - LoadedObjInfos.push_back(RTDyld.loadObject(getObject(*Obj))); + LoadedObjInfos.push_back(RTDyld.loadObject(this->getObject(*Obj))); LOSHandleLoad(); From c7c9de9027ce2b87c9e9948011bbdf271f29f1fc Mon Sep 17 00:00:00 2001 From: Ivan Krasin Date: Wed, 20 Jan 2016 08:41:22 +0000 Subject: [PATCH 0040/1132] [Verifier] Fix performance regression for LTO builds Summary: Fix a significant performance regression by introducing GlobalValueVisited field and reusing the map. This is a follow up to r257823 that slowed down linking Chrome with LTO by 2.5x. If you revert this commit, please, also revert r257823. 
BUG=https://llvm.org/bugs/show_bug.cgi?id=26214 Reviewers: pcc, loladiro, joker.eph Subscribers: krasin1, joker.eph, loladiro, pcc Differential Revision: http://reviews.llvm.org/D16338 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258297 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit d54bc6a4158813685ec5cf7d2f1ee2579b8225d9) --- lib/IR/Verifier.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 8ca4032a274..efc8879980b 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -217,6 +217,12 @@ class Verifier : public InstVisitor, VerifierSupport { /// Cache of constants visited in search of ConstantExprs. SmallPtrSet ConstantExprVisited; + // Verify that this GlobalValue is only used in this module. + // This map is used to avoid visiting uses twice. We can arrive at a user + // twice, if they have multiple operands. In particular for very large + // constant expressions, we can arrive at a particular user many times. + SmallPtrSet GlobalValueVisited; + void checkAtomicMemAccessSize(const Module *M, Type *Ty, const Instruction *I); public: @@ -494,12 +500,7 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { if (GV.isDeclarationForLinker()) Assert(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV); - // Verify that this GlobalValue is only used in this module. - // This map is used to avoid visiting uses twice. We can arrive at a user - // twice, if they have multiple operands. In particular for very large - // constant expressions, we can arrive at a particular user many times. 
- SmallPtrSet Visited; - forEachUser(&GV, Visited, [&](const Value *V) -> bool { + forEachUser(&GV, GlobalValueVisited, [&](const Value *V) -> bool { if (const Instruction *I = dyn_cast(V)) { if (!I->getParent() || !I->getParent()->getParent()) CheckFailed("Global is referenced by parentless instruction!", &GV, From 742d19de75625f36ecabb5e8d6a59073741240de Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Wed, 20 Jan 2016 09:03:42 +0000 Subject: [PATCH 0041/1132] [LTO] Fix error reporting when a file passed to libLTO is invalid or non-existent This addresses PR26060 where function lto_module_create() could return nullptr but lto_get_error_message() returned an empty string. The error() call after LTOModule::createFromFile() in llvm-lto is then removed because any error from this function should go through the diagnostic handler in llvm-lto which will exit the program. The error() call was added because this previously did not happen when the file was non-existent. This is fixed by the patch. (The situation that llvm-lto reports an error when the input file does not exist is tested by llvm/tools/llvm-lto/error.ll). 
Differential Revision: http://reviews.llvm.org/D16106 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258298 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 616e99df645c908916a49a3fd2dfbbaf5b18566c) --- lib/LTO/LTOModule.cpp | 12 +++++++++--- tools/llvm-lto/llvm-lto.cpp | 1 - tools/lto/lto.cpp | 2 -- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 409b9490233..4806f903bdf 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -105,8 +105,10 @@ LTOModule::createFromFile(LLVMContext &Context, const char *path, TargetOptions options) { ErrorOr> BufferOrErr = MemoryBuffer::getFile(path); - if (std::error_code EC = BufferOrErr.getError()) + if (std::error_code EC = BufferOrErr.getError()) { + Context.emitError(EC.message()); return EC; + } std::unique_ptr Buffer = std::move(BufferOrErr.get()); return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } @@ -123,8 +125,10 @@ LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, off_t offset, TargetOptions options) { ErrorOr> BufferOrErr = MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); - if (std::error_code EC = BufferOrErr.getError()) + if (std::error_code EC = BufferOrErr.getError()) { + Context.emitError(EC.message()); return EC; + } std::unique_ptr Buffer = std::move(BufferOrErr.get()); return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } @@ -158,8 +162,10 @@ parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, // Find the buffer. ErrorOr MBOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer); - if (std::error_code EC = MBOrErr.getError()) + if (std::error_code EC = MBOrErr.getError()) { + Context.emitError(EC.message()); return EC; + } if (!ShouldBeLazy) { // Parse the full file. 
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 55c0f48aead..e83ac279f83 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -294,7 +294,6 @@ int main(int argc, char **argv) { CurrentActivity = "loading file '" + InputFilenames[i] + "'"; ErrorOr> ModuleOrErr = LTOModule::createFromFile(Context, InputFilenames[i].c_str(), Options); - error(ModuleOrErr, "error " + CurrentActivity); std::unique_ptr &Module = *ModuleOrErr; CurrentActivity = ""; diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index d8f99c050a3..e7062b59d52 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -81,7 +81,6 @@ static void diagnosticHandler(const DiagnosticInfo &DI, void *Context) { DiagnosticPrinterRawOStream DP(Stream); DI.print(DP); } - sLastErrorString += '\n'; } // Initialize the configured targets if they have not been initialized. @@ -111,7 +110,6 @@ namespace { static void handleLibLTODiagnostic(lto_codegen_diagnostic_severity_t Severity, const char *Msg, void *) { sLastErrorString = Msg; - sLastErrorString += "\n"; } // This derived class owns the native object file. This helps implement the From 356643df0602692078823f6121d15ff9e0ec31d8 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Wed, 20 Jan 2016 12:54:31 +0000 Subject: [PATCH 0042/1132] [AArch64] Fix two bugs in the .inst directive The AArch64 .inst directive was implemented using EmitIntValue, which resulted in both $x and $d (code and data) mapping symbols being emitted at the same address. This fixes it to only emit the $x mapping symbol. EmitIntValue also emits the value in big-endian order when targeting big-endian systems, but instructions are always emitted in little-endian order for AArch64. 
Differential Revision: http://reviews.llvm.org/D16349 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258308 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit c7c3038cf39227b367c6092fc21ddf50f06d5ea3) --- .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 14 +++++++++++++- test/MC/AArch64/inst-directive.s | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index d26604f5765..685907a2178 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -112,9 +112,21 @@ class AArch64ELFStreamer : public MCELFStreamer { MCELFStreamer::EmitInstruction(Inst, STI); } + /// Emit a 32-bit value as an instruction. This is only used for the .inst + /// directive, EmitInstruction should be used in other cases. void emitInst(uint32_t Inst) { + char Buffer[4]; + + // We can't just use EmitIntValue here, as that will emit a data mapping + // symbol, and swap the endianness on big-endian systems (instructions are + // always little-endian). 
+ for (unsigned I = 0; I < 4; ++I) { + Buffer[I] = uint8_t(Inst); + Inst >>= 8; + } + EmitA64MappingSymbol(); - MCELFStreamer::EmitIntValue(Inst, 4); + MCELFStreamer::EmitBytes(StringRef(Buffer, 4)); } /// This is one of the functions used to emit data into an ELF section, so the diff --git a/test/MC/AArch64/inst-directive.s b/test/MC/AArch64/inst-directive.s index 3bb620f689d..7fd5200b9e5 100644 --- a/test/MC/AArch64/inst-directive.s +++ b/test/MC/AArch64/inst-directive.s @@ -1,7 +1,14 @@ // RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=asm -o - \ // RUN: | FileCheck %s --check-prefix=CHECK-ASM -// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o - \ -// RUN: | llvm-readobj -s -sd | FileCheck %s --check-prefix=CHECK-OBJ +// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o %t +// RUN: llvm-readobj -s -sd %t | FileCheck %s --check-prefix=CHECK-OBJ +// RUN: llvm-objdump -t %t | FileCheck %s --check-prefix=CHECK-SYMS + +// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=asm -o - \ +// RUN: | FileCheck %s --check-prefix=CHECK-ASM +// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=obj -o %t +// RUN: llvm-readobj -s -sd %t | FileCheck %s --check-prefix=CHECK-OBJ +// RUN: llvm-objdump -t %t | FileCheck %s --check-prefix=CHECK-SYMS .section .inst.aarch64_inst @@ -22,3 +29,7 @@ aarch64_inst: // CHECK-OBJ: SectionData ( // CHECK-OBJ-NEXT: 0000: 2040105E // CHECK-OBJ-NEXT: ) + +// CHECK-SYMS-NOT: 0000000000000000 .inst.aarch64_inst 00000000 $d +// CHECK-SYMS: 0000000000000000 .inst.aarch64_inst 00000000 $x +// CHECK-SYMS-NOT: 0000000000000000 .inst.aarch64_inst 00000000 $d From b65093812f69e28806feabdb7a5a726ad5867277 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 20 Jan 2016 13:14:52 +0000 Subject: [PATCH 0043/1132] Proper handling of diamond-like cases in if-conversion If converter was somewhat careless about "diamond" cases, where there was no join block, or in other words, where the 
true/false blocks did not have analyzable branches. In such cases, it was possible for it to remove (needed) branches, resulting in a loss of entire basic blocks. Differential Revision: http://reviews.llvm.org/D16156 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258310 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 15802c148cf78263ff273959c59bb4d5847f0166) --- lib/CodeGen/IfConversion.cpp | 67 ++++++++++++++++++----- test/CodeGen/Hexagon/ifcvt-diamond-bad.ll | 43 +++++++++++++++ 2 files changed, 95 insertions(+), 15 deletions(-) create mode 100644 test/CodeGen/Hexagon/ifcvt-diamond-bad.ll diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index c38c9d22266..bca0a460f0c 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -595,15 +595,19 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, // Now, in preparation for counting duplicate instructions at the ends of the // blocks, move the end iterators up past any branch instructions. - while (TIE != TIB) { - --TIE; - if (!TIE->isBranch()) - break; - } - while (FIE != FIB) { - --FIE; - if (!FIE->isBranch()) - break; + // If both blocks are returning don't skip the branches, since they will + // likely be both identical return instructions. In such cases the return + // can be left unpredicated. + // Check for already containing all of the block. + if (TIB == TIE || FIB == FIE) + return true; + --TIE; + --FIE; + if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) { + while (TIE != TIB && TIE->isBranch()) + --TIE; + while (FIE != FIB && FIE->isBranch()) + --FIE; } // If Dups1 includes all of a block, then don't count duplicate @@ -1395,8 +1399,13 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); - // Remove branch from 'true' block and remove duplicated instructions. 
- BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + // Remove branch from the 'true' block, unless it was not analyzable. + // Non-analyzable branches need to be preserved, since in such cases, + // the CFG structure is not an actual diamond (the join block may not + // be present). + if (BBI1->IsBrAnalyzable) + BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + // Remove duplicated instructions. DI1 = BBI1->BB->end(); for (unsigned i = 0; i != NumDups2; ) { // NumDups2 only counted non-dbg_value instructions, so this won't @@ -1413,8 +1422,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // must be removed. RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); - // Remove 'false' block branch and find the last instruction to predicate. - BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + // Remove 'false' block branch (unless it was not analyzable), and find + // the last instruction to predicate. + if (BBI2->IsBrAnalyzable) + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { // NumDups2 only counted non-dbg_value instructions, so this won't @@ -1473,6 +1484,18 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Predicate the 'true' block. PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); + // After predicating BBI1, if there is a predicated terminator in BBI1 and + // a non-predicated in BBI2, then we don't want to predicate the one from + // BBI2. The reason is that if we merged these blocks, we would end up with + // two predicated terminators in the same block. + if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) { + MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator(); + MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator(); + if ((BBI1T != BBI1->BB->end()) && TII->isPredicated(BBI1T) && + ((BBI2T != BBI2->BB->end()) && !TII->isPredicated(BBI2T))) + --DI2; + } + // Predicate the 'false' block. 
PredicateBlock(*BBI2, DI2, *Cond2); @@ -1488,6 +1511,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough && !TailBBI.BB->hasAddressTaken(); + // The if-converted block can still have a predicated terminator + // (e.g. a predicated return). If that is the case, we cannot merge + // it with the tail block. + MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator(); + if (TI != BBI.BB->end() && TII->isPredicated(TI)) + CanMergeTail = false; // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. unsigned NumPreds = TailBB->pred_size(); @@ -1659,8 +1688,16 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { assert(!FromBBI.BB->hasAddressTaken() && "Removing a BB whose address is taken!"); - ToBBI.BB->splice(ToBBI.BB->end(), - FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); + // In case FromBBI.BB contains terminators (e.g. return instruction), + // first move the non-terminator instructions, then the terminators. + MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator(); + MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator(); + ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI); + + // If FromBB has non-predicated terminator we should copy it at the end. + if ((FromTI != FromBBI.BB->end()) && !TII->isPredicated(FromTI)) + ToTI = ToBBI.BB->end(); + ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end()); // Force normalizing the successors' probabilities of ToBBI.BB to convert all // unknown probabilities into known ones. 
diff --git a/test/CodeGen/Hexagon/ifcvt-diamond-bad.ll b/test/CodeGen/Hexagon/ifcvt-diamond-bad.ll new file mode 100644 index 00000000000..e4bee8354a7 --- /dev/null +++ b/test/CodeGen/Hexagon/ifcvt-diamond-bad.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon -minimum-jump-tables=1 < %s +; REQUIRES: asserts + +target datalayout = "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32" +target triple = "hexagon" + +%struct.t0 = type { i8, [2 x i8] } +%struct.t1 = type { i8, i8, [1900 x i8], %struct.t0 } + +@var = internal global [3 x %struct.t1] zeroinitializer, align 8 +declare void @foo() #2 +declare void @bar(i32, i32) #2 + +; Function Attrs: nounwind +define void @fred(i8 signext %a, i8 signext %b) #1 { +entry: + %i = sext i8 %a to i32 + %t = getelementptr inbounds [3 x %struct.t1], [3 x %struct.t1]* @var, i32 0, i32 %i, i32 3, i32 0 + %0 = load i8, i8* %t, align 8 + switch i8 %0, label %if.end14 [ + i8 1, label %if.then + i8 0, label %do.body + ] + +if.then: ; preds = %entry + %j = sext i8 %b to i32 + %u = getelementptr inbounds [3 x %struct.t1], [3 x %struct.t1]* @var, i32 0, i32 %i, i32 3, i32 1, i32 %j + store i8 1, i8* %u, align 1 + tail call void @foo() #0 + br label %if.end14 + +do.body: ; preds = %entry + %conv11 = sext i8 %b to i32 + tail call void @bar(i32 %i, i32 %conv11) #0 + br label %if.end14 + +if.end14: ; preds = %entry, %do.body, %if.then + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "disable-tail-calls"="false" } +attributes #2 = { "disable-tail-calls"="false" } From 7cbb44772e67c9f373c5690cc0c62bec6656bda5 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 20 Jan 2016 17:39:52 +0000 Subject: [PATCH 0044/1132] [Orc] Fix a use-after-move bug in the Orc C-bindings stack. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258324 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 983ff8e507f81a1586098606527b2714cc5f1c0b) --- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index aae6a99432b..3b2f9ab1cda 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -90,12 +90,12 @@ class OrcCBindingsStack { std::unique_ptr CCMgr, IndirectStubsManagerBuilder IndirectStubsMgrBuilder) : DL(TM.createDataLayout()), CCMgr(std::move(CCMgr)), + IndirectStubsMgr(IndirectStubsMgrBuilder()), ObjectLayer(), CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)), CODLayer(CompileLayer, [](Function &F) { std::set S; S.insert(&F); return S; }, *this->CCMgr, std::move(IndirectStubsMgrBuilder), false), - IndirectStubsMgr(IndirectStubsMgrBuilder()), CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {} ~OrcCBindingsStack() { @@ -264,13 +264,13 @@ class OrcCBindingsStack { DataLayout DL; SectionMemoryManager CCMgrMemMgr; + std::unique_ptr IndirectStubsMgr; + std::unique_ptr CCMgr; ObjLayerT ObjectLayer; CompileLayerT CompileLayer; CODLayerT CODLayer; - std::unique_ptr IndirectStubsMgr; - std::vector> GenericHandles; std::vector FreeHandleIndexes; From 9a833cd5a87960bdac7474bac651a25c739b3a09 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 20 Jan 2016 17:41:14 +0000 Subject: [PATCH 0045/1132] [LibCallSimplifier] don't get fooled by a fake sqrt() The test case will crash without this patch because the subsequent call to hasUnsafeAlgebra() assumes that the call instruction is an FPMathOperator (ie, returns an FP type). This part of the function signature check was omitted for the sqrt() case, but seems to be in place for all other transforms. 
Before: http://reviews.llvm.org/rL257400 ...we would have needlessly continued execution in optimizeSqrt(), but the bug was harmless because we'd eventually fail some other check and return without damage. This should fix: https://llvm.org/bugs/show_bug.cgi?id=26211 Differential Revision: http://reviews.llvm.org/D16198 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258325 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit e52954eb4a9d60e3fe08a8bf0cbe4b6fadfb766d) --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 11 ++++++++++- test/Transforms/InstCombine/cos-2.ll | 16 +++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc074406014..908b4bb6a65 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1394,12 +1394,21 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - + Value *Ret = nullptr; if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); + // FIXME: Refactor - this check is repeated all over this file and even in the + // preceding call to shrink double -> float. + + // Make sure this has 1 argument of FP type, which matches the result type. 
+ FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + if (!CI->hasUnsafeAlgebra()) return Ret; diff --git a/test/Transforms/InstCombine/cos-2.ll b/test/Transforms/InstCombine/cos-2.ll index c9a9c7c0771..a85cc8fa6bd 100644 --- a/test/Transforms/InstCombine/cos-2.ll +++ b/test/Transforms/InstCombine/cos-2.ll @@ -1,12 +1,11 @@ -; Test that the cos library call simplifier works correctly. -; ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" declare float @cos(double) +declare signext i8 @sqrt(...) -; Check that cos functions with the wrong prototype aren't simplified. +; Check that functions with the wrong prototype aren't simplified. define float @test_no_simplify1(double %d) { ; CHECK-LABEL: @test_no_simplify1( @@ -15,3 +14,14 @@ define float @test_no_simplify1(double %d) { ; CHECK: call float @cos(double %neg) ret float %cos } + + +define i8 @bogus_sqrt() { + %fake_sqrt = call signext i8 (...) @sqrt() + ret i8 %fake_sqrt + +; CHECK-LABEL: bogus_sqrt( +; CHECK-NEXT: %fake_sqrt = call signext i8 (...) @sqrt() +; CHECK-NEXT: ret i8 %fake_sqrt +} + From b50afb7b7280a2c2dbbcfcda072f994a61b870e2 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 20 Jan 2016 18:57:48 +0000 Subject: [PATCH 0046/1132] Accept subtractions involving a weak symbol. When a symbol S shows up in an expression in assembly there are two possible interpretations * The expression is referring to the value of S in this file. * The expression is referring to the value after symbol resolution. In the first case the assembler can reason about the value and try to produce a relocation. In the second case, that is only possible if the symbol cannot be preempted. 
Assemblers are not very consistent about which interpretation gets used. This changes MC to agree with GAS in the case of an expression of the form "Sym - WeakSym". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258329 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 888824fa231ef2e6908895c92da558e3a427d1e7) --- lib/MC/ELFObjectWriter.cpp | 6 ------ test/MC/AArch64/error-location.s | 3 --- test/MC/ARM/error-location.s | 3 --- test/MC/ELF/relocation.s | 6 ++++++ 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 7e3ddda1815..8183e8cbdf3 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -655,12 +655,6 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, return; } - if (::isWeak(SymB)) { - Ctx.reportError(Fixup.getLoc(), - "Cannot represent a subtraction with a weak symbol"); - return; - } - uint64_t SymBOffset = Layout.getSymbolOffset(SymB); uint64_t K = SymBOffset - FixupOffset; IsPCRel = true; diff --git a/test/MC/AArch64/error-location.s b/test/MC/AArch64/error-location.s index 02504368f00..c629e0a50de 100644 --- a/test/MC/AArch64/error-location.s +++ b/test/MC/AArch64/error-location.s @@ -16,9 +16,6 @@ // CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a difference across sections .word x_a - y_a -// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a subtraction with a weak symbol - .word a - w - // CHECK: :0: error: expression could not be evaluated .set v1, -undef diff --git a/test/MC/ARM/error-location.s b/test/MC/ARM/error-location.s index 112acf318ed..2f70f294b57 100644 --- a/test/MC/ARM/error-location.s +++ b/test/MC/ARM/error-location.s @@ -16,9 +16,6 @@ @ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a difference across sections .word x_a - y_a -@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a subtraction with a weak symbol - .word a - w - @ CHECK: :0: error: expression could not be 
evaluated .set v1, -undef diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s index 0fec7679281..e0313904563 100644 --- a/test/MC/ELF/relocation.s +++ b/test/MC/ELF/relocation.s @@ -63,6 +63,11 @@ pr24486: .code16 call pr23771 + .weak weak_sym +weak_sym: + .long pr23272-weak_sym + + // CHECK: Section { // CHECK: Name: .rela.text // CHECK: Relocations [ @@ -101,5 +106,6 @@ pr24486: // CHECK-NEXT: 0xDC R_X86_64_PLT32 foo 0x0 // CHECK-NEXT: 0xF0 R_X86_64_32 .text 0xF0 // CHECK-NEXT: 0xF5 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE +// CHECK-NEXT: 0xF7 R_X86_64_PC32 pr23272 0x0 // CHECK-NEXT: ] // CHECK-NEXT: } From f0b6585490e4f8738effcc97ba7d597d23436084 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 20 Jan 2016 22:05:50 +0000 Subject: [PATCH 0047/1132] Fix PR26152. Fix the condition for when the new global takes over the name of the existing one to be the negation of the condition for the new global to get internal linkage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258355 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit c97b26586f24799ba1de0abb1c5a53e38ac93c84) --- lib/Linker/IRMover.cpp | 2 +- test/Linker/Inputs/alias-2.ll | 7 +++++++ test/Linker/alias-2.ll | 24 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 test/Linker/Inputs/alias-2.ll create mode 100644 test/Linker/alias-2.ll diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index de368498ef2..9fdf8ca6b24 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -1087,7 +1087,7 @@ Constant *IRLinker::linkGlobalValueProto(GlobalValue *SGV, bool ForAlias) { return nullptr; NewGV = copyGlobalValueProto(SGV, ShouldLink); - if (!ForAlias) + if (ShouldLink || !ForAlias) forceRenaming(NewGV, SGV->getName()); } if (ShouldLink || ForAlias) { diff --git a/test/Linker/Inputs/alias-2.ll b/test/Linker/Inputs/alias-2.ll new file mode 100644 index 00000000000..03c1d91fe6b --- /dev/null +++ 
b/test/Linker/Inputs/alias-2.ll @@ -0,0 +1,7 @@ +define void @B() { + call void @A() + ret void +} + +declare void @A() + diff --git a/test/Linker/alias-2.ll b/test/Linker/alias-2.ll new file mode 100644 index 00000000000..8875aa76455 --- /dev/null +++ b/test/Linker/alias-2.ll @@ -0,0 +1,24 @@ +; RUN: llvm-link %s %S/Inputs/alias-2.ll -S -o - | FileCheck %s +; RUN: llvm-link %S/Inputs/alias-2.ll %s -S -o - | FileCheck %s + +; Test the fix for PR26152, where A from the second module is +; erroneously renamed to A.1 and not linked to the declaration from +; the first module + +@C = alias void (), void ()* @A + +define void @D() { + call void @C() + ret void +} + +define void @A() { + ret void +} + +; CHECK-DAG: @C = alias void (), void ()* @A +; CHECK-DAG: define void @B() +; CHECK-DAG: call void @A() +; CHECK-DAG: define void @D() +; CHECK-DAG: call void @C() +; CHECK-DAG: define void @A() From 553e5bab5fd8169d95111b14e6f9d37433cc9c36 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 20 Jan 2016 22:16:14 +0000 Subject: [PATCH 0048/1132] [Orc] Try to turn Orc execution unit tests back on for Linux. The fix in r258324 (plus r258354) should allow Orc execution tests to run on Linux. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258358 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 8b5686809776b59de0d2ff06adaf12582b7175eb) --- unittests/ExecutionEngine/Orc/OrcTestCommon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/unittests/ExecutionEngine/Orc/OrcTestCommon.h index f480e0789ae..5cc7071a866 100644 --- a/unittests/ExecutionEngine/Orc/OrcTestCommon.h +++ b/unittests/ExecutionEngine/Orc/OrcTestCommon.h @@ -46,7 +46,7 @@ class OrcExecutionTest { if (TM) { // If we found a TargetMachine, check that it's one that Orc supports. 
const Triple& TT = TM->getTargetTriple(); - if (TT.getArch() != Triple::x86_64 || !TT.isOSDarwin()) + if (TT.getArch() != Triple::x86_64 || TT.isOSWindows()) TM = nullptr; } }; From a49960680c0624732bec5c644aa727ef2e719007 Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Thu, 21 Jan 2016 02:35:59 +0000 Subject: [PATCH 0049/1132] re-submit test case (withright format-version) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258384 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 37c2652ad9f8706d6a4fc9322fcd17b1e032883a) --- test/tools/llvm-cov/Inputs/elf_binary_comdat | Bin 0 -> 2125219 bytes .../Inputs/elf_binary_comdat.profdata | Bin 0 -> 1040 bytes test/tools/llvm-cov/Inputs/instrprof-comdat.h | 19 ++++++++++++++++++ test/tools/llvm-cov/cov-comdat.test | 10 +++++++++ 4 files changed, 29 insertions(+) create mode 100755 test/tools/llvm-cov/Inputs/elf_binary_comdat create mode 100644 test/tools/llvm-cov/Inputs/elf_binary_comdat.profdata create mode 100644 test/tools/llvm-cov/Inputs/instrprof-comdat.h create mode 100644 test/tools/llvm-cov/cov-comdat.test diff --git a/test/tools/llvm-cov/Inputs/elf_binary_comdat b/test/tools/llvm-cov/Inputs/elf_binary_comdat new file mode 100755 index 0000000000000000000000000000000000000000..319322d747f282483d206898392860a59af25787 GIT binary patch literal 2125219 zcmeF&3zQvIoiO_9PB%2K?gn{8Ky8^u0|Oxq5s89!C!OT9b~FP4QD8bD=|D8Eo1EsM zjAo`2#$(e?#>;%4Yp%0~HRHTz&FD8SM8|Owg9H>cBKk2tGm6?Rs6mm3B7Oh6PF3fm zN$&WqyYBkdUF%nPde{Ef-n(}FYS*qhb>e##4=!nLYRbx`IeT4}avwOhHxJeSX4waq zbmjTkmcgEB4q znfjPMj~3e>E&5re{jn`AWtz&SW%>83Yts6I=jPc+d-IhteY|L|Ov`$|6ZKL*f77M2 zD0sp8b=O}!|AO^v&R@T72=Nn-5dX^~#QTpB|Kt(k7at-1_eY3-JK|mOnYmmLfx|h! 
z{Rr_}ju1cY2=Ol;A^xEw#IHR<{G&&R-yZQ~{emUN_3AW?FV5C(+C0258yebLUA6ih zL#uCm$I#kU>(*x*-mzxgmh6V%>gIK8vaQuE)lKWO)vMO8-?TdA)<=~M!yC2^SEJ_e zmMt4M4aJsK)peUvQ)@SGS+}veHZ_(?t=@cVws!UUOSRYR+)!?&zU zUAkezrj13Jp={~kz`{jC7tZaOKfQWz?V`B{*Dsv=s(@;aE4I1ezm|sew6%3QUoV$z zo8oFu*Ks*--1*1G8j0nR z*RRaeG-nl|Jiosop5~QunP`Z|j;5Cb4e^fYXqG+G5KnVexyf2VHn}g)|19Ox(3;$p z=YN#)vr_&edH%a8pN7)pU3vcNDW8VWR6ioq7HPDW6XL zWR~aOm-6Y9Pab$a0%!k0%BNF3xj)apGv(7Mp4^w`x1@YJwUfK@{M%DLozlsVEbUvFU0FH5HT*P>V?AE+nSL^3+n z`tI{1baSky9ls}S8g2c=R`YuqVTzq>Djeos^k5s?eUwZ2FF@o z8nJ<<1HDh}Z5lbMXIIZdkInvMx>S00jZQuPp5vodOKKtxb!x{`)y1{vcmL#NP0xS% z`GY-d+PC0m?>;69eq(-IhW9V{+3%e&{)VQ$V6v2&}7$Cpf1YQL(CxAYEdW_vNCo> zXJu?_TctKx8GG%-ieJard+nmm>bI}C?w-`;zH9UIcg=M@yJB^1x&OYm^j+0gd#dls zz7?^XXgg)KEy@TWD ze>_U}jV;c0?CRL@mr-W%j;Vp#nca(PBiYEQY2aUTUEj5R*Y*8w-%xa7Ta<|QzES=D zw%2CW{hv;cc9q(9CZ70JbUEg*SEbCokr^Cc*)&-D;l$Okufe!pYM-stzFn#9op^Nw zDz%?Zd?_yblrBv*$6xijGwKapk~T*}JJPPB{K36XUKb@hUe;f)))|?6(n-$$o=<@n zcYg0kEBQ^lWBZ>* za~*e|*7J?UJwJ}q)!n)Mp^?)kH%2t*qy5o6O_kMO>9~8vDcSaisy*9&dG^TT_1L!C zPRTZpd?zfoPsMH`IJ&E$?8w7;{j|9&TI(CX^7)Q`{>=Dov!cw@#Q8~L$F70eeI47+ z%e$~F+x|eu_F1vMxaZNHZ!BB=g~9RLx;r21s`q2r>U;Y}?`>N2-s6wTcmMg0y9ZB* zD%B<1o;`cyKl9k0ZMU_iE)A@HZm{;b2n{wpJy?5wur}D;Ht`R?j6JUY{Nkp^2gf2` zR3F)0oabe$AN_RfI~%M$z39G{S7Z}oF)%}ItlB+iP35BbIc3L=_a)HLdoMY7s?rb* zr-}dT*)$MZ*St86wYK{c$+>4&ei42;t*7yENfe2(So`d|FU^Pg&Z zX1>b!OR|e18Ed`uf=EVxFFIbERlQ;2p}0p?J{h&sl2u;5FQ)6}j_qUV$i`-+@pwt> z_RnK{Ozlg#b0_YK8hy2}Xnj*^YW$Rr=+h^fqe4zr8M~xsSAP5s{@Cx;ujPM=-E4b(*0KGsQU}Lhbx~^d`Fg7Z zwR=0ZkEeL;hn2l8-LaNUoD}!C%GfDmb7N+RD{;K_iJq>gc-gN{OXXtjIpvnv6!8aA zyw*AK$4}S0_>ygxWYyW9OcQxJ8+#{iOnVtW<&UFWt+_IGe0Q{Ratz$T+K&cnKbUwn z9uDHP#`&E1r)Xkvnu30rZl$?m$M&D3_3_KDjQ!@49oxSdk%`Nr?ARp}?~29;a@1$f zkA93TOD64ynkC23EF{@AI1LgeQ)Zww#Pn;KLa`e*fI9ZFc>P-_T#zWX)2JqjfaS)SxIfB0x zj=Zt8xLCKRQ5rX`OXH>$_p0=*DIecYr~Z$>>WZ|N zm)6B%{B~^rPKq~-?+fFZF9z{H{B~D_<2q|SG1t26C8-$Udv!P-wIUK|g{gSDU5_Dp;!TG;W8j_u#eM|V!#vHffH z;eFG|)5H7TG`zP)*%;m*ini;+`}F9|Kz{L@azRN{pE7QYGLwH4m2$Di{_5cHS-W$r 
z^+z$HV^GH9OnSbE@%Q2wGkaUV6yY>cPrfgnbr05v$C|hHT^WxaeJj$F$3X4j{4rs0 zeEpu;X^>aOUQ-{(WABJ5;G)WCgggHE9S174A5H8`K((I^%(}FDaKU2(wM)B49!bNz zXIC)>q6>Xj4b-0cbXV-S@5+JNuk*Wb--^E2P0Y9Ni9TI1b!FCH`$3$Nd|TwwKs>8b zThuvNyQoroWU%&*w*K1c*?SkoQ(cyocaU@j-<&Sq+IOZpdv-nc!}@$)-pAWw9-o&s zkG-dJ>P2a4iH=2Queok&|G}eQyEFCb7jgTqeYJQxnltv6Gsl+B9Acxs6=UYYgA&Pwe|6We|s=dS7A!QWmT9f~fW z8T*_weL2*}ZyJ%cujk`^>Wa3w%m2x)dZ~`P|?rm-TUA2UH!xK^_iZhmeuY{tY57^a(=#Y&Q~hG|M;ActL*;OtcK^S?cK8@ zm>w4Xv3ql@4UT=P`<7Jfo{w+%#@*?EG1MNZ)Nbo;8?41$q*m>oJvjE^?v+ty;GCZh z*5Y}nB^r1j9yTj`w|93&JS~H9)r`!ojNO^HA5S4C=T9NE`#W}Qio0*+_wP-eDF;&i z%$hAb=ZV^V1G}G{6+1aO?QP;SaVtp1y%UcVYn95__U`mmx-!0^dv-kGoSgPl8UIxG z&e&_Ec5V0a!RYMDN^N`hwiFqU+Jn*HjlCUrKb0P*uCCP9cW+MH^BUvzK38fV>rU5Z zI@h;Ey_h{d-2L82Kb^jIr_u*f8b|oSNS7DucSV{Gc(C@5-G5ueJ`(9AX}$KL?tAiW zAM4(g@=^ExNMkyW@^^IaOR=c;NU?2yr1iEw)jd(fo+)CLu^9GK`*!U5L^{FI2G+TE2}8e7rbwk*C@4Ad5P_SH7b?%TUKzv>5SSGM*1xaWbL17q_ati+_% zK6Yx%u@jfY<6Ycv`$j+86yJ*$eE#O|#P^glBl7xm{l{(x#(poZ|0p{3qMipPC*nD= zK5lChmAH^5hteUX`*ePAllM<=9~iqbzxL8di37Z?yDN=_dBr8TqPr^|X8NYCj4OEL z-m;UAjfKR&&glx`pT^gMsm{rPXntbyC;3k6*MGzJjbEhi81QYYAy4m{_?JB0n{SWFGT&Z_C-OPTY~Hfkvz4*Dp=-N4m(>oW&d!dn za;eRj_D=2|tWCybHE~_~>NI+9rD5ZDb=n^LT6z^TyF9+V3)|vM#ttv< zjQGe;M|U^XK37@rKX3kO9QP%a1&@sUV?5hUzX6F`KtnD)&mLWSs^jA=n~Uea+EcZM z;tG9wcE{Z5h*d*zr`y_Alm&pXsYTQF;37qu=dV@UxDk zUt2r+%!@ky@L%&yyVFaMG!8oMUNHKZrb_g#>6!iopBwqt8^6A`W95%x{a0%{*8NAM-;d8P)^@CVHuC=&=@V-^p1&_X_r_;$eD>UY zca%8L^trViTff~_-?-_S$b2VW_@ol=?(cqbR(uwGsbl+(q9?U4_jTNTd|T7~@iu0` z6CI;pi`aCedBGPuZojuNGAl*?M`NTVMeb^hw5G`W8zV=h$Zd_0qZfSfrj674X^Y&- z>D)1qyL>u#Y~=cNq!GX?7ZAr{E zS3Of%@chW5F|ibHf)2ia$|uapSNy1)cgL4@jP8Ht;PxwOKi~bmmWl7j#n$x5HGA^! 
zKV$D{nRqw?@hvaK4qgoLJUe!COB%>uO+$NpXI=- zM|0nT`#ZKj5uK@hIbOVU930Nyir91{AI_g|jO4@l6OEC4IH#NSbgg_i|4CycAI|B% zGhHSP=aK1L8qPON=hAR~>vS#+=QmF0(r~_PI+up?1=G1SoL`>j%He!bye2!XErxTP zuby4?`}5C>`K?UjKHHwgcbwJ9{usGfuGuQ$HP=Nmio_fggT zk?DDA>WbO@@qO>Ui3?(HsmAz?ZP(N$>XjP1m#)9J^wqwUz7##5_ME;4#eRPt4+8OI zJGZZPYG3V5EfY`2T{_;eR)^x1Xy5qKsb#gN2gdu4YD#zYnEcXp*k5}#zBR>zM*lfa z42l3S@N?B(5eqGx;bwxb>eD<+Fs~=b5 zNH3{pcYJ(qrS|Gd?JaFx{c4hC<*>RF$e8d;S_{xy?Jm$ZSkNC)W2ZwO|w%oV~b{we5&WEhWO1B^>>rk#nIN+ zu8y_1U%w|ROjVDb{DVhhtnQ5QJbiD^pSNn4EjY6)=CaE!jAU{{jG)E>F_T^P;`;WJ z()R0S4EU_})Y{sLo96}k_QaDW#Y-A?5&K`-)tE2d7uAPcJh*gh|NE#lIJS09ya12K z`i|}C*Q44)%f`=pc-h!+mWTc$LOl-*EI2P7BfmFt^5h>xzI>ipaBlTomD*R*&lB;m zkRE9s-}dZO^)30c{ioA8o0tUT~avbmJ`1rf$eL7vv z@kD+6#6LwV^*zT!^6}Nr4A%DNP0ks)b8u|+oXL~p6!m-~|BWDR8tC}w?`8G9J+`wx z9@F!Bak0kG9DO{#!QIw9d*b`?>Lb3uOnf2IWl8b0xSzyTHt?zPt83ng$~k-EX)(WS zE4!y=4RrkTa}%eReQN3M*#6s5ZE;*j3%*quU!T5I#G8eY&6Cf^0o3;&m)pcM@xCXH zKE5jabmGAnAaOazH;FN!Q{`oY9~X(EbO1iu|$rz-hn|I1nN zU3x`#yi>V!UldBuniDIdRP4FBHBR1~%J|lKX$SAhzfo2HFdpomIU@$FclV`Uja}M3G5(*iGJ5aTr~W!Rb-{F}J{X<)!D)G?&Z~Fo zl0!Sy6P=pe6leV41s3mrVm_YAUs4sXEaEll&QrSLC)V-vKN#s1gXes|=NpyXzm6A3 z@yd6@{^-t*A6H*mUT^t3?Wg*qjj5BrnZCJH7Cbicqd4T^Nhm+ywFme3g6n_0_2o%2 z&hN36vCf{SCf^=M96v<#K7& zZ`rhV%c`5Rv)5#UgI6sZT7LPPmkbOp9=dAr>iZw``4n z_0+Xt-PR4Os;h5|c7|`+JiNLZJ2-pId0p3!RJ-DzJH>ixJRRhJ(T}sv-g;KOFK>>j zo7Q&4zo6Q*<<|O<% zzB4s-N2K>f+LH1y0I!Yo$-bf#h^x;Tbj*fbfo*C(i zC*t8C(lwE;iL@H&ZINz|^v+06{#ooV((XtfjC6jak4D-b>2r~;i3#}RnC~BsbTHCK zBV7~elabyQX-mwVABwa)(oaR&AHO0!9O;@!pNzB`>GAQy$fJ>78|n7=>FC3ew#3hB z-;DI(NRN*J^<<>ok)9mC;9MH%{76?s+8^oqNLNIIiekjsAyzJ_nmV^$B>#K(@^M^6 z`DeZ{HMK2W^zEHX+GoF^tB{k7*_)O}WJAl2`W&+-^oD^j4Zy)z{#erGng-d+yjnC$&*OmGi_5P@NVf*a&&syBxHQutYea;=NeeLsZ zKdP_&lF_4=w)Zw)er)?C5$S86v#`A@N=A)^?QNCgn%BjK{ugX`L#W1eAD{HY_?);- zetGDAqLs$|^r!vww{M>HyG@7P(bDPmZi{vviR}Cy?xKI z?Q5Nawx{~Z{jmMNJ$k#Sq5qZR z-aR9d1E+qZ`RIc&zr13)7xg(S%|joF_{tPtlJ>JWIaQ{e2HWRVV=sGRH}{3pgKDZ8(OEHu-xycJqI}0ojvJX?Pa`Xh 
z(}$zpzNmLv>UZ>`@p@Q%==HFyeSdTF_+iIs|MWQRZppH{Ca0!8kos}3y~B_DZB5NL z9q#%{bz@2Axf<;qi1yO;TQ2$ekNCu4;_3de|1j}%KY8RZ@rNV6?=bPlBYyvk_<7NR zmKgZEB0iit6UTQ)I={E4`D3)D`Ko-fi3y_K)6~!T=|YM7SPoPV=f^b|@n>eVpUS8E zTRPsx^6C1De;tz_@9n9L{Q7F%-ngF+#rCUXKhf>!CAF9ClglF>w}I&;#UG4#CF1SV z(Yp7e5$}!o8Rs@_Fy>AxV<1nX}bbUOT+KYOtQZzsAgY#}2Zn{qA{mzU@YVmQy?Q=pV zuE3}tsgX18dcjG>D`%#tje6Xy& zcm3gpL4F?YjCSTlI}O7%mjcU=>&^SU?bv!cugI4N(^RaK>G4H= ze_Or$2}S;!bL#mQ7x{_HR_1GG6#32~J-tYuDYl^N~?34dsC9N=6|#7 zu9n_>?WiJOX|CswE%K)q9Xzhc&o0tainL5-l`Fj~^R?z8Kc`qZxyT<~q^A|>JxBHC zYh}E*$d~=Tr6_kyk(T>^eyV;v2U_d(r;2>(d7#LDve;f!|KFapy>8`U`*7+py?Oi< zMS5|OE-BKhigay}-c+Qw7wLa1(!Va!yNmR`BK=yC{-8*oD$=8lt?&2LB7H@XURQjO#cL#-H~1lw&W&wior57xhx?aLb7iOxMn_Mdc3^^{)Oe z>ZR+Yv7dig)Z1Lt+y5K(^Y4p#+y0CF&FtqrMZNd_7xk7dTJ)N(IagjkvaveSHGl3U zbLXFb;YgnAd3VqJx%1|qQ;%k=*RR@mL)T5iTehy-w6W`=xeMmb>zY&DGP3a4k_LzW z1(4qiEkVuQdh3Sjs_P@IZmFj?mTT)aR)@E2&gO31R2`n%w{YP6>Z%)x+zlH?=3YOt zZvC3`*R9FsrreFIw%(Y{U32TkXs@1Dx74>3WAc#9P;A>Wyna=xP^@iUU(M#O+qf?N zR)=qiziT5C+cw28T9pkAtzUoBhM~<{)(&l4wPAQ`Hh1{Op|x9Lb?C-5(O8+QH!-wo z%a&EQ)|)8T-@ZDE#qL&ZShqU*vMKfuf9B@9ow?FzcbRXWd;QkfZH(&;!yBvR z*43MC+OTSKg8YBCr00<|g{3(uE%!I|7HOVjFfKEIUxFQ1#*@~sE| zyeU?Xi%(g<(%c)lEUOfOtBW)}XQe|b>zB_nS4Aw%ZDsxPx$Lf@qvdm06v>x@rOal( z8*AwqsH|T;_k6PG@P$Rmvj1hdGJQvEPtR0kzI?7aP*hl6)Sub^BAmT5R#I2X`sH(6 zZ}DJRKF5{&pLzV-XVfpB3-2lFm**kXPtVFT>%Tu@$)qTrmCw=pi-P6zaTLjyg0;+M zwODJcpB2wX<;`Q;4Dj-~yG-w#QNMid-&Wj^-uBYzUF9ofy)yczGwPSm{okBZFVT1p zOOcuVzbj&?i>3d8IlYm~vI9l^ok!?@UMT+jZG2LZGzQaB-dDGsThEvG+j_2Ewp>r? 
zC*sq%{|C?QP2udpqW;qP1Un|&dZbR$r%7`_3-rHFUMNCCzky$ zzb8#x)|Ho;={>Xlx1v->L;do5)H6l>ndN8ZC!=noUsgWnzpg8`92~EC<}mr6L@bTn zvj64xgtnsozQV5@-{t ze^+0<#$A0~d6DI*a}CSP`oA69Q>^qquyAEw`oO}jJl&lCJ@}_|FC+7c{m&}{MOxM$ zoDna(Q>F`ICEdSfF1?Go@=#~ttJ}YZ!?rjX^>k*xP|IxT{Ry=)IntK~V`TDH5BJXNBT>R*)yuUV9niVSLee=-x zFHiYB2 zPK$YMWn(B`pOwXE>^)rk#aTHYHCCDx->k}c=Fs>VSvh|k8b32D=Y>P#FKNt9ug}V6 z-1K8hy6#%CC#I$g6)Wlaq`Bcdo>auswRX7j%Oc*Cz0mD#MLWBW?TxjAOM3a2+CMNO z{s%>T=KIMHM!YGW+RNiD=Zn9|<1ajqUo6@w-+M+jU%noHo3a;r}AaFONx%Z{s`q)L_9g{I=(k=`7K5JLq)vngnGQIXy+Y8ytlm`|78)s zy^MF($S%NDK7(o#O>d@SOJ>;LDE5T86k{P`oqJDLyQ{@D?4iZ>2fU%fxQ zsZ+5mjL+fv^Qyf3N!j0C+?$F`FN^Z{3&&e3aJY8f9Obh(?=!FWm3ccaysp<3zucwK)W;wQy<{75m6<<(=4f05$NCuI9h>P;QUipS6F^F@4D5ih@o{4j4PFM7zI zU*+)=vhuy;8Ie!R32~Dvug!X-{`Z_C#9y1oqn~H><^?}gFP>d}gz`5FKtpG@&$ zy)ijD26pl<-`BPm8@p0G`njt(FKJ#&%PaHp_4UTTuZ?(TcEn$HTXZcJAt~w^NRd z=TbcSf7guhGdsSplx6ZA7k|!+cxU*R_m^_pqEtTG**T+~Re3unO#dEmLs4OC5#QC_ zn@3($#NSuM54@}%KdHEW|E`EH|E+rbokjV3^LUYINWPcisXwRH%byn4Lt36K;+@5H z^on{o`>mr7f4r}bcxTMRGoM5Hk5K;V6p#IGp5d@H;$6kQ8k0ZB+lhImINr$S%b(}* z#*+2*e~I{s%`eU-dRHE@an}*befvlw z45Sm|n|As9S(ZO3kDr73$4K0XxXB^kc zMJtO)J$YR!AN~KpvAu`H7Z>ex7Wa|vV(ZlrKk+d0@W!J2M+(o!OmG70xPJOy)zpI${E-E(sN5s?l>RPlikCf--7pa|y&zv`!VnC+jEq|X| zQM7YZ5ubNfZyt$hDqmh&#LM3wBbzThv69Bswi)B9Qk38KTfKR?FV%}@>+cZBwzPwmHXUA?H@ z^Rhq3#8>Ea-8KF$mU{ME5${SXGnUIDo;V+z&I`Zq|79MJdEl(x)NHmY{~PZ^Q#|_fk<)wg$Onq}=OUhtcln}~d8FL$ zSMqpKs3G}b(f+<-9xLb7Y#{vtx;%aXUAKPS#v3k=-)+}z7@oU&bvCqg@XZVR28V{W zj$A)9Fm%LntG86QR!7#Zof}n#-mvKMp}~PGRtybgYlgQB->`0L{3JJ2-7vIz{icoa zE7)N|Lqlse4c)MQ)Ag&?53Q+g+OlJBM-_+Ne-ekL2)?Tpqtj-keIOf+?_R zqqB!r|eKmluwKF@Ijqg)??q-&5@N&@;4r{i^EPO zSzVQam14)cy2VEx-{eC;AOG8fO7eVrd;hq)o3rby>#Oe>#-6*z*0{ix?Vk;nGV-Kx zjl5&U;TXTZxtf7mLc|e@H$qpuuKo!-1xd)Elxx*sDx@hDt5v~Uyl@zYVxd-@Sf6pY zB&JI5pe3PKYune1tu{|6HRxvZgdRw-;oDkdbjyp02ucc^Yr$I)!c*L~AdzJDVz@b$ zl^!RQV38~R0!9K7%=nHgZv20$%5e-6cDB9<*B^Wd(d=<&y@TdWf?4vh7SyFe!FBKu z1S!)~3?>`Hj3`={ts5*Edwssi*XvcaoL?``X?dhJKHtmBuakU(Um+y=YUp*-^F3tkAzjv^v1xh 
zquU{I{MA1JSEL1_oP?mx-W^ZV%!DjsD;m+DHL(sN{FJqq2;hj;_Y@V{N+PAg2q|U( z3co-?VA^olB{)>)1+`JE*P3+~7J07lMu};Tw;RwUv>VjR4T1==auJOKXXxVJ1PE$vw`J&}j%YMF}XBLi3^O_rj33}w*CZ+jX-`eD_ zsDA_%@7%jUR2WI{?{C*=OT5Sp>oVFmF{W1g)Fw?KhEb^}UP69*w*9ch+7!kMGKQVM zKW|OgKxUNINV4tjbL`I`X*z-ge;ge^JakmXu9F3wa~Mj&vA5MY z{-|VAb4x^Adf8DYqmc|&p6H$2tOwn~O>CvnEXM)OxmFAJOqNE9k(c%?Yk%6n|3;0j A4*&oF literal 0 HcmV?d00001 diff --git a/test/tools/llvm-cov/Inputs/elf_binary_comdat.profdata b/test/tools/llvm-cov/Inputs/elf_binary_comdat.profdata new file mode 100644 index 0000000000000000000000000000000000000000..85667041e2c294edf76103df5f041757c17361f2 GIT binary patch literal 1040 zcmeyLQ&5zjmf6V600B%;8csn47#W;G)y>KZ>{s(ar4*oae3YNDo4>zjrmM4&YZ*)* zBa{I%j|0kQWSBKQ|Msolr(^`7A_h class FOO { +public: + FOO() : t(0) {} + + T DoIt(T ti); + +private: + T t; +}; + +template T FOO::DoIt(T ti) { // HEADER: 2| [[@LINE]]|template + for (T I = 0; I < ti; I++) { // HEADER: 22| [[@LINE]]| for (T + t += I; // HEADER: 20| [[@LINE]]| t += I; + if (I > ti / 2) // HEADER: 20| [[@LINE]]| if (I > ti + t -= 1; // HEADER: 8| [[@LINE]]| t -= 1; + } // HEADER: 10| [[@LINE]]| } + // HEADER: 1| [[@LINE]]| + return t; // HEADER: 1| [[@LINE]]| return t; +} diff --git a/test/tools/llvm-cov/cov-comdat.test b/test/tools/llvm-cov/cov-comdat.test new file mode 100644 index 00000000000..f44434c0c22 --- /dev/null +++ b/test/tools/llvm-cov/cov-comdat.test @@ -0,0 +1,10 @@ +REQUIRES: shell + +# The binary contains two (merged) covmap sections which +# have duplicate CovMapRecords from comdat (template instantation). +# This test makes sure the reader reads it properly. It also +# tests that the coverage data from different instantiations +# of the same template function are properly merged in show +# output. 
+ +// RUN: llvm-cov show %S/Inputs/elf_binary_comdat -instr-profile %S/Inputs/elf_binary_comdat.profdata -filename-equivalence %S/Inputs/instrprof-comdat.h | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h From 8bb497e708e0842a354f315dea425db140410526 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Thu, 21 Jan 2016 17:04:42 +0000 Subject: [PATCH 0050/1132] [GCOV] Avoid emitting profile arcs for module and skeleton CUs Do not emit profile arc files and note files for module and skeleton CU's. Our users report seeing unexpected *.gcda and *.gcno files in their projects when using gcov-style profiling with modules or frameworks. The unwanted files come from these modules. This is not very helpful for end-users. Further, we've seen reports of instrumented programs crashing while writing these files out (due to I/O failures). rdar://problem/22838296 Reviewed-by: aprantl Differential Revision: http://reviews.llvm.org/D15997 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258406 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 94680bd2ddce07dffe8f91ac7bf8ad53e61fbf7e) --- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 10 ++++++++++ test/Transforms/GCOVProfiling/modules.ll | 12 ++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 test/Transforms/GCOVProfiling/modules.ll diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index d6fdf26ab76..e2968c8480d 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -494,6 +494,11 @@ void GCOVProfiler::emitProfileNotes() { // LTO, we'll generate the same .gcno files. auto *CU = cast(CU_Nodes->getOperand(i)); + + // Skip module skeleton (and module) CUs. 
+ if (CU->getDWOId()) + continue; + std::error_code EC; raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None); std::string EdgeDestinations; @@ -853,6 +858,11 @@ Function *GCOVProfiler::insertCounterWriteout( if (CU_Nodes) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { auto *CU = cast(CU_Nodes->getOperand(i)); + + // Skip module skeleton (and module) CUs. + if (CU->getDWOId()) + continue; + std::string FilenameGcda = mangleName(CU, "gcda"); uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i]; Builder.CreateCall(StartFile, diff --git a/test/Transforms/GCOVProfiling/modules.ll b/test/Transforms/GCOVProfiling/modules.ll new file mode 100644 index 00000000000..1a8edfeedd4 --- /dev/null +++ b/test/Transforms/GCOVProfiling/modules.ll @@ -0,0 +1,12 @@ +; RUN: opt -insert-gcov-profiling -o - < %s | llvm-dis | FileCheck -check-prefix=EMIT-ARCS %s + +; EMIT-ARCS-NOT: call void @llvm_gcda_start_file + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: false, runtimeVersion: 2, splitDebugFilename: "my.dwo", emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, dwoId: 43981) +!1 = !DIFile(filename: "", directory: "/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} From 2b22b99882e07040c3fbdb931806822a00e8a817 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Thu, 21 Jan 2016 18:28:36 +0000 Subject: [PATCH 0051/1132] [TTI] Add getCacheLineSize Summary: And use it in PPCLoopDataPrefetch.cpp. @hfinkel, please let me know if your preference would be to preserve the ppc-loop-prefetch-cache-line option in order to be able to override the value of TTI::getCacheLineSize for PPC. 
Reviewers: hfinkel Subscribers: hulx2000, mcrosier, mssimpso, hfinkel, llvm-commits Differential Revision: http://reviews.llvm.org/D16306 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258419 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit c0ed657c76eb0d42d7dbf5ffd56d54d39af76e3f) --- include/llvm/Analysis/TargetTransformInfo.h | 7 +++++++ include/llvm/Analysis/TargetTransformInfoImpl.h | 2 ++ lib/Analysis/TargetTransformInfo.cpp | 4 ++++ lib/Target/PowerPC/PPCLoopDataPrefetch.cpp | 8 +++----- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 12 ++++++++++++ lib/Target/PowerPC/PPCTargetTransformInfo.h | 1 + 6 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 3913cc3f107..30bda634377 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -416,6 +416,9 @@ class TargetTransformInfo { /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; + /// \return The size of a cache line in bytes. + unsigned getCacheLineSize() const; + /// \return The maximum interleave factor that any transform should try to /// perform for this target. This number depends on the level of parallelism /// and the number of execution units in the CPU. 
@@ -609,6 +612,7 @@ class TargetTransformInfo::Concept { Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) = 0; + virtual unsigned getCacheLineSize() = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, @@ -775,6 +779,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { unsigned getRegisterBitWidth(bool Vector) override { return Impl.getRegisterBitWidth(Vector); } + unsigned getCacheLineSize() override { + return Impl.getCacheLineSize(); + } unsigned getMaxInterleaveFactor(unsigned VF) override { return Impl.getMaxInterleaveFactor(VF); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 43815234051..41bc72ca3ad 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -264,6 +264,8 @@ class TargetTransformInfoImplBase { unsigned getRegisterBitWidth(bool Vector) { return 32; } + unsigned getCacheLineSize() { return 0; } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 9c1d3fd4f58..ed7005e5e52 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -215,6 +215,10 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } +unsigned TargetTransformInfo::getCacheLineSize() const { + return TTIImpl->getCacheLineSize(); +} + unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index e3a35d5df35..c113ae5f52c 100644 --- 
a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -50,10 +50,6 @@ static cl::opt PrefDist("ppc-loop-prefetch-distance", cl::Hidden, cl::init(300), cl::desc("The loop prefetch distance")); -static cl::opt -CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), - cl::desc("The loop prefetch cache line size")); - namespace llvm { void initializePPCLoopDataPrefetchPass(PassRegistry&); } @@ -110,6 +106,8 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) { AC = &getAnalysis().getAssumptionCache(F); TTI = &getAnalysis().getTTI(F); + assert(TTI->getCacheLineSize() && "Cache line size is not set for target"); + bool MadeChange = false; for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) @@ -193,7 +191,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { if (const SCEVConstant *ConstPtrDiff = dyn_cast(PtrDiff)) { int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); - if (PD < (int64_t) CacheLineSize) { + if (PD < (int64_t) TTI->getCacheLineSize()) { DupPref = true; break; } diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index cd86dabd5ab..b60c07c3672 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -21,6 +21,12 @@ using namespace llvm; static cl::opt DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); +// This is currently only used for the data prefetch pass which is only enabled +// for BG/Q by default. +static cl::opt +CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), + cl::desc("The loop prefetch cache line size")); + //===----------------------------------------------------------------------===// // // PPC cost model. 
@@ -230,6 +236,12 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { } +unsigned PPCTTIImpl::getCacheLineSize() { + // This is currently only used for the data prefetch pass which is only + // enabled for BG/Q by default. + return CacheLineSize; +} + unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { unsigned Directive = ST->getDarwinDirective(); // The 440 has no SIMD support, but floating-point instructions diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 04c1b02235f..d216bdfec97 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -70,6 +70,7 @@ class PPCTTIImpl : public BasicTTIImplBase { bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); + unsigned getCacheLineSize(); unsigned getMaxInterleaveFactor(unsigned VF); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, From fc1c57523179e3195ba452301ddf5589c06f625a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 21 Jan 2016 18:01:57 +0000 Subject: [PATCH 0052/1132] make helper functions static; NFCI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258416 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit d0d71cf4bb98c8a4305c26f50654424e005a1f82) --- include/llvm/Transforms/Utils/SimplifyLibCalls.h | 2 -- lib/Transforms/Utils/SimplifyLibCalls.cpp | 12 +++++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 410a075aeb9..fc34f49a125 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -125,8 +125,6 @@ class LibCallSimplifier { Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); // Math Library Optimizations - Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool 
CheckRetType); - Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B); Value *optimizeCos(CallInst *CI, IRBuilder<> &B); Value *optimizePow(CallInst *CI, IRBuilder<> &B); Value *optimizeExp2(CallInst *CI, IRBuilder<> &B); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 908b4bb6a65..aad1b00af8d 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -970,11 +970,9 @@ static Value *valueHasFloatPrecision(Value *Val) { return nullptr; } -//===----------------------------------------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' - -Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, - bool CheckRetType) { +/// Shrink double -> float for unary functions like 'floor'. +static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, + bool CheckRetType) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || @@ -1013,8 +1011,8 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, return B.CreateFPExt(V, B.getDoubleTy()); } -// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' -Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { +/// Shrink double -> float for binary functions like 'fmin/fmax'. 
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the From 1a9fc77e6c6d790c34731ed1256084c5efcf6c4b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 21 Jan 2016 20:19:54 +0000 Subject: [PATCH 0053/1132] [LibCallSimplifier] don't get fooled by a fake fmin() This is similar to the bug/fix: https://llvm.org/bugs/show_bug.cgi?id=26211 http://reviews.llvm.org/rL258325 The fmin() test case reveals another bug caused by sloppy code duplication. It will crash without this patch because fp128 is a valid floating-point type, but we would think that we had matched a function that used doubles. The new helper function can be used to replace similar checks that are used in several other places in this file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258428 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit faf8f30ad7233c6c308df0fb7d844e4b744fd65a) --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 34 ++++++++++++++----- .../InstCombine/double-float-shrink-1.ll | 20 +++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index aad1b00af8d..2f3c31128cf 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -970,13 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) { return nullptr; } +/// Any floating-point library function that we're trying to simplify will have +/// a signature of the form: fptype foo(fptype param1, fptype param2, ...). +/// CheckDoubleTy indicates that 'fptype' must be 'double'. 
+static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams, + bool CheckDoubleTy) { + FunctionType *FT = F->getFunctionType(); + if (FT->getNumParams() != NumParams) + return false; + + // The return type must match what we're looking for. + Type *RetTy = FT->getReturnType(); + if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy()) + return false; + + // Each parameter must match the return type, and therefore, match every other + // parameter too. + for (const Type *ParamTy : FT->params()) + if (ParamTy != RetTy) + return false; + + return true; +} + /// Shrink double -> float for unary functions like 'floor'. static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool CheckRetType) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) + if (!matchesFPLibFunctionSignature(Callee, 1, true)) return nullptr; if (CheckRetType) { @@ -1014,12 +1035,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, /// Shrink double -> float for binary functions like 'fmin/fmax'. static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. 
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPointTy()) + if (!matchesFPLibFunctionSignature(Callee, 2, true)) return nullptr; // If this is something like 'fmin((double)floatval1, (double)floatval2)', diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll index 319ea325983..74f3ebbf523 100644 --- a/test/Transforms/InstCombine/double-float-shrink-1.ll +++ b/test/Transforms/InstCombine/double-float-shrink-1.ll @@ -364,6 +364,26 @@ define float @max1(float %a, float %b) { ; CHECK-NEXT: ret } +; A function can have a name that matches a common libcall, +; but with the wrong type(s). Let it be. + +define float @fake_fmin(float %a, float %b) { + %c = fpext float %a to fp128 + %d = fpext float %b to fp128 + %e = call fp128 @fmin(fp128 %c, fp128 %d) + %f = fptrunc fp128 %e to float + ret float %f + +; CHECK-LABEL: fake_fmin( +; CHECK-NEXT: %c = fpext float %a to fp128 +; CHECK-NEXT: %d = fpext float %b to fp128 +; CHECK-NEXT: %e = call fp128 @fmin(fp128 %c, fp128 %d) +; CHECK-NEXT: %f = fptrunc fp128 %e to float +; CHECK-NEXT: ret float %f +} + +declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for. 
+ declare double @fmax(double, double) declare double @tanh(double) #1 From 8159b8fc49b61cf96b19ff7eb08c58639ad52522 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 21 Jan 2016 21:13:27 +0000 Subject: [PATCH 0054/1132] =?UTF-8?q?Fix=20MachOObjectFile::getSymbolSecti?= =?UTF-8?q?on()=20to=20not=20call=C2=A0report=5Ffatal=5Ferror()=20but=20to?= =?UTF-8?q?=20return=C2=A0object=5Ferror::parse=5Ffailed.=20=C2=A0Then=20m?= =?UTF-8?q?ade=20the=20code=20in=20llvm-nm=20do=20for=20Mach-O=20files=20w?= =?UTF-8?q?hat=20is=20done=20in=20the=20darwin=20native=20tools=20which=20?= =?UTF-8?q?is=20to=20print=20"(=3F,=3F)"=20or=20just=20"s"=20for=20bad=20s?= =?UTF-8?q?ection=20indexes.=20=20Also=20added=20a=20test=20to=20show=20it?= =?UTF-8?q?=20prints=20the=20bad=20section=20index=20of=20"42"=20when=20pr?= =?UTF-8?q?inting=20the=20fields=20as=20raw=20hex.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258434 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit d8116dc95faf85c1209ae8dc6515950f43dd188f) --- lib/Object/MachOObjectFile.cpp | 2 +- test/Object/macho-invalid.test | 10 ++++++++-- tools/llvm-nm/llvm-nm.cpp | 13 +++++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index d1f79b225ee..4ba6523a528 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -458,7 +458,7 @@ MachOObjectFile::getSymbolSection(DataRefImpl Symb) const { DataRefImpl DRI; DRI.d.a = index - 1; if (DRI.d.a >= Sections.size()) - report_fatal_error("getSymbolSection: Invalid section index."); + return object_error::parse_failed; return section_iterator(SectionRef(DRI, this)); } diff --git a/test/Object/macho-invalid.test b/test/Object/macho-invalid.test index f4aa1e0c298..686e516f5e9 100644 --- a/test/Object/macho-invalid.test +++ b/test/Object/macho-invalid.test @@ -31,9 
+31,15 @@ RUN: not llvm-objdump -t %p/Inputs/macho-invalid-symbol-name-past-eof 2>&1 \ RUN: | FileCheck -check-prefix NAME-PAST-EOF %s NAME-PAST-EOF: Symbol name entry points before beginning or past end of file -RUN: not llvm-nm %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \ +RUN: llvm-nm %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \ RUN: | FileCheck -check-prefix INVALID-SECTION-IDX-SYMBOL-SEC %s -INVALID-SECTION-IDX-SYMBOL-SEC: getSymbolSection: Invalid section index +INVALID-SECTION-IDX-SYMBOL-SEC: 0000000100000000 S __mh_execute_header +RUN: llvm-nm -m %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \ +RUN: | FileCheck -check-prefix INVALID-SECTION-IDX-SYMBOL-SEC-m %s +INVALID-SECTION-IDX-SYMBOL-SEC-m: 0000000100000000 (?,?) [referenced dynamically] external __mh_execute_header +RUN: llvm-nm -pax %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \ +RUN: | FileCheck -check-prefix INVALID-SECTION-IDX-SYMBOL-SEC-pax %s +INVALID-SECTION-IDX-SYMBOL-SEC-pax: 0000000100000000 0f 42 0010 00000065 __mh_execute_header RUN: not llvm-objdump -private-headers %p/Inputs/macho-invalid-header 2>&1 | FileCheck -check-prefix INVALID-HEADER %s INVALID-HEADER: Invalid data was encountered while parsing the file diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index 50960653f75..b70a79c334c 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -367,7 +367,13 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I, outs() << "(?,?) "; break; } - section_iterator Sec = *MachO->getSymbolSection(I->Sym.getRawDataRefImpl()); + ErrorOr SecOrErr = + MachO->getSymbolSection(I->Sym.getRawDataRefImpl()); + if (SecOrErr.getError()) { + outs() << "(?,?) 
"; + break; + } + section_iterator Sec = *SecOrErr; DataRefImpl Ref = Sec->getRawDataRefImpl(); StringRef SectionName; MachO->getSectionName(Ref, SectionName); @@ -772,7 +778,10 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) { case MachO::N_INDR: return 'i'; case MachO::N_SECT: { - section_iterator Sec = *Obj.getSymbolSection(Symb); + ErrorOr SecOrErr = Obj.getSymbolSection(Symb); + if (SecOrErr.getError()) + return 's'; + section_iterator Sec = *SecOrErr; DataRefImpl Ref = Sec->getRawDataRefImpl(); StringRef SectionName; Obj.getSectionName(Ref, SectionName); From af2abef74928f930c487a1383a6ba11e23651805 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 21 Jan 2016 21:59:50 +0000 Subject: [PATCH 0055/1132] [RuntimeDyld][AArch64] Add support for the MachO ARM64_RELOC_SUBTRACTOR reloc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258438 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 1a9ebde7e0930343823f20aae07ddb64ed33d0fb) --- .../Targets/RuntimeDyldMachOAArch64.h | 54 ++++++++++++++++++- .../AArch64/MachO_ARM64_relocations.s | 5 ++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index dbca37747ce..ea2a7a2953b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -270,6 +270,9 @@ class RuntimeDyldMachOAArch64 RelInfo = Obj.getRelocation(RelI->getRawDataRefImpl()); } + if (Obj.getAnyRelocationType(RelInfo) == MachO::ARM64_RELOC_SUBTRACTOR) + return processSubtractRelocation(SectionID, RelI, Obj, ObjSectionToID); + RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI)); RE.Addend = decodeAddend(RE); @@ -349,7 +352,15 @@ class RuntimeDyldMachOAArch64 encodeAddend(LocalAddress, /*Size=*/4, RelType, Value); break; } - case MachO::ARM64_RELOC_SUBTRACTOR: + 
case MachO::ARM64_RELOC_SUBTRACTOR: { + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); + assert((Value == SectionABase || Value == SectionBBase) && + "Unexpected SUBTRACTOR relocation value."); + Value = SectionABase - SectionBBase + RE.Addend; + writeBytesUnaligned(Value, LocalAddress, 1 << RE.Size); + break; + } case MachO::ARM64_RELOC_POINTER_TO_GOT: case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21: case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12: @@ -398,6 +409,47 @@ class RuntimeDyldMachOAArch64 RE.IsPCRel, RE.Size); addRelocationForSection(TargetRE, RE.SectionID); } + + relocation_iterator + processSubtractRelocation(unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile &Obj = + static_cast(BaseObjT); + MachO::any_relocation_info RE = + Obj.getRelocation(RelI->getRawDataRefImpl()); + + unsigned Size = Obj.getAnyRelocationLength(RE); + uint64_t Offset = RelI->getOffset(); + uint8_t *LocalAddress = Sections[SectionID].getAddressWithOffset(Offset); + unsigned NumBytes = 1 << Size; + + ErrorOr SubtrahendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = SubtrahendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto SubtrahendI = GlobalSymbolTable.find(*SubtrahendNameOrErr); + unsigned SectionBID = SubtrahendI->second.getSectionID(); + uint64_t SectionBOffset = SubtrahendI->second.getOffset(); + int64_t Addend = + SignExtend64(readBytesUnaligned(LocalAddress, NumBytes), NumBytes * 8); + + ++RelI; + ErrorOr MinuendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = MinuendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto MinuendI = GlobalSymbolTable.find(*MinuendNameOrErr); + unsigned SectionAID = MinuendI->second.getSectionID(); + uint64_t SectionAOffset = MinuendI->second.getOffset(); + + RelocationEntry R(SectionID, Offset, 
MachO::ARM64_RELOC_SUBTRACTOR, (uint64_t)Addend, + SectionAID, SectionAOffset, SectionBID, SectionBOffset, + false, Size); + + addRelocationForSection(R, SectionAID); + + return ++RelI; + } + }; } diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s index 0387b932f1c..b29418783d6 100644 --- a/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s +++ b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s @@ -77,3 +77,8 @@ tgt: .fill 4096, 1, 0 _ptr: .quad _foo + +# Test ARM64_RELOC_SUBTRACTOR. +# rtdyld-check: *{8}_subtractor_result = _test_branch_reloc - _foo +_subtractor_result: + .quad _test_branch_reloc - _foo From dce4816b582a74a8d6a21b81480951ae8cdb0a5f Mon Sep 17 00:00:00 2001 From: Pirama Arumuga Nainar Date: Fri, 22 Jan 2016 01:16:57 +0000 Subject: [PATCH 0056/1132] Do not lower VSETCC if operand is an f16 vector Summary: SETCC with f16 vectors has OperationAction set to Expand but still gets lowered to FCM* intrinsics based on its result type. This patch skips lowering of VSETCC if the operand is an f16 vector. v4 and v8 tests included. 
Reviewers: ab, jmolloy Subscribers: srhines, llvm-commits Differential Revision: http://reviews.llvm.org/D15361 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258471 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit fb7c14ad339bcda1b2482141880fc36480e69bd7) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 3 + test/CodeGen/AArch64/fp16-v4-instructions.ll | 274 +++++++++++++++++++ test/CodeGen/AArch64/fp16-v8-instructions.ll | 84 ++++++ 3 files changed, 361 insertions(+) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2a838d63f0f..c8e67ff8305 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6689,6 +6689,9 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType()); } + if (LHS.getValueType().getVectorElementType() == MVT::f16) + return SDValue(); + assert(LHS.getValueType().getVectorElementType() == MVT::f32 || LHS.getValueType().getVectorElementType() == MVT::f64); diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll index f6e4bdf7345..b892f1902b0 100644 --- a/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -267,4 +267,278 @@ define <4 x i16> @fptoui_i16(<4 x half> %a) #0 { ret <4 x i16> %1 } +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_une: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, ne +; CHECK-DAG: csel {{.*}}, wzr, ne +; CHECK-DAG: csel {{.*}}, wzr, ne +; CHECK-DAG: csel {{.*}}, wzr, ne +define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp une <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_ueq: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; 
CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, eq +; CHECK-DAG: csel {{.*}}, wzr, eq +; CHECK-DAG: csel {{.*}}, wzr, eq +; CHECK-DAG: csel {{.*}}, wzr, eq +; CHECK-DAG: csel {{.*}}, vs +; CHECK-DAG: csel {{.*}}, vs +; CHECK-DAG: csel {{.*}}, vs +; CHECK-DAG: csel {{.*}}, vs +define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp ueq <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_ugt: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, hi +; CHECK-DAG: csel {{.*}}, wzr, hi +; CHECK-DAG: csel {{.*}}, wzr, hi +; CHECK-DAG: csel {{.*}}, wzr, hi +define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp ugt <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_uge: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, pl +; CHECK-DAG: csel {{.*}}, wzr, pl +; CHECK-DAG: csel {{.*}}, wzr, pl +; CHECK-DAG: csel {{.*}}, wzr, pl +define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp uge <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_ult: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, lt +; CHECK-DAG: csel {{.*}}, wzr, lt +; CHECK-DAG: csel {{.*}}, wzr, lt +; CHECK-DAG: csel {{.*}}, wzr, lt +define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp ult <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: 
test_fcmp_ule: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, le +; CHECK-DAG: csel {{.*}}, wzr, le +; CHECK-DAG: csel {{.*}}, wzr, le +; CHECK-DAG: csel {{.*}}, wzr, le +define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp ule <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_uno: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, vs +; CHECK-DAG: csel {{.*}}, wzr, vs +; CHECK-DAG: csel {{.*}}, wzr, vs +; CHECK-DAG: csel {{.*}}, wzr, vs +define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp uno <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_one: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, mi +; CHECK-DAG: csel {{.*}}, wzr, mi +; CHECK-DAG: csel {{.*}}, wzr, mi +; CHECK-DAG: csel {{.*}}, wzr, mi +; CHECK-DAG: csel {{.*}}, gt +; CHECK-DAG: csel {{.*}}, gt +; CHECK-DAG: csel {{.*}}, gt +; CHECK-DAG: csel {{.*}}, gt +define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp one <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_oeq: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, eq +; CHECK-DAG: csel {{.*}}, wzr, eq +; CHECK-DAG: csel {{.*}}, wzr, eq +; CHECK-DAG: csel {{.*}}, wzr, eq +define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp oeq <4 x half> %a, %b + ret <4 x i1> %1 
+} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_ogt: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, gt +; CHECK-DAG: csel {{.*}}, wzr, gt +; CHECK-DAG: csel {{.*}}, wzr, gt +; CHECK-DAG: csel {{.*}}, wzr, gt +define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp ogt <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_oge: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, ge +; CHECK-DAG: csel {{.*}}, wzr, ge +; CHECK-DAG: csel {{.*}}, wzr, ge +; CHECK-DAG: csel {{.*}}, wzr, ge +define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp oge <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_olt: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, mi +; CHECK-DAG: csel {{.*}}, wzr, mi +; CHECK-DAG: csel {{.*}}, wzr, mi +; CHECK-DAG: csel {{.*}}, wzr, mi +define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp olt <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: test_fcmp_ole: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, ls +; CHECK-DAG: csel {{.*}}, wzr, ls +; CHECK-DAG: csel {{.*}}, wzr, ls +; CHECK-DAG: csel {{.*}}, wzr, ls +define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp ole <4 x half> %a, %b + ret <4 x i1> %1 +} + +; Function Attrs: nounwind readnone +; CHECK-LABEL: 
test_fcmp_ord: +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: fcvt +; CHECK-DAG: csel {{.*}}, wzr, vc +; CHECK-DAG: csel {{.*}}, wzr, vc +; CHECK-DAG: csel {{.*}}, wzr, vc +; CHECK-DAG: csel {{.*}}, wzr, vc +define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 { + %1 = fcmp ord <4 x half> %a, %b + ret <4 x i1> %1 +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll index 137d1f358a3..2f70f3635d1 100644 --- a/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -421,4 +421,88 @@ define <8 x i16> @fptoui_i16(<8 x half> %a) #0 { ret <8 x i16> %1 } +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp une <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 16 csel tests. Skipped. +define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp ueq <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp ugt <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp uge <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp ult <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. 
+define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp ule <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp uno <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp one <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_oeq(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp oeq <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_ogt(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp ogt <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_oge(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp oge <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_olt(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp olt <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. +define <8 x i1> @test_fcmp_ole(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp ole <8 x half> %a, %b + ret <8 x i1> %1 +} + +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. 
+define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 { + %1 = fcmp ord <8 x half> %a, %b + ret <8 x i1> %1 +} + attributes #0 = { nounwind } From 38b17817ab33638a579210d4c91cb6f5d6f1fdef Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 22 Jan 2016 03:57:34 +0000 Subject: [PATCH 0057/1132] [SelectionDAG] Fold more offsets into GlobalAddresses This reapplies r258296 and r258366, and also fixes an existing bug in SelectionDAG.cpp's isMemSrcFromString, neglecting to account for the offset in a GlobalAddressSDNode, which is uncovered by those patches. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258482 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit f2cde91200e362b44c66e1ee42a2a4b1af49e450) --- include/llvm/CodeGen/SelectionDAG.h | 7 + lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 150 ++--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 48 +- test/CodeGen/WebAssembly/address-offsets.ll | 672 ++++++++++++++++++++ test/CodeGen/X86/lea-opt.ll | 18 +- test/CodeGen/X86/memcpy-from-string.ll | 24 + test/CodeGen/X86/negative-offset.ll | 18 + test/CodeGen/XCore/threads.ll | 4 +- 8 files changed, 855 insertions(+), 86 deletions(-) create mode 100644 test/CodeGen/WebAssembly/address-offsets.ll create mode 100644 test/CodeGen/X86/memcpy-from-string.ll create mode 100644 test/CodeGen/X86/negative-offset.ll diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index a21e9ae881a..39fcc4b0498 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -1156,6 +1156,10 @@ class SelectionDAG { /// either of the specified value types. 
SDValue CreateStackTemporary(EVT VT1, EVT VT2); + SDValue FoldSymbolOffset(unsigned Opcode, EVT VT, + const GlobalAddressSDNode *GA, + const SDNode *N2); + SDValue FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDNode *Cst1, SDNode *Cst2); @@ -1267,6 +1271,9 @@ class SelectionDAG { unsigned getEVTAlignment(EVT MemoryVT) const; + /// Test whether the given value is a constant int or similar node. + SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N); + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c741982bc08..98caf5b2c43 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -390,6 +390,9 @@ namespace { /// consecutive chains. bool findBetterNeighborChains(StoreSDNode *St); + /// Match "(X shl/srl V1) & V2" where V2 may not be present. + bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -763,16 +766,6 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { EltVT.getSizeInBits() >= SplatBitSize); } -// \brief Returns the SDNode if it is a constant integer BuildVector -// or constant integer. -static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { - if (isa(N)) - return N.getNode(); - if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) - return N.getNode(); - return nullptr; -} - // \brief Returns the SDNode if it is a constant float BuildVector // or constant float. 
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { @@ -825,8 +818,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); @@ -845,8 +838,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); @@ -1657,34 +1650,28 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return N0; if (N1.getOpcode() == ISD::UNDEF) return N1; - // fold (add c1, c2) -> c1+c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); - // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + // canonicalize constant to RHS + if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + // fold (add c1, c2) -> c1+c2 + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; - // fold (add Sym, c) -> Sym+c - if (GlobalAddressSDNode *GA = dyn_cast(N0)) - if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && - GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, - GA->getOffset() + - (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A - if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), DL, VT), - N0.getOperand(1)); - } + if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { + if (N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), DL, VT), + N0.getOperand(1)); + 
} + } // reassociate add if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; @@ -1879,11 +1866,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); - // fold (sub c1, c2) -> c1-c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // fold (sub c1, c2) -> c1-c2 + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) { SDLoc DL(N); @@ -2047,8 +2037,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getNode(), N1.getNode()); // canonicalize constant to RHS (vector doesn't have to splat) - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -2125,9 +2115,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (isConstantIntBuildVectorOrConstantInt(N1) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && N0.getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, @@ -2698,8 +2688,8 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if 
(isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); @@ -3045,8 +3035,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (isAllOnesConstant(N1)) @@ -3760,8 +3750,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (isNullConstant(N1)) @@ -3817,9 +3807,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } /// Match "(X shl/srl V1) & V2" where V2 may not be present. 
-static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { +bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -4106,8 +4096,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (isNullConstant(N1)) @@ -4916,7 +4906,7 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { EVT VT = N->getValueType(0); // fold (bswap c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) @@ -4929,7 +4919,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4939,7 +4929,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4949,7 +4939,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if 
(DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4959,7 +4949,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4969,7 +4959,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } @@ -6902,7 +6892,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getUNDEF(VT); // fold (sext_in_reg c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. 
@@ -7021,7 +7011,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -8868,7 +8858,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8922,7 +8912,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -10940,9 +10930,23 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { bool IsIndexSignExt = false; + // Split up a folded GlobalAddress+Offset into its component parts. + if (GlobalAddressSDNode *GA = dyn_cast(Ptr)) + if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { + return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + /*Offset=*/0, + /*isTargetGA=*/false, + GA->getTargetFlags()), + SDValue(), + GA->getOffset(), + IsIndexSignExt); + } + // We only can pattern match BASE + INDEX + OFFSET. 
If Ptr is not an ADD // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. @@ -11063,7 +11067,7 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, // multiply (CONST * A) after we also do the same transformation // to the "t2" instruction. if (OtherOp->getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && OtherOp->getOperand(0).getNode() == MulVar) return true; } @@ -11215,7 +11219,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( SmallVectorImpl &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) @@ -11253,7 +11257,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (OtherST->getMemoryVT() != MemVT) continue; - BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG); if (Ptr.equalBaseIndex(BasePtr)) StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); @@ -11269,7 +11273,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. 
if (!Ptr.equalBaseIndex(BasePtr)) @@ -11557,7 +11561,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); // If this is not the first ptr that we check. if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. @@ -14716,7 +14720,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) @@ -14742,7 +14746,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. 
if (!Ptr.equalBaseIndex(BasePtr)) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 893871f9448..d4af722e7e4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3263,6 +3263,26 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, return getConstant(Folded.first, DL, VT); } +SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, + const GlobalAddressSDNode *GA, + const SDNode *N2) { + if (GA->getOpcode() != ISD::GlobalAddress) + return SDValue(); + if (!TLI->isOffsetFoldingLegal(GA)) + return SDValue(); + const ConstantSDNode *Cst2 = dyn_cast(N2); + if (!Cst2) + return SDValue(); + int64_t Offset = Cst2->getSExtValue(); + switch (Opcode) { + case ISD::ADD: break; + case ISD::SUB: Offset = -uint64_t(Offset); break; + default: return SDValue(); + } + return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT, + GA->getOffset() + uint64_t(Offset)); +} + SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { // If the opcode is a target-specific ISD node, there's nothing we can @@ -3289,6 +3309,13 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, } } + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast(Cst1)) + return FoldSymbolOffset(Opcode, VT, GA, Cst2); + if (isCommutativeBinOp(Opcode)) + if (GlobalAddressSDNode *GA = dyn_cast(Cst2)) + return FoldSymbolOffset(Opcode, VT, GA, Cst1); + // For vectors extract each constant element into Inputs so we can constant // fold them individually. BuildVectorSDNode *BV1 = dyn_cast(Cst1); @@ -4136,7 +4163,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, /// isMemSrcFromString - Returns true if memcpy source is a string constant. 
/// static bool isMemSrcFromString(SDValue Src, StringRef &Str) { - unsigned SrcDelta = 0; + uint64_t SrcDelta = 0; GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast(Src); @@ -4149,7 +4176,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { if (!G) return false; - return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); + return getConstantStringInfo(G->getGlobal(), Str, + SrcDelta + G->getOffset(), false); } /// Determines the optimal series of memory ops to replace the memset / memcpy. @@ -7322,6 +7350,22 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. +SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { + if (isa(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + // Treat a GlobalAddress supporting constant offset folding as a + // constant integer. + if (GlobalAddressSDNode *GA = dyn_cast(N)) + if (GA->getOpcode() == ISD::GlobalAddress && + TLI->isOffsetFoldingLegal(GA)) + return GA; + return nullptr; +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl &Visited, diff --git a/test/CodeGen/WebAssembly/address-offsets.ll b/test/CodeGen/WebAssembly/address-offsets.ll new file mode 100644 index 00000000000..46d16d2b926 --- /dev/null +++ b/test/CodeGen/WebAssembly/address-offsets.ll @@ -0,0 +1,672 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test folding constant offsets and symbols into load and store addresses under +; a variety of circumstances. 
+ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +@g = external global [0 x i32], align 4 + +; CHECK-LABEL: load_test0: +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.load $push1=, g+40($pop0){{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @load_test0() { + %t = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 10), align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test0_noinbounds: +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.load $push1=, g+40($pop0){{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @load_test0_noinbounds() { + %t = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @g, i32 0, i32 10), align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test1: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test1(i32 %n) { + %add = add nsw i32 %n, 10 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test2: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test2(i32 %n) { + %add = add nsw i32 10, %n + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test3: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 
@load_test3(i32 %n) { + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %n + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + %t = load i32, i32* %add.ptr1, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test4: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test4(i32 %n) { + %add.ptr = getelementptr inbounds i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 10), i32 %n + %t = load i32, i32* %add.ptr, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test5: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test5(i32 %n) { + %add.ptr = getelementptr inbounds i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 10), i32 %n + %t = load i32, i32* %add.ptr, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test6: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test6(i32 %n) { + %add = add nsw i32 %n, 10 + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + %t = load i32, i32* %add.ptr, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test7: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test7(i32 %n) { + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %n + 
%add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + %t = load i32, i32* %add.ptr1, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test8: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, g+40($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test8(i32 %n) { + %add = add nsw i32 10, %n + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + %t = load i32, i32* %add.ptr, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test9: +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.load $push1=, g-40($pop0){{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @load_test9() { + %t = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 1073741814), align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test10: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.const $push2=, g-40{{$}} +; CHECK-NEXT: i32.add $push3=, $pop1, $pop2{{$}} +; CHECK-NEXT: i32.load $push4=, 0($pop3){{$}} +; CHECK-NEXT: return $pop4{{$}} +define i32 @load_test10(i32 %n) { + %add = add nsw i32 %n, -10 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test11: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.load $push0=, 40($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @load_test11(i32* %p) { + %arrayidx = getelementptr inbounds i32, i32* %p, i32 10 + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test11_noinbounds: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 40{{$}} +; CHECK-NEXT: i32.add $push1=, $0, $pop0{{$}} +; CHECK-NEXT: 
i32.load $push2=, 0($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test11_noinbounds(i32* %p) { + %arrayidx = getelementptr i32, i32* %p, i32 10 + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test12: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} +; CHECK-NEXT: return $pop5{{$}} +define i32 @load_test12(i32* %p, i32 %n) { + %add = add nsw i32 %n, 10 + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test13: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} +; CHECK-NEXT: return $pop5{{$}} +define i32 @load_test13(i32* %p, i32 %n) { + %add = add nsw i32 10, %n + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test14: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.load $push3=, 40($pop2){{$}} +; CHECK-NEXT: return $pop3{{$}} +define i32 @load_test14(i32* %p, i32 %n) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %n + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + %t = load i32, i32* %add.ptr1, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test15: +; CHECK-NEXT: param i32, 
i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} +; CHECK-NEXT: return $pop5{{$}} +define i32 @load_test15(i32* %p, i32 %n) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 10 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %n + %t = load i32, i32* %add.ptr1, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test16: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} +; CHECK-NEXT: return $pop5{{$}} +define i32 @load_test16(i32* %p, i32 %n) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 10 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %n + %t = load i32, i32* %add.ptr1, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test17: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} +; CHECK-NEXT: return $pop5{{$}} +define i32 @load_test17(i32* %p, i32 %n) { + %add = add nsw i32 %n, 10 + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %add + %t = load i32, i32* %add.ptr, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test18: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add 
$push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.load $push3=, 40($pop2){{$}} +; CHECK-NEXT: return $pop3{{$}} +define i32 @load_test18(i32* %p, i32 %n) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %n + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + %t = load i32, i32* %add.ptr1, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test19: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} +; CHECK-NEXT: return $pop5{{$}} +define i32 @load_test19(i32* %p, i32 %n) { + %add = add nsw i32 10, %n + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %add + %t = load i32, i32* %add.ptr, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test20: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, -40{{$}} +; CHECK-NEXT: i32.add $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.load $push2=, 0($pop1){{$}} +; CHECK-NEXT: return $pop2{{$}} +define i32 @load_test20(i32* %p) { + %arrayidx = getelementptr inbounds i32, i32* %p, i32 -10 + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_test21: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: result i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, -40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} +; CHECK-NEXT: return $pop5{{$}} +define i32 @load_test21(i32* %p, i32 %n) { + %add = add nsw i32 %n, -10 + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add + %t = load i32, i32* %arrayidx, align 4 + ret i32 %t +} + +; CHECK-LABEL: store_test0: +; CHECK-NEXT: param i32{{$}} +; 
CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop0), $0{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test0(i32 %i) { + store i32 %i, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 10), align 4 + ret void +} + +; CHECK-LABEL: store_test0_noinbounds: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop0), $0{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test0_noinbounds(i32 %i) { + store i32 %i, i32* getelementptr ([0 x i32], [0 x i32]* @g, i32 0, i32 10), align 4 + ret void +} + +; CHECK-LABEL: store_test1: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test1(i32 %n, i32 %i) { + %add = add nsw i32 %n, 10 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test2: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test2(i32 %n, i32 %i) { + %add = add nsw i32 10, %n + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test3: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test3(i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %n + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + store i32 %i, i32* %add.ptr1, align 4 + ret void +} + +; CHECK-LABEL: 
store_test4: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test4(i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 10), i32 %n + store i32 %i, i32* %add.ptr, align 4 + ret void +} + +; CHECK-LABEL: store_test5: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test5(i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 10), i32 %n + store i32 %i, i32* %add.ptr, align 4 + ret void +} + +; CHECK-LABEL: store_test6: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test6(i32 %n, i32 %i) { + %add = add nsw i32 %n, 10 + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + store i32 %i, i32* %add.ptr, align 4 + ret void +} + +; CHECK-LABEL: store_test7: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test7(i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %n + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + store i32 %i, i32* %add.ptr1, align 4 + ret void +} + +; CHECK-LABEL: store_test8: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, 
g+40($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test8(i32 %n, i32 %i) { + %add = add nsw i32 10, %n + %add.ptr = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + store i32 %i, i32* %add.ptr, align 4 + ret void +} + +; CHECK-LABEL: store_test9: +; CHECK-NEXT: param i32{{$}} +; CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.store $discard=, g-40($pop0), $0{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test9(i32 %i) { + store i32 %i, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @g, i32 0, i32 1073741814), align 4 + ret void +} + +; CHECK-LABEL: store_test10: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.const $push2=, g-40{{$}} +; CHECK-NEXT: i32.add $push3=, $pop1, $pop2{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop3), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test10(i32 %n, i32 %i) { + %add = add nsw i32 %n, -10 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i32 0, i32 %add + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test11: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.store $discard=, 40($0), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test11(i32* %p, i32 %i) { + %arrayidx = getelementptr inbounds i32, i32* %p, i32 10 + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test11_noinbounds: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 40{{$}} +; CHECK-NEXT: i32.add $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test11_noinbounds(i32* %p, i32 %i) { + %arrayidx = getelementptr i32, i32* %p, i32 10 + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test12: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} 
+; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop4), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test12(i32* %p, i32 %n, i32 %i) { + %add = add nsw i32 %n, 10 + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test13: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop4), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test13(i32* %p, i32 %n, i32 %i) { + %add = add nsw i32 10, %n + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test14: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.store $discard=, 40($pop2), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test14(i32* %p, i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %n + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + store i32 %i, i32* %add.ptr1, align 4 + ret void +} + +; CHECK-LABEL: store_test15: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop4), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test15(i32* %p, i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 
10 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %n + store i32 %i, i32* %add.ptr1, align 4 + ret void +} + +; CHECK-LABEL: store_test16: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop4), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test16(i32* %p, i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 10 + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %n + store i32 %i, i32* %add.ptr1, align 4 + ret void +} + +; CHECK-LABEL: store_test17: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop4), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test17(i32* %p, i32 %n, i32 %i) { + %add = add nsw i32 %n, 10 + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %add + store i32 %i, i32* %add.ptr, align 4 + ret void +} + +; CHECK-LABEL: store_test18: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.store $discard=, 40($pop2), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test18(i32* %p, i32 %n, i32 %i) { + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %n + %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10 + store i32 %i, i32* %add.ptr1, align 4 + ret void +} + +; CHECK-LABEL: store_test19: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: 
i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, 40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop4), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test19(i32* %p, i32 %n, i32 %i) { + %add = add nsw i32 10, %n + %add.ptr = getelementptr inbounds i32, i32* %p, i32 %add + store i32 %i, i32* %add.ptr, align 4 + ret void +} + +; CHECK-LABEL: store_test20: +; CHECK-NEXT: param i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, -40{{$}} +; CHECK-NEXT: i32.add $push1=, $0, $pop0{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop1), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test20(i32* %p, i32 %i) { + %arrayidx = getelementptr inbounds i32, i32* %p, i32 -10 + store i32 %i, i32* %arrayidx, align 4 + ret void +} + +; CHECK-LABEL: store_test21: +; CHECK-NEXT: param i32, i32, i32{{$}} +; CHECK-NEXT: i32.const $push0=, 2{{$}} +; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} +; CHECK-NEXT: i32.const $push3=, -40{{$}} +; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} +; CHECK-NEXT: i32.store $discard=, 0($pop4), $2{{$}} +; CHECK-NEXT: return{{$}} +define void @store_test21(i32* %p, i32 %n, i32 %i) { + %add = add nsw i32 %n, -10 + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add + store i32 %i, i32* %arrayidx, align 4 + ret void +} diff --git a/test/CodeGen/X86/lea-opt.ll b/test/CodeGen/X86/lea-opt.ll index 8096bfabd6c..20e27773195 100644 --- a/test/CodeGen/X86/lea-opt.ll +++ b/test/CodeGen/X86/lea-opt.ll @@ -34,12 +34,12 @@ sw.bb.2: ; preds = %entry sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry ret void ; CHECK-LABEL: test1: -; CHECK: leaq (%rdi,%rdi,2), [[REG1:%[a-z]+]] -; CHECK: movl arr1(,[[REG1]],4), {{.*}} -; CHECK: leaq arr1+4(,[[REG1]],4), [[REG2:%[a-z]+]] -; CHECK: subl arr1+4(,[[REG1]],4), {{.*}} -; CHECK: leaq arr1+8(,[[REG1]],4), [[REG3:%[a-z]+]] -; CHECK: addl arr1+8(,[[REG1]],4), {{.*}} +; CHECK: shlq $2, [[REG1:%[a-z]+]] +; 
CHECK: movl arr1([[REG1]],[[REG1]],2), {{.*}} +; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]] +; CHECK: subl arr1+4([[REG1]],[[REG1]],2), {{.*}} +; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] +; CHECK: addl arr1+8([[REG1]],[[REG1]],2), {{.*}} ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) ; CHECK: movl ${{[1-4]+}}, ([[REG3]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) @@ -74,11 +74,11 @@ sw.bb.2: ; preds = %entry sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry ret void ; CHECK-LABEL: test2: -; CHECK: leaq (%rdi,%rdi,2), [[REG1:%[a-z]+]] -; CHECK: leaq arr1+4(,[[REG1]],4), [[REG2:%[a-z]+]] +; CHECK: shlq $2, [[REG1:%[a-z]+]] +; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]] ; CHECK: movl -4([[REG2]]), {{.*}} ; CHECK: subl ([[REG2]]), {{.*}} -; CHECK: leaq arr1+8(,[[REG1]],4), [[REG3:%[a-z]+]] +; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] ; CHECK: addl ([[REG3]]), {{.*}} ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) ; CHECK: movl ${{[1-4]+}}, ([[REG3]]) diff --git a/test/CodeGen/X86/memcpy-from-string.ll b/test/CodeGen/X86/memcpy-from-string.ll new file mode 100644 index 00000000000..d62d9e20254 --- /dev/null +++ b/test/CodeGen/X86/memcpy-from-string.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%0 = type { %1, i64, %2 } +%1 = type { i8* } +%2 = type { i64, [8 x i8] } + +@0 = internal constant [10 x i8] c"asdf jkl;\00", align 1 + +; Memcpy lowering should emit stores of immediates containing string data from +; the correct offsets. 
+ +; CHECK-LABEL: foo: +; CHECK: movb $0, 6(%rdi) +; CHECK: movw $15212, 4(%rdi) +; CHECK: movl $1802117222, (%rdi) +define void @foo(i8* %tmp2) { + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @0, i64 0, i64 3), i64 7, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) diff --git a/test/CodeGen/X86/negative-offset.ll b/test/CodeGen/X86/negative-offset.ll new file mode 100644 index 00000000000..dc1b255d020 --- /dev/null +++ b/test/CodeGen/X86/negative-offset.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Test that a constant consisting of a global symbol with a negative offset +; is properly folded and isel'd. + +; CHECK-LABEL: negative_offset: +; CHECK: movl $G, %eax +; CHECK: notq %rax +; CHECK: addq %rdi, %rax +; CHECK: retq +@G = external global [8 x i32] +define i8* @negative_offset(i8* %a) { + %t = getelementptr i8, i8* %a, i64 sub (i64 -1, i64 ptrtoint ([8 x i32]* @G to i64)) + ret i8* %t +} diff --git a/test/CodeGen/XCore/threads.ll b/test/CodeGen/XCore/threads.ll index 0c25314295d..30dda143e08 100644 --- a/test/CodeGen/XCore/threads.ll +++ b/test/CodeGen/XCore/threads.ll @@ -87,7 +87,7 @@ define i32* @f_tle() { ; CHECK: shl [[R0:r[0-9]]], r11, 3 ; CHECK: ldaw [[R1:r[0-9]]], dp[tle] ; r0 = &tl + id*8 -; CHECK: add r0, [[R1]], [[R0]] +; CHECK: add r0, [[R0]], [[R1]] ret i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) } @@ -96,7 +96,7 @@ define i32 @f_tlExpr () { ; CHECK: get r11, id ; CHECK: shl [[R0:r[0-9]]], r11, 3 ; CHECK: ldaw [[R1:r[0-9]]], dp[tle] -; CHECK: add [[R2:r[0-9]]], [[R1]], [[R0]] +; CHECK: add [[R2:r[0-9]]], [[R0]], [[R1]] ; CHECK: add r0, [[R2]], [[R2]] ret i32 add( i32 ptrtoint( i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) to i32), 
From 747b0152da2aea251b72573dbcc314beae3045ce Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Fri, 22 Jan 2016 18:47:14 +0000 Subject: [PATCH 0058/1132] =?UTF-8?q?Fix=20MachOObjectFile::getSymbolName(?= =?UTF-8?q?)=20to=20not=20call=C2=A0report=5Ffatal=5Ferror()=20but=20to=20?= =?UTF-8?q?return=C2=A0object=5Ferror::parse=5Ffailed.=20=C2=A0Then=20made?= =?UTF-8?q?=20the=20code=20in=20llvm-nm=20do=20for=20Mach-O=20files=20what?= =?UTF-8?q?=20is=20done=20in=20the=20darwin=20native=20tools=20which=20is?= =?UTF-8?q?=20to=20print=20"bad=20string=20index"=20for=20bad=20string=20i?= =?UTF-8?q?ndexes.=20=20Updated=20the=20error=20message=20in=20the=20llvm-?= =?UTF-8?q?objdump=20test,=20and=20added=20tests=20to=20show=20llvm-nm=20p?= =?UTF-8?q?rints=20"bad=20string=20index"=20and=20a=20test=20to=20print=20?= =?UTF-8?q?the=20actual=20bad=20string=20index=20value=20which=20in=20this?= =?UTF-8?q?=20case=20is=200xfe000002=20when=20printing=20the=20fields=20as?= =?UTF-8?q?=20raw=20hex.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258520 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 161c62450c4a9bc4b9f6550f32541da739a75df9) --- lib/Object/MachOObjectFile.cpp | 3 +-- test/Object/macho-invalid.test | 8 +++++++- tools/llvm-nm/llvm-nm.cpp | 7 +++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 4ba6523a528..3fb8e537fad 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -332,8 +332,7 @@ ErrorOr MachOObjectFile::getSymbolName(DataRefImpl Symb) const { MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb); const char *Start = &StringTable.data()[Entry.n_strx]; if (Start < getData().begin() || Start >= getData().end()) - report_fatal_error( - "Symbol name entry points before beginning or past end of file."); + return 
object_error::parse_failed; return StringRef(Start); } diff --git a/test/Object/macho-invalid.test b/test/Object/macho-invalid.test index 686e516f5e9..0cf264f8771 100644 --- a/test/Object/macho-invalid.test +++ b/test/Object/macho-invalid.test @@ -29,7 +29,13 @@ BAD-SYMBOL: Requested symbol index is out of range RUN: not llvm-objdump -t %p/Inputs/macho-invalid-symbol-name-past-eof 2>&1 \ RUN: | FileCheck -check-prefix NAME-PAST-EOF %s -NAME-PAST-EOF: Symbol name entry points before beginning or past end of file +NAME-PAST-EOF: error reading file: Invalid data was encountered while parsing the file. +RUN: llvm-nm -pa %p/Inputs/macho-invalid-symbol-name-past-eof 2>&1 \ +RUN: | FileCheck -check-prefix NAME-PAST-EOF-nm-pa %s +NAME-PAST-EOF-nm-pa: 0000000000000000 - 00 0000 SO bad string index +RUN: llvm-nm -pax %p/Inputs/macho-invalid-symbol-name-past-eof 2>&1 \ +RUN: | FileCheck -check-prefix NAME-PAST-EOF-nm-pax %s +NAME-PAST-EOF-nm-pax: 0000000000000000 64 00 0000 fe000002 bad string index RUN: llvm-nm %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \ RUN: | FileCheck -check-prefix INVALID-SECTION-IDX-SYMBOL-SEC %s diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index b70a79c334c..20f080986da 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -960,8 +960,11 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, S.Address = *AddressOrErr; } S.TypeChar = getNMTypeChar(Obj, Sym); - if (error(Sym.printName(OS))) - break; + std::error_code EC = Sym.printName(OS); + if (EC && MachO) + OS << "bad string index"; + else + error(EC); OS << '\0'; S.Sym = Sym; SymbolList.push_back(S); From 341c61917f191655d2fa85c4948be0b8ae024c7a Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 22 Jan 2016 19:43:43 +0000 Subject: [PATCH 0059/1132] [AArch64] Assert that CCMP isel didn't fail inconsistently. 
We verify that the op tree is eligible for CCMP emission in isConjunctionDisjunctionTree, but it's also possible that emitConjunctionDisjunctionTree fails later. The initial check is useful, as it avoids building nodes that will get discarded. Still, make sure that inconsistencies don't happen with an assert. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258532 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 515609118c056e06a0fdd234ce8cc2d3315cb2a3) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index c8e67ff8305..98cbbc01a57 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1418,11 +1418,13 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode RHSCC; SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate, CCOp, Predicate, Depth+1); + assert(CmpR && "Transform legality should have been checked already!"); if (NegateOperands && !PushNegate) RHSCC = AArch64CC::getInvertedCondCode(RHSCC); // Emit LHS. We must push the negate through if we need to negate it. SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands, CmpR, RHSCC, Depth+1); + assert(CmpL && "Transform legality should have been checked already!"); // If we transformed an OR to and AND then we have to negate the result // (or absorb a PushNegate resulting in a double negation). if (Opcode == ISD::OR && !PushNegate) From 52bf0d75639ca1a9c76088737deea7f79ab1e876 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 22 Jan 2016 19:43:54 +0000 Subject: [PATCH 0060/1132] [AArch64] Lower 2-CC FCCMPs (one/ueq) using AND'ed CCs. The current behavior is incorrect, as the two CCs returned by changeFPCCToAArch64CC, intended to be OR'ed, are instead used in an AND ccmp chain. 
Consider: define i32 @t(float %a, float %b, float %c, float %d, i32 %e, i32 %f) { %cc1 = fcmp one float %a, %b %cc2 = fcmp olt float %c, %d %and = and i1 %cc1, %cc2 %r = select i1 %and, i32 %e, i32 %f ret i32 %r } Assuming (%a < %b) and (%c < %d); we used to do: fcmp s0, s1 # nzcv <- 1000 orr w8, wzr, #0x1 # w8 <- 1 csel w9, w8, wzr, mi # w9 <- 1 csel w8, w8, w9, gt # w8 <- 1 fcmp s2, s3 # nzcv <- 1000 cset w9, mi # w9 <- 1 tst w8, w9 # (w8 & w9) == 1, so: nzcv <- 0000 csel w0, w0, w1, ne # w0 <- w0 We now do: fcmp s2, s3 # nzcv <- 1000 fccmp s0, s1, #0, mi # mi, so: nzcv <- 1000 fccmp s0, s1, #8, le # !le, so: nzcv <- 1000 csel w0, w0, w1, pl # !pl, so: w0 <- w1 In other words, we transformed: (c < d) && ((a < b) || (a > b)) into: (c < d) && (a u>= b) && (a u<= b) whereas, per De Morgan's, we wanted: (c < d) && !((a u>= b) && (a u<= b)) Note that this problem doesn't occur in the test-suite. changeFPCCToAArch64CC produces disjunct CCs; here, one -> mi/gt. We can't represent that in the fccmp chain; it can't express arbitrary OR sequences, as one comment explains: In general we can create code for arbitrary "... (and (and A B) C)" sequences. We can also implement some "or" expressions, because "(or A B)" is equivalent to "not (and (not A) (not B))" and we can implement some negation operations. [...] However there is no way to negate the result of a partial sequence. 
Instead, introduce changeFPCCToANDAArch64CC, which produces the conjunct cond codes: - (a one b) == ((a olt b) || (a ogt b)) == ((a ord b) && (a une b)) - (a ueq b) == ((a uno b) || (a oeq b)) == ((a ule b) && (a uge b)) Note that, at first, one might think that, when PushNegate is true, we should use the disjunct CCs, in effect doing: (a || b) = !(!a && !(b)) = !(!a && !(b1 || b2)) <- changeFPCCToAArch64CC(b, b1, b2) = !(!a && !b1 && !b2) However, we can take advantage of the fact that the CC is already negated, which lets us avoid special-casing PushNegate and doing the simpler to reason about: (a || b) = !(!a && (!b)) = !(!a && (b1 && b2)) <- changeFPCCToANDAArch64CC(!b, b1, b2) = !(!a && b1 && b2) This makes both emitConditionalCompare cases behave identically, and produces correct ccmp sequences for the 2-CC fcmps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258533 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit becd93fec06fa6c616d4ca759432d32ec9b73e8b) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 44 ++++- test/CodeGen/AArch64/arm64-ccmp.ll | 178 ++++++++++++++++++--- 2 files changed, 196 insertions(+), 26 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 98cbbc01a57..49acffcc8b9 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1135,6 +1135,35 @@ static void changeFPCCToAArch64CC(ISD::CondCode CC, } } +/// Convert a DAG fp condition code to an AArch64 CC. +/// This differs from changeFPCCToAArch64CC in that it returns cond codes that +/// should be AND'ed instead of OR'ed. 
+static void changeFPCCToANDAArch64CC(ISD::CondCode CC, + AArch64CC::CondCode &CondCode, + AArch64CC::CondCode &CondCode2) { + CondCode2 = AArch64CC::AL; + switch (CC) { + default: + changeFPCCToAArch64CC(CC, CondCode, CondCode2); + assert(CondCode2 == AArch64CC::AL); + break; + case ISD::SETONE: + // (a one b) + // == ((a olt b) || (a ogt b)) + // == ((a ord b) && (a une b)) + CondCode = AArch64CC::VC; + CondCode2 = AArch64CC::NE; + break; + case ISD::SETUEQ: + // (a ueq b) + // == ((a uno b) || (a oeq b)) + // == ((a ule b) && (a uge b)) + CondCode = AArch64CC::PL; + CondCode2 = AArch64CC::LE; + break; + } +} + /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 /// CC usable with the vector instructions. Fewer operations are available /// without a real NZCV register, so we have to use less efficient combinations @@ -1344,24 +1373,23 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, } else { assert(LHS.getValueType().isFloatingPoint()); AArch64CC::CondCode ExtraCC; - changeFPCCToAArch64CC(CC, OutCC, ExtraCC); - // Surpisingly some floating point conditions can't be tested with a - // single condition code. Construct an additional comparison in this case. - // See comment below on how we deal with OR conditions. + changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC); + // Some floating point conditions can't be tested with a single condition + // code. Construct an additional comparison in this case. if (ExtraCC != AArch64CC::AL) { SDValue ExtraCmp; if (!CCOp.getNode()) ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); else { SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC); - // Note that we want the inverse of ExtraCC, so NZCV is not inversed. 
- unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC); + AArch64CC::CondCode InvExtraCC = + AArch64CC::getInvertedCondCode(ExtraCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvExtraCC); ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, DAG); } CCOp = ExtraCmp; - Predicate = AArch64CC::getInvertedCondCode(ExtraCC); - OutCC = AArch64CC::getInvertedCondCode(OutCC); + Predicate = ExtraCC; } } diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index 72d3b833116..28317261814 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -317,24 +317,6 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { ret i64 %sel } -; CHECK-LABEL: select_complicated -define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) { -; CHECK: ldr [[REG:d[0-9]+]], -; CHECK: fcmp d0, d2 -; CHECK-NEXT: fmov d2, #13.00000000 -; CHECK-NEXT: fccmp d1, d2, #4, ne -; CHECK-NEXT: fccmp d0, d1, #1, ne -; CHECK-NEXT: fccmp d0, d1, #4, vc -; CEHCK-NEXT: csel w0, w0, w1, eq - %1 = fcmp one double %v1, %v2 - %2 = fcmp oeq double %v2, 13.0 - %3 = fcmp oeq double %v1, 42.0 - %or0 = or i1 %2, %3 - %or1 = or i1 %1, %or0 - %sel = select i1 %or1, i16 %a, i16 %b - ret i16 %sel -} - ; CHECK-LABEL: gccbug define i64 @gccbug(i64 %x0, i64 %x1) { ; CHECK: cmp x0, #2 @@ -443,3 +425,163 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { store volatile i32 %ext, i32* @g ret i64 %sel } + +; Test the IR CCs that expand to two cond codes. 
+ +; CHECK-LABEL: _select_and_olt_one: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d2, d3, #4, mi +; CHECK-NEXT: fccmp d2, d3, #1, ne +; CHECK-NEXT: csel w0, w0, w1, vc +; CHECK-NEXT: ret +define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp olt double %v0, %v1 + %c1 = fcmp one double %v2, %v3 + %cr = and i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_and_one_olt: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d0, d1, #1, ne +; CHECK-NEXT: fccmp d2, d3, #0, vc +; CHECK-NEXT: csel w0, w0, w1, mi +; CHECK-NEXT: ret +define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp one double %v0, %v1 + %c1 = fcmp olt double %v2, %v3 + %cr = and i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_and_olt_ueq: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d2, d3, #0, mi +; CHECK-NEXT: fccmp d2, d3, #8, le +; CHECK-NEXT: csel w0, w0, w1, pl +; CHECK-NEXT: ret +define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp olt double %v0, %v1 + %c1 = fcmp ueq double %v2, %v3 + %cr = and i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_and_ueq_olt: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d0, d1, #8, le +; CHECK-NEXT: fccmp d2, d3, #0, pl +; CHECK-NEXT: csel w0, w0, w1, mi +; CHECK-NEXT: ret +define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp ueq double %v0, %v1 + %c1 = fcmp olt double %v2, %v3 + %cr = and i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_or_olt_one: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d2, d3, #0, pl +; CHECK-NEXT: 
fccmp d2, d3, #8, le +; CHECK-NEXT: csel w0, w0, w1, mi +; CHECK-NEXT: ret +define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp olt double %v0, %v1 + %c1 = fcmp one double %v2, %v3 + %cr = or i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_or_one_olt: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d0, d1, #1, ne +; CHECK-NEXT: fccmp d2, d3, #8, vs +; CHECK-NEXT: csel w0, w0, w1, mi +; CHECK-NEXT: ret +define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp one double %v0, %v1 + %c1 = fcmp olt double %v2, %v3 + %cr = or i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_or_olt_ueq: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d2, d3, #4, pl +; CHECK-NEXT: fccmp d2, d3, #1, ne +; CHECK-NEXT: csel w0, w0, w1, vs +; CHECK-NEXT: ret +define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp olt double %v0, %v1 + %c1 = fcmp ueq double %v2, %v3 + %cr = or i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_or_ueq_olt: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d0, d1, #8, le +; CHECK-NEXT: fccmp d2, d3, #8, mi +; CHECK-NEXT: csel w0, w0, w1, mi +; CHECK-NEXT: ret +define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { + %c0 = fcmp ueq double %v0, %v1 + %c1 = fcmp olt double %v2, %v3 + %cr = or i1 %c1, %c0 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_or_olt_ogt_ueq: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d2, d3, #0, pl +; CHECK-NEXT: fccmp d4, d5, #4, le +; CHECK-NEXT: fccmp d4, d5, #1, ne +; CHECK-NEXT: csel w0, w0, w1, vs +; CHECK-NEXT: ret +define i32 
@select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 { + %c0 = fcmp olt double %v0, %v1 + %c1 = fcmp ogt double %v2, %v3 + %c2 = fcmp ueq double %v4, %v5 + %c3 = or i1 %c1, %c0 + %cr = or i1 %c2, %c3 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +; CHECK-LABEL: _select_or_olt_ueq_ogt: +; CHECK-LABEL: ; BB#0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: fccmp d2, d3, #4, pl +; CHECK-NEXT: fccmp d2, d3, #1, ne +; CHECK-NEXT: fccmp d4, d5, #0, vc +; CHECK-NEXT: csel w0, w0, w1, gt +; CHECK-NEXT: ret +define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 { + %c0 = fcmp olt double %v0, %v1 + %c1 = fcmp ueq double %v2, %v3 + %c2 = fcmp ogt double %v4, %v5 + %c3 = or i1 %c1, %c0 + %cr = or i1 %c2, %c3 + %sel = select i1 %cr, i32 %a, i32 %b + ret i32 %sel +} + +attributes #0 = { nounwind } From 304bf8edb70276699e90ee7edeec18139b71fe7c Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 22 Jan 2016 19:43:57 +0000 Subject: [PATCH 0061/1132] [AArch64] Simplify emitConditionalCompare calls. NFC. Now that both callsites are identical, we can simplify the prototype and make it easier to reason about the 2-CC case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258534 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 172a01d638ad1a7bd1a5d3358805bf60f2c214eb) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 49acffcc8b9..44fa3d4b2bc 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1287,7 +1287,8 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate. 
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, - SDValue Condition, unsigned NZCV, + AArch64CC::CondCode Predicate, + AArch64CC::CondCode OutCC, SDLoc DL, SelectionDAG &DAG) { unsigned Opcode = 0; if (LHS.getValueType().isFloatingPoint()) @@ -1303,6 +1304,9 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, if (Opcode == 0) Opcode = AArch64ISD::CCMP; + SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); } @@ -1380,14 +1384,9 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, SDValue ExtraCmp; if (!CCOp.getNode()) ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); - else { - SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC); - AArch64CC::CondCode InvExtraCC = - AArch64CC::getInvertedCondCode(ExtraCC); - unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvExtraCC); - ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, - NZCV, DL, DAG); - } + else + ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, + ExtraCC, DL, DAG); CCOp = ExtraCmp; Predicate = ExtraCC; } @@ -1397,10 +1396,7 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, if (!CCOp.getNode()) return emitComparison(LHS, RHS, CC, DL, DAG); // Otherwise produce a ccmp. 
- SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC); - AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); - unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); - return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, + return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, DAG); } else if ((Opcode != ISD::AND && Opcode != ISD::OR) || !Val->hasOneUse()) return SDValue(); From 4a27dd583e0fa3888e5292c1c44d8d923239b10a Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 22 Jan 2016 20:02:26 +0000 Subject: [PATCH 0062/1132] [AArch64] Cleanup ccmp test check labels. NFC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258541 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b16b67e46374312a3637d6cc90f0f33db682fc63) --- test/CodeGen/AArch64/arm64-ccmp.ll | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index 28317261814..cd25eb43216 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -428,7 +428,7 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { ; Test the IR CCs that expand to two cond codes. 
-; CHECK-LABEL: _select_and_olt_one: +; CHECK-LABEL: select_and_olt_one: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #4, mi @@ -443,7 +443,7 @@ define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i ret i32 %sel } -; CHECK-LABEL: _select_and_one_olt: +; CHECK-LABEL: select_and_one_olt: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #1, ne @@ -458,7 +458,7 @@ define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i ret i32 %sel } -; CHECK-LABEL: _select_and_olt_ueq: +; CHECK-LABEL: select_and_olt_ueq: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #0, mi @@ -473,7 +473,7 @@ define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i ret i32 %sel } -; CHECK-LABEL: _select_and_ueq_olt: +; CHECK-LABEL: select_and_ueq_olt: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #8, le @@ -488,7 +488,7 @@ define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i ret i32 %sel } -; CHECK-LABEL: _select_or_olt_one: +; CHECK-LABEL: select_or_olt_one: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #0, pl @@ -503,7 +503,7 @@ define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i3 ret i32 %sel } -; CHECK-LABEL: _select_or_one_olt: +; CHECK-LABEL: select_or_one_olt: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #1, ne @@ -518,7 +518,7 @@ define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i3 ret i32 %sel } -; CHECK-LABEL: _select_or_olt_ueq: +; CHECK-LABEL: select_or_olt_ueq: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #4, pl @@ -533,7 +533,7 @@ define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i3 ret i32 %sel } -; CHECK-LABEL: _select_or_ueq_olt: +; CHECK-LABEL: select_or_ueq_olt: ; CHECK-LABEL: ; BB#0: 
; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #8, le @@ -548,7 +548,7 @@ define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i3 ret i32 %sel } -; CHECK-LABEL: _select_or_olt_ogt_ueq: +; CHECK-LABEL: select_or_olt_ogt_ueq: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #0, pl @@ -566,7 +566,7 @@ define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3 ret i32 %sel } -; CHECK-LABEL: _select_or_olt_ueq_ogt: +; CHECK-LABEL: select_or_olt_ueq_ogt: ; CHECK-LABEL: ; BB#0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #4, pl From ceb11e01f468d0e5580bbbcca188f8a87b2c9531 Mon Sep 17 00:00:00 2001 From: Sergei Larin Date: Fri, 22 Jan 2016 21:18:20 +0000 Subject: [PATCH 0063/1132] Make sure that any new and optimized objects created during GlobalOPT copy all the attributes from the base object. Summary: Make sure that any new and optimized objects created during GlobalOPT copy all the attributes from the base object. A good example of improper behavior in the current implementation is section information associated with the GlobalObject. If a section was set for it, and GlobalOpt is creating/modifying a new object based on this one (often copying the original name), without this change new object will be placed in a default section, resulting in inappropriate properties of the new variable. The argument here is that if customer specified a section for a variable, any changes to it that compiler does should not cause it to change that section allocation. Moreover, any other properties worth representation in copyAttributesFrom() should also be propagated. 
Reviewers: jmolloy, joker-eph, joker.eph Subscribers: slarin, joker.eph, rafael, tobiasvk, llvm-commits Differential Revision: http://reviews.llvm.org/D16074 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258556 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 84f76e4fbf11784c7a0c66e904b1d0a39933e7f3) --- lib/Transforms/IPO/GlobalOpt.cpp | 4 +++ test/Transforms/GlobalOpt/GSROA-section.ll | 30 +++++++++++++++++++ .../GlobalOpt/MallocSROA-section.ll | 28 +++++++++++++++++ test/Transforms/GlobalOpt/SROA-section.ll | 27 +++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 test/Transforms/GlobalOpt/GSROA-section.ll create mode 100644 test/Transforms/GlobalOpt/MallocSROA-section.ll create mode 100644 test/Transforms/GlobalOpt/SROA-section.ll diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index f6299597b69..dcb99157ab0 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -499,6 +499,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); NGV->setExternallyInitialized(GV->isExternallyInitialized()); + NGV->copyAttributesFrom(GV); Globals.push_back(NGV); NewGlobals.push_back(NGV); @@ -533,6 +534,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); NGV->setExternallyInitialized(GV->isExternallyInitialized()); + NGV->copyAttributesFrom(GV); Globals.push_back(NGV); NewGlobals.push_back(NGV); @@ -1291,6 +1293,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), nullptr, GV->getThreadLocalMode()); + NGV->copyAttributesFrom(GV); FieldGlobals.push_back(NGV); unsigned TypeSize = DL.getTypeAllocSize(FieldTy); @@ -1610,6 +1613,7 @@ 
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GV->getName()+".b", GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); + NewGV->copyAttributesFrom(GV); GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); Constant *InitVal = GV->getInitializer(); diff --git a/test/Transforms/GlobalOpt/GSROA-section.ll b/test/Transforms/GlobalOpt/GSROA-section.ll new file mode 100644 index 00000000000..a439fa0797d --- /dev/null +++ b/test/Transforms/GlobalOpt/GSROA-section.ll @@ -0,0 +1,30 @@ +; This test lets globalopt split the global struct and array into different +; values. The pass needs to preserve section attribute. + +; RUN: opt < %s -globalopt -S | FileCheck %s +; Check that the new global values still have their section assignment. +; CHECK: @struct +; CHECK: section ".foo" +; CHECK: @array +; CHECK-NOT: section ".foo" + +@struct = internal global { i32, i32 } zeroinitializer, section ".foo" +@array = internal global [ 2 x i32 ] zeroinitializer + +define i32 @foo() { + %A = load i32, i32* getelementptr ({ i32, i32 }, { i32, i32 }* @struct, i32 0, i32 0) + %B = load i32, i32* getelementptr ([ 2 x i32 ], [ 2 x i32 ]* @array, i32 0, i32 0) + ; Use the loaded values, so they won't get removed completely + %R = add i32 %A, %B + ret i32 %R +} + +; We put stores in a different function, so that the global variables won't get +; optimized away completely. 
+define void @bar(i32 %R) { + store i32 %R, i32* getelementptr ([ 2 x i32 ], [ 2 x i32 ]* @array, i32 0, i32 0) + store i32 %R, i32* getelementptr ({ i32, i32 }, { i32, i32 }* @struct, i32 0, i32 0) + ret void +} + + diff --git a/test/Transforms/GlobalOpt/MallocSROA-section.ll b/test/Transforms/GlobalOpt/MallocSROA-section.ll new file mode 100644 index 00000000000..75b3cfec137 --- /dev/null +++ b/test/Transforms/GlobalOpt/MallocSROA-section.ll @@ -0,0 +1,28 @@ +; RUN: opt -globalopt -S < %s | FileCheck %s +; CHECK: @Y.f0 +; CHECK: section ".foo" +; CHECK: @Y.f1 +; CHECK: section ".foo" + +%struct.xyz = type { double, i32 } + +@Y = internal global %struct.xyz* null ,section ".foo" ; <%struct.xyz**> [#uses=2] +@numf2s = external global i32 ; [#uses=1] + +define void @init_net() { +entry: + %0 = load i32, i32* @numf2s, align 4 ; [#uses=1] + %mallocsize2 = shl i32 %0, 4 ; [#uses=1] + %malloccall3 = tail call i8* @malloc(i32 %mallocsize2) ; [#uses=1] + %1 = bitcast i8* %malloccall3 to %struct.xyz* ; <%struct.xyz*> [#uses=1] + store %struct.xyz* %1, %struct.xyz** @Y, align 8 + ret void +} + +define void @load_train() { +entry: + %0 = load %struct.xyz*, %struct.xyz** @Y, align 8 ; <%struct.xyz*> [#uses=0] + ret void +} + +declare noalias i8* @malloc(i32) diff --git a/test/Transforms/GlobalOpt/SROA-section.ll b/test/Transforms/GlobalOpt/SROA-section.ll new file mode 100644 index 00000000000..1589608a67a --- /dev/null +++ b/test/Transforms/GlobalOpt/SROA-section.ll @@ -0,0 +1,27 @@ +; Verify that section assignment is copied during SROA +; RUN: opt < %s -globalopt -S | FileCheck %s +; CHECK: @G.0 +; CHECK: section ".foo" +; CHECK: @G.1 +; CHECK: section ".foo" +; CHECK: @G.2 +; CHECK: section ".foo" + +%T = type { double, double, double } +@G = internal global %T zeroinitializer, align 16, section ".foo" + +define void @test() { + store double 1.0, double* getelementptr (%T, %T* @G, i32 0, i32 0), align 16 + store double 2.0, double* getelementptr (%T, %T* @G, i32 0, i32 
1), align 8 + store double 3.0, double* getelementptr (%T, %T* @G, i32 0, i32 2), align 16 + ret void +} + +define double @test2() { + %V1 = load double, double* getelementptr (%T, %T* @G, i32 0, i32 0), align 16 + %V2 = load double, double* getelementptr (%T, %T* @G, i32 0, i32 1), align 8 + %V3 = load double, double* getelementptr (%T, %T* @G, i32 0, i32 2), align 16 + %R = fadd double %V1, %V2 + %R2 = fadd double %R, %V3 + ret double %R2 +} From 80c4ab356fd9c5f1d2b63843bc6266db7e55ddb1 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 22 Jan 2016 22:07:24 +0000 Subject: [PATCH 0064/1132] Strip local symbols when using externalized debug info. When we build LLVM with externalized debug info, all debugging and symbolication related data is extracted into dSYM files prior to stripping. As such, there is no need to preserve local symbols in LLVM binaries after dSYM creation. This shrinks libLLVM.dylib from 58MB to 55MB on my system. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258566 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit cf2a93b691e695745271b053136e010007ce0b6f) --- cmake/modules/AddLLVM.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index b06e434a248..b06e5147bb0 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1196,7 +1196,7 @@ function(llvm_externalize_debuginfo name) endif() add_custom_command(TARGET ${name} POST_BUILD COMMAND xcrun dsymutil $ - COMMAND xcrun strip -Sl $) + COMMAND xcrun strip -Sxl $) else() message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!") endif() From 30c8a210d06d47e6a6bad4379678c19ecf3c2b27 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Fri, 22 Jan 2016 22:21:34 +0000 Subject: [PATCH 0065/1132] Fix LivePhysRegs::addLiveOuts Summary: The testing for returnBB was flipped which may cause ARM ld/st opt pass uses callee saved regs in returnBB when 
shrink-wrap is used. Reviewers: t.p.northover, apazos, MatzeB Subscribers: mcrosier, zzheng, aemerson, llvm-commits, rengolin Differential Revision: http://reviews.llvm.org/D16434 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258569 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 89fa455326656b220c66b010578dcb043350e361) --- lib/CodeGen/LivePhysRegs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index efbbcbe23e1..266f895a557 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -151,7 +151,7 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB, if (AddPristinesAndCSRs) { const MachineFunction &MF = *MBB->getParent(); addPristines(*this, MF, *TRI); - if (!MBB->isReturnBlock()) { + if (MBB->isReturnBlock()) { // The return block has no successors whose live-ins we could merge // below. So instead we add the callee saved registers manually. for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) From cfc4633921529385499781efb6e2f24bbb87edb6 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Fri, 22 Jan 2016 22:49:55 +0000 Subject: [PATCH 0066/1132] Fix the code that leads to the incorrect trigger of the report_fatal_error() in MachOObjectFile::getSymbolByIndex() when a Mach-O file has a symbol table load command but the number of symbols are zero. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code in MachOObjectFile::symbol_begin_impl() should not be assuming there is a symbol at index 0, in cases there is no symbol table load command or the count of symbol is zero. So I also fixed that. And needed to fix MachOObjectFile::symbol_end_impl() to also do the same thing for no symbol table or one with zero entries. 
The code in MachOObjectFile::getSymbolByIndex() should trigger the report_fatal_error() for programmatic errors for any index when there is no symbol table load command and not return the end iterator. So also fixed that. Note there is no test case as this is a programmatic error. The test case using the file macho-invalid-bad-symbol-index has a symbol table load command with its number of symbols (nsyms) is zero. Which was incorrectly testing the bad triggering of the report_fatal_error() in in MachOObjectFile::getSymbolByIndex(). This test case is an invalid Mach-O file but not for that reason. It appears this Mach-O file use to have an nsyms value of 11, and what makes this Mach-O file invalid is the counts and indexes into the symbol table of the dynamic load command are now invalid because the number of symbol table entries (nsyms) is now zero. Which can be seen with the existing llvm-obdump: % llvm-objdump -private-headers macho-invalid-bad-symbol-index … Load command 4 cmd LC_SYMTAB cmdsize 24 symoff 4216 nsyms 0 stroff 4392 strsize 144 Load command 5 cmd LC_DYSYMTAB cmdsize 80 ilocalsym 0 nlocalsym 8 (past the end of the symbol table) iextdefsym 8 (greater than the number of symbols) nextdefsym 2 (past the end of the symbol table) iundefsym 10 (greater than the number of symbols) nundefsym 1 (past the end of the symbol table) ... And the native darwin tools generates an error for this file: % nm macho-invalid-bad-symbol-index nm: object: macho-invalid-bad-symbol-index truncated or malformed object (ilocalsym plus nlocalsym in LC_DYSYMTAB load command extends past the end of the symbol table) I added new checks for the indexes and sizes for these in the constructor of MachOObjectFile. And added comments for what would be a proper diagnostic messages. And changed the test case using macho-invalid-bad-symbol-index to test for the new error now produced. 
Also added a test with a valid Mach-O file with a symbol table load command where the number of symbols is zero that shows the report_fatal_error() is not called. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258576 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 5479bf7bf05d9b24b34eb3222a8ac82ecaa8db7d) --- lib/Object/MachOObjectFile.cpp | 71 ++++++++++++++++++++++--- test/Object/Inputs/macho-valid-0-nsyms | Bin 0 -> 372 bytes test/Object/macho-invalid.test | 6 ++- 3 files changed, 69 insertions(+), 8 deletions(-) create mode 100644 test/Object/Inputs/macho-valid-0-nsyms diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 3fb8e537fad..ed0ca68653f 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -317,6 +317,61 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, Load = LoadOrErr.get(); } } + if (!SymtabLoadCmd) { + if (DysymtabLoadCmd) { + // Diagnostic("truncated or malformed object (contains LC_DYSYMTAB load " + // "command without a LC_SYMTAB load command)"); + EC = object_error::parse_failed; + return; + } + } else if (DysymtabLoadCmd) { + MachO::symtab_command Symtab = + getStruct(this, SymtabLoadCmd); + MachO::dysymtab_command Dysymtab = + getStruct(this, DysymtabLoadCmd); + if (Dysymtab.nlocalsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { + // Diagnostic("truncated or malformed object (ilocalsym in LC_DYSYMTAB " + // "load command extends past the end of the symbol table)" + EC = object_error::parse_failed; + return; + } + uint64_t big_size = Dysymtab.ilocalsym; + big_size += Dysymtab.nlocalsym; + if (Dysymtab.nlocalsym != 0 && big_size > Symtab.nsyms) { + // Diagnostic("truncated or malformed object (ilocalsym plus nlocalsym " + // "in LC_DYSYMTAB load command extends past the end of the symbol table)" + EC = object_error::parse_failed; + return; + } + if (Dysymtab.nextdefsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { + // 
Diagnostic("truncated or malformed object (nextdefsym in LC_DYSYMTAB " + // "load command extends past the end of the symbol table)" + EC = object_error::parse_failed; + return; + } + big_size = Dysymtab.iextdefsym; + big_size += Dysymtab.nextdefsym; + if (Dysymtab.nextdefsym != 0 && big_size > Symtab.nsyms) { + // Diagnostic("truncated or malformed object (iextdefsym plus nextdefsym " + // "in LC_DYSYMTAB load command extends past the end of the symbol table)" + EC = object_error::parse_failed; + return; + } + if (Dysymtab.nundefsym != 0 && Dysymtab.iundefsym > Symtab.nsyms) { + // Diagnostic("truncated or malformed object (nundefsym in LC_DYSYMTAB " + // "load command extends past the end of the symbol table)" + EC = object_error::parse_failed; + return; + } + big_size = Dysymtab.iundefsym; + big_size += Dysymtab.nundefsym; + if (Dysymtab.nundefsym != 0 && big_size > Symtab.nsyms) { + // Diagnostic("truncated or malformed object (iundefsym plus nundefsym " + // "in LC_DYSYMTAB load command extends past the end of the symbol table)" + EC = object_error::parse_failed; + return; + } + } assert(LoadCommands.size() == LoadCommandCount); } @@ -941,15 +996,20 @@ MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { } basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const { + DataRefImpl DRI; + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) + return basic_symbol_iterator(SymbolRef(DRI, this)); + return getSymbolByIndex(0); } basic_symbol_iterator MachOObjectFile::symbol_end_impl() const { DataRefImpl DRI; - if (!SymtabLoadCmd) + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) return basic_symbol_iterator(SymbolRef(DRI, this)); - MachO::symtab_command Symtab = getSymtabLoadCommand(); unsigned SymbolTableEntrySize = is64Bit() ? 
sizeof(MachO::nlist_64) : sizeof(MachO::nlist); @@ -960,15 +1020,12 @@ basic_symbol_iterator MachOObjectFile::symbol_end_impl() const { } basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const { - DataRefImpl DRI; - if (!SymtabLoadCmd) - return basic_symbol_iterator(SymbolRef(DRI, this)); - MachO::symtab_command Symtab = getSymtabLoadCommand(); - if (Index >= Symtab.nsyms) + if (!SymtabLoadCmd || Index >= Symtab.nsyms) report_fatal_error("Requested symbol index is out of range."); unsigned SymbolTableEntrySize = is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + DataRefImpl DRI; DRI.p = reinterpret_cast(getPtr(this, Symtab.symoff)); DRI.p += Index * SymbolTableEntrySize; return basic_symbol_iterator(SymbolRef(DRI, this)); diff --git a/test/Object/Inputs/macho-valid-0-nsyms b/test/Object/Inputs/macho-valid-0-nsyms new file mode 100644 index 0000000000000000000000000000000000000000..1a170ff8161ef1855eac0777e252c2432bdb09ba GIT binary patch literal 372 zcmX^A>+L^w1_nlE1|R{%AUXiVfC5P%{Q}0pLbE`{3ZQyme4v>OAj|}nkB=`&ttf$V zfK+^Zh-*X$f{)IEnTI6afW!vsPf09EM2N)4yEuk8BKR;C%sp7lgDC;iAa_cjh(I{p LKq>%>I3+>=eEkqG literal 0 HcmV?d00001 diff --git a/test/Object/macho-invalid.test b/test/Object/macho-invalid.test index 0cf264f8771..781c9367c1e 100644 --- a/test/Object/macho-invalid.test +++ b/test/Object/macho-invalid.test @@ -25,7 +25,11 @@ TOO-MANY-SECTS: Mach-O segment load command contains too many sections RUN: not llvm-objdump -t %p/Inputs/macho-invalid-bad-symbol-index 2>&1 \ RUN: | FileCheck -check-prefix BAD-SYMBOL %s -BAD-SYMBOL: Requested symbol index is out of range +BAD-SYMBOL: Invalid data was encountered while parsing the file. 
+RUN: llvm-objdump -t %p/Inputs/macho-valid-0-nsyms 2>&1 \ +RUN: | FileCheck -check-prefix ZERO-NSYMS %s +ZERO-NSYMS: SYMBOL TABLE +ZERO-NSYMS-NOT: Requested symbol index is out of range RUN: not llvm-objdump -t %p/Inputs/macho-invalid-symbol-name-past-eof 2>&1 \ RUN: | FileCheck -check-prefix NAME-PAST-EOF %s From 532b3141716407f6e9054f8af4a3071953667da6 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sat, 23 Jan 2016 04:05:16 +0000 Subject: [PATCH 0067/1132] Fix wrong indentation git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258603 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 8e055c9655d075be54cdcec1abfc6fa132a556d0) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 44fa3d4b2bc..dea10218b58 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1296,10 +1296,10 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, else if (RHS.getOpcode() == ISD::SUB) { SDValue SubOp0 = RHS.getOperand(0); if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - // See emitComparison() on why we can only do this for SETEQ and SETNE. - Opcode = AArch64ISD::CCMN; - RHS = RHS.getOperand(1); - } + // See emitComparison() on why we can only do this for SETEQ and SETNE. + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } } if (Opcode == 0) Opcode = AArch64ISD::CCMP; From 750a7ff95918b7ce6125ec7e0cd92bb7a32ce393 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sat, 23 Jan 2016 04:05:18 +0000 Subject: [PATCH 0068/1132] AArch64ISelLowering: Reduce maximum recursion depth of isConjunctionDisjunctionTree() This function will exhibit exponential runtime (2**n) so we should rather use a lower limit. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258604 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 7b63e4a855053265dc3f5f638241d65f3b64e847) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index dea10218b58..e4da7070e94 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1326,8 +1326,8 @@ static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate, CanPushNegate = true; return true; } - // Protect against stack overflow. - if (Depth > 15) + // Protect against exponential runtime and stack overflow. + if (Depth > 6) return false; if (Opcode == ISD::AND || Opcode == ISD::OR) { SDValue O0 = Val->getOperand(0); From 0de86c5b8d22d05916259d01eed8eb381144c501 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sat, 23 Jan 2016 04:05:22 +0000 Subject: [PATCH 0069/1132] AArch64ISel: Fix ccmp code selection matching deep expressions. Some of the conditions necessary to produce ccmp sequences were only checked in recursive calls to emitConjunctionDisjunctionTree() after some of the earlier expressions were already built. Move all checks over to isConjunctionDisjunctionTree() so they are all checked before we start emitting instructions. Also rename some variable to better reflect their usage. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258605 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 52d72897ff2bd4da8d724160a893b321c861133a) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 127 +++++++++++++-------- test/CodeGen/AArch64/arm64-ccmp.ll | 19 +++ 2 files changed, 98 insertions(+), 48 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index e4da7070e94..41c9a3e78a7 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1317,13 +1317,13 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, /// at the leafs only. i.e. "not (or (or x y) z)" can be changed to /// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be /// brought into such a form. -static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate, +static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate, unsigned Depth = 0) { if (!Val.hasOneUse()) return false; unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { - CanPushNegate = true; + CanNegate = true; return true; } // Protect against exponential runtime and stack overflow. 
@@ -1332,16 +1332,32 @@ static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate, if (Opcode == ISD::AND || Opcode == ISD::OR) { SDValue O0 = Val->getOperand(0); SDValue O1 = Val->getOperand(1); - bool CanPushNegateL; - if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1)) + bool CanNegateL; + if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1)) return false; - bool CanPushNegateR; - if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1)) + bool CanNegateR; + if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1)) return false; - // We cannot push a negate through an AND operation (it would become an OR), - // we can however change a (not (or x y)) to (and (not x) (not y)) if we can - // push the negate through the x/y subtrees. - CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR; + + if (Opcode == ISD::OR) { + // For an OR expression we need to be able to negate at least one side or + // we cannot do the transformation at all. + if (!CanNegateL && !CanNegateR) + return false; + // We can however change a (not (or x y)) to (and (not x) (not y)) if we + // can negate the x and y subtrees. + CanNegate = CanNegateL && CanNegateR; + } else { + // If the operands are OR expressions then we finally need to negate their + // outputs, we can only do that for the operand with emitted last by + // negating OutCC, not for both operands. + bool NeedsNegOutL = O0->getOpcode() == ISD::OR; + bool NeedsNegOutR = O1->getOpcode() == ISD::OR; + if (NeedsNegOutL && NeedsNegOutR) + return false; + // We cannot negate an AND operation (it would become an OR), + CanNegate = false; + } return true; } return false; @@ -1357,10 +1373,9 @@ static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate, /// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate /// for the comparisons in the current subtree; @p Depth limits the search /// depth to avoid stack overflow. 
-static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, - AArch64CC::CondCode &OutCC, bool PushNegate = false, - SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL, - unsigned Depth = 0) { +static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val, + AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, + AArch64CC::CondCode Predicate) { // We're at a tree leaf, produce a conditional comparison operation. unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { @@ -1368,7 +1383,7 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, SDValue RHS = Val->getOperand(1); ISD::CondCode CC = cast(Val->getOperand(2))->get(); bool isInteger = LHS.getValueType().isInteger(); - if (PushNegate) + if (Negate) CC = getSetCCInverse(CC, isInteger); SDLoc DL(Val); // Determine OutCC and handle FP special case. @@ -1393,43 +1408,47 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, } // Produce a normal comparison if we are first in the chain - if (!CCOp.getNode()) + if (!CCOp) return emitComparison(LHS, RHS, CC, DL, DAG); // Otherwise produce a ccmp. return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, DAG); - } else if ((Opcode != ISD::AND && Opcode != ISD::OR) || !Val->hasOneUse()) - return SDValue(); - - assert((Opcode == ISD::OR || !PushNegate) - && "Can only push negate through OR operation"); + } + assert(Opcode == ISD::AND || Opcode == ISD::OR && Val->hasOneUse() + && "Valid conjunction/disjunction tree"); // Check if both sides can be transformed. SDValue LHS = Val->getOperand(0); SDValue RHS = Val->getOperand(1); - bool CanPushNegateL; - if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1)) - return SDValue(); - bool CanPushNegateR; - if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1)) - return SDValue(); - // Do we need to negate our operands? 
- bool NegateOperands = Opcode == ISD::OR; + // In case of an OR we need to negate our operands and the result. + // (A v B) <=> not(not(A) ^ not(B)) + bool NegateOpsAndResult = Opcode == ISD::OR; // We can negate the results of all previous operations by inverting the - // predicate flags giving us a free negation for one side. For the other side - // we need to be able to push the negation to the leafs of the tree. - if (NegateOperands) { - if (!CanPushNegateL && !CanPushNegateR) - return SDValue(); - // Order the side where we can push the negate through to LHS. - if (!CanPushNegateL && CanPushNegateR) + // predicate flags giving us a free negation for one side. The other side + // must be negatable by itself. + if (NegateOpsAndResult) { + // See which side we can negate. + bool CanNegateL; + bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL); + assert(isValidL && "Valid conjunction/disjunction tree"); + (void)isValidL; + +#ifndef NDEBUG + bool CanNegateR; + bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR); + assert(isValidR && "Valid conjunction/disjunction tree"); + assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree"); +#endif + + // Order the side which we cannot negate to RHS so we can emit it first. + if (!CanNegateL) std::swap(LHS, RHS); } else { bool NeedsNegOutL = LHS->getOpcode() == ISD::OR; bool NeedsNegOutR = RHS->getOpcode() == ISD::OR; - if (NeedsNegOutL && NeedsNegOutR) - return SDValue(); + assert((!NeedsNegOutR || !NeedsNegOutL) && + "Valid conjunction/disjunction tree"); // Order the side where we need to negate the output flags to RHS so it // gets emitted first. if (NeedsNegOutL) @@ -1440,22 +1459,34 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, // through if we are already in a PushNegate case, otherwise we can negate // the "flags to test" afterwards. 
AArch64CC::CondCode RHSCC; - SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate, - CCOp, Predicate, Depth+1); - assert(CmpR && "Transform legality should have been checked already!"); - if (NegateOperands && !PushNegate) + SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate, + CCOp, Predicate); + if (NegateOpsAndResult && !Negate) RHSCC = AArch64CC::getInvertedCondCode(RHSCC); - // Emit LHS. We must push the negate through if we need to negate it. - SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands, - CmpR, RHSCC, Depth+1); - assert(CmpL && "Transform legality should have been checked already!"); + // Emit LHS. We may need to negate it. + SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC, + NegateOpsAndResult, CmpR, + RHSCC); // If we transformed an OR to and AND then we have to negate the result - // (or absorb a PushNegate resulting in a double negation). - if (Opcode == ISD::OR && !PushNegate) + // (or absorb the Negate parameter). + if (NegateOpsAndResult && !Negate) OutCC = AArch64CC::getInvertedCondCode(OutCC); return CmpL; } +/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain +/// of CCMP/CFCMP ops. See @ref AArch64CCMP. +/// \see emitConjunctionDisjunctionTreeRec(). 
+static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, + AArch64CC::CondCode &OutCC) { + bool CanNegate; + if (!isConjunctionDisjunctionTree(Val, CanNegate)) + return SDValue(); + + return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(), + AArch64CC::AL); +} + /// @} static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index cd25eb43216..767df7416cb 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -426,6 +426,25 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { ret i64 %sel } +; The following is not possible to implement with a single cmp;ccmp;csel +; sequence. +; CHECK-LABEL: select_noccmp3 +define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) { + %c0 = icmp slt i32 %v0, 0 + %c1 = icmp sgt i32 %v0, 13 + %c2 = icmp slt i32 %v0, 22 + %c3 = icmp sgt i32 %v0, 44 + %c4 = icmp eq i32 %v0, 99 + %c5 = icmp eq i32 %v0, 77 + %or0 = or i1 %c0, %c1 + %or1 = or i1 %c2, %c3 + %and0 = and i1 %or0, %or1 + %or2 = or i1 %c4, %c5 + %and1 = and i1 %and0, %or2 + %sel = select i1 %and1, i32 %v1, i32 %v2 + ret i32 %sel +} + ; Test the IR CCs that expand to two cond codes. ; CHECK-LABEL: select_and_olt_one: From a194fb41b561b5b60a0b979e5cea667b3794a475 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 23 Jan 2016 06:34:59 +0000 Subject: [PATCH 0070/1132] AArch64ISelLowering.cpp: Fix a warning. 
[-Wunused-variable] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258618 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 71c1bf1b4f771f35a27c45c3308820af003bde0c) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 41c9a3e78a7..c89becd0c81 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1449,6 +1449,7 @@ static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val, bool NeedsNegOutR = RHS->getOpcode() == ISD::OR; assert((!NeedsNegOutR || !NeedsNegOutL) && "Valid conjunction/disjunction tree"); + (void)NeedsNegOutR; // Order the side where we need to negate the output flags to RHS so it // gets emitted first. if (NeedsNegOutL) From 9fe7e96d05d1871c875c4a04999ad6434f71a0d7 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sat, 23 Jan 2016 06:49:29 +0000 Subject: [PATCH 0071/1132] Inline variable into assert Seems like some compilers still give unused variable warnings for bool var = ...; (void)var; so I have to inline the variable. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258619 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 578afd47d551d2a2daef4a62f5dcd5fde1b7653c) --- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index c89becd0c81..0687bd803e8 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1446,10 +1446,8 @@ static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val, std::swap(LHS, RHS); } else { bool NeedsNegOutL = LHS->getOpcode() == ISD::OR; - bool NeedsNegOutR = RHS->getOpcode() == ISD::OR; - assert((!NeedsNegOutR || !NeedsNegOutL) && + assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) && "Valid conjunction/disjunction tree"); - (void)NeedsNegOutR; // Order the side where we need to negate the output flags to RHS so it // gets emitted first. if (NeedsNegOutL) From 1eaa5daa5885762d4074a7c18b0c9977907ad8da Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 23 Jan 2016 16:02:10 +0000 Subject: [PATCH 0072/1132] [Bitcode] Insert the darwin wrapper at the beginning of a file when the target is macho. It looks like the check for macho was accidentally dropped in r132959. I don't have a test case, but I'll add one if anyone knows how this can be tested. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258627 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 11db5ceb3ae9a06476d1669ea90c208c89985cc4) --- lib/Bitcode/Writer/BitcodeWriter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index a522a1ed7d2..fa92b841614 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3048,7 +3048,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // If this is darwin or another generic macho target, reserve space for the // header. Triple TT(M->getTargetTriple()); - if (TT.isOSDarwin()) + if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Buffer.insert(Buffer.begin(), DarwinBCHeaderSize, 0); // Emit the module into the buffer. @@ -3070,7 +3070,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, EmitFunctionSummary); } - if (TT.isOSDarwin()) + if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) EmitDarwinBCHeaderAndTrailer(Buffer, TT); // Write the generated bitstream to "Out". From 88bda2ce303b9bcb0fdf12d59415628cb8be3dbf Mon Sep 17 00:00:00 2001 From: Joseph Tremoulet Date: Sat, 23 Jan 2016 18:36:01 +0000 Subject: [PATCH 0073/1132] [ORC] Update ObjectTransformLayer signature Summary: Update ObjectTransformLayer::addObjectSet to take the object set by value rather than reference and pass it to the base layer with move semantics rather than copy, to match r258185's changes to ObjectLinkingLayer. Update the unit test to verify that ObjectTransformLayer's signature stays in sync with ObjectLinkingLayer's. 
Reviewers: lhames Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D16414 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258630 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 88a19038a78b55ca46533d68523da00045c09c0f) --- .../Orc/ObjectTransformLayer.h | 4 +- .../Orc/ObjectTransformLayerTest.cpp | 75 +++++++++++++++++-- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index f96e83ed5a1..2ffe71c9435 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -42,13 +42,13 @@ class ObjectTransformLayer { /// @return A handle for the added objects. template - ObjSetHandleT addObjectSet(ObjSetT &Objects, MemoryManagerPtrT MemMgr, + ObjSetHandleT addObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { for (auto I = Objects.begin(), E = Objects.end(); I != E; ++I) *I = Transform(std::move(*I)); - return BaseLayer.addObjectSet(Objects, std::move(MemMgr), + return BaseLayer.addObjectSet(std::move(Objects), std::move(MemMgr), std::move(Resolver)); } diff --git a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp index c88c94f17b1..fda596f6a70 100644 --- a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp @@ -7,9 +7,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/NullResolver.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include 
"llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" +#include "llvm/Object/ObjectFile.h" #include "gtest/gtest.h" using namespace llvm::orc; @@ -51,7 +56,7 @@ class MockBaseLayer { template - ObjSetHandleT addObjectSet(ObjSetT &Objects, MemoryManagerPtrT MemMgr, + ObjSetHandleT addObjectSet(ObjSetT Objects, MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { EXPECT_EQ(MockManager, *MemMgr) << "MM should pass through"; EXPECT_EQ(MockResolver, *Resolver) << "Resolver should pass through"; @@ -216,13 +221,14 @@ TEST(ObjectTransformLayerTest, Main) { auto MM = llvm::make_unique(MockManager); auto SR = llvm::make_unique(MockResolver); M.expectAddObjectSet(Objs1, MM.get(), SR.get()); - auto H = T1.addObjectSet(Objs1, std::move(MM), std::move(SR)); + auto H = T1.addObjectSet(std::move(Objs1), std::move(MM), std::move(SR)); M.verifyAddObjectSet(H); // Test addObjectSet with T2 (mutating, naked pointers) - llvm::SmallVector Objs2; - Objs2.push_back(&MockObject1); - Objs2.push_back(&MockObject2); + llvm::SmallVector Objs2Vec; + Objs2Vec.push_back(&MockObject1); + Objs2Vec.push_back(&MockObject2); + llvm::MutableArrayRef Objs2(Objs2Vec); M.expectAddObjectSet(Objs2, &MockManager, &MockResolver); H = T2.addObjectSet(Objs2, &MockManager, &MockResolver); M.verifyAddObjectSet(H); @@ -271,5 +277,62 @@ TEST(ObjectTransformLayerTest, Main) { const auto &T1C = T1; OwnedObj = T1C.getTransform()(std::move(OwnedObj)); EXPECT_EQ(289, *OwnedObj) << "Expected incrementing transform"; + + volatile bool RunStaticChecks = false; + if (RunStaticChecks) { + // Make sure that ObjectTransformLayer implements the object layer concept + // correctly by sandwitching one between an ObjectLinkingLayer and an + // IRCompileLayer, verifying that it compiles if we have a call to the + // IRComileLayer's addModuleSet that should call the transform layer's + // addObjectSet, and also calling the other public transform layer methods + // directly to make sure the methods they intend to forward to exist on 
+ // the ObjectLinkingLayer. + + // We'll need a concrete MemoryManager class. + class NullManager : public llvm::RuntimeDyld::MemoryManager { + public: + uint8_t *allocateCodeSection(uintptr_t, unsigned, unsigned, + llvm::StringRef) override { + return nullptr; + } + uint8_t *allocateDataSection(uintptr_t, unsigned, unsigned, + llvm::StringRef, bool) override { + return nullptr; + } + void registerEHFrames(uint8_t *, uint64_t, size_t) override {} + void deregisterEHFrames(uint8_t *, uint64_t, size_t) override {} + bool finalizeMemory(std::string *) { return false; } + }; + + // Construct the jit layers. + ObjectLinkingLayer<> BaseLayer; + auto IdentityTransform = []( + std::unique_ptr> + Obj) { return std::move(Obj); }; + ObjectTransformLayer + TransformLayer(BaseLayer, IdentityTransform); + auto NullCompiler = [](llvm::Module &) { + return llvm::object::OwningBinary(); + }; + IRCompileLayer CompileLayer(TransformLayer, + NullCompiler); + std::vector Modules; + + // Make sure that the calls from IRCompileLayer to ObjectTransformLayer + // compile. + NullResolver Resolver; + NullManager Manager; + CompileLayer.addModuleSet(std::vector(), &Manager, + &Resolver); + + // Make sure that the calls from ObjectTransformLayer to ObjectLinkingLayer + // compile. + decltype(TransformLayer)::ObjSetHandleT ObjSet; + TransformLayer.emitAndFinalize(ObjSet); + TransformLayer.findSymbolIn(ObjSet, Name, false); + TransformLayer.findSymbol(Name, true); + TransformLayer.mapSectionAddress(ObjSet, nullptr, 0); + TransformLayer.removeObjectSet(ObjSet); + } } } From 8e5fa4061688c30d10c92283b01513eba1f5f45f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 23 Jan 2016 20:58:09 +0000 Subject: [PATCH 0074/1132] Don't check if a list is empty with ilist::size. 
ilist::size() is O(n) while ilist::empty() is O(1) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258636 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit a2815b5dd775f01cf823cd4141569ad7c803ea4f) --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b252367ea97..707ec2a6778 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1621,7 +1621,7 @@ SelectionDAGISel::FinishBasicBlock() { // CodeGen Failure MBB if we have not codegened it yet. MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); - if (!FailureMBB->size()) { + if (FailureMBB->empty()) { FuncInfo->MBB = FailureMBB; FuncInfo->InsertPt = FailureMBB->end(); SDB->visitSPDescriptorFailure(SDB->SPDescriptor); From 0adea08c83e3a73c97c04d524b5b3acbd6dc76a1 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 24 Jan 2016 21:56:40 +0000 Subject: [PATCH 0075/1132] [Object][COFF] Set the generic SF_Exported flag on COFF exported symbols. The ORC ObjectLinkingLayer uses this flag during symbol lookup. Failure to set it causes all symbols to behave as if they were non-exported, which has caused failures in the kaleidoscope tutorials on Windows. Raising the flag should un-break the tutorials. No test case yet - none of the existing command line tools for printing symbol tables (llvm-nm, llvm-objdump) show the status of this flag, and I don't want to change the format from these tools without consulting their owners. I'll send an email to the dev-list to figure out the right way forward. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258665 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit a6a79a64c852c406c39f49c99ae46849f5224cb7) --- lib/Object/COFFObjectFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 4cd6aff5f17..35e9392a08e 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -207,7 +207,7 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { uint32_t Result = SymbolRef::SF_None; if (Symb.isExternal() || Symb.isWeakExternal()) - Result |= SymbolRef::SF_Global; + Result |= (SymbolRef::SF_Global | SymbolRef::SF_Exported); if (Symb.isWeakExternal()) Result |= SymbolRef::SF_Weak; From aff91d2dd8200a2de85916727402d0b0f71b7308 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 25 Jan 2016 01:21:45 +0000 Subject: [PATCH 0076/1132] [Object][COFF] Revert r258665 - It doesn't do what I had intended. I'm discussing the right approach for tracking visibility for COFF symbols on the llvm-dev list. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258666 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b4125011b57c45e10a7188efc60bcd47d693d37d) --- lib/Object/COFFObjectFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 35e9392a08e..4cd6aff5f17 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -207,7 +207,7 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { uint32_t Result = SymbolRef::SF_None; if (Symb.isExternal() || Symb.isWeakExternal()) - Result |= (SymbolRef::SF_Global | SymbolRef::SF_Exported); + Result |= SymbolRef::SF_Global; if (Symb.isWeakExternal()) Result |= SymbolRef::SF_Weak; From a65061291c0183c8aa02764567105486affeb657 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Mon, 25 Jan 2016 14:49:36 +0000 Subject: [PATCH 0077/1132] [DemandedBits] Fix computation of demanded bits for ICmps The computation of ICmp demanded bits is independent of the individual operand being evaluated. We simply return a mask consisting of the minimum leading zeroes of both operands. We were incorrectly passing "I" to ComputeKnownBits - this should be "UserI->getOperand(0)". In cases where we were evaluating the 1th operand, we were taking the minimum leading zeroes of it and itself. This should fix PR26266. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258690 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 6672aa947841b83ef8b110a795bc9dac51fc04c1) --- lib/Analysis/DemandedBits.cpp | 2 +- test/Analysis/DemandedBits/basic.ll | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 912c5ceb754..143d0b79f18 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -244,7 +244,7 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::ICmp: // Count the number of leading zeroes in each operand. - ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); + ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes); diff --git a/test/Analysis/DemandedBits/basic.ll b/test/Analysis/DemandedBits/basic.ll index 487e522e9db..9973edf79c1 100644 --- a/test/Analysis/DemandedBits/basic.ll +++ b/test/Analysis/DemandedBits/basic.ll @@ -24,11 +24,20 @@ define i1 @test_icmp1(i32 %a, i32 %b) { ; CHECK-LABEL: 'test_icmp2' ; CHECK-DAG: DemandedBits: 0x1 for %3 = icmp eq i32 %1, %2 -; CHECK-DAG: DemandedBits: 0xFF for %1 = and i32 %a, 255 -; CHECK-DAG: DemandedBits: 0xF for %2 = ashr i32 %1, 4 +; CHECK-DAG: DemandedBits: 0xFFF for %1 = and i32 %a, 255 +; CHECK-DAG: DemandedBits: 0xFF for %2 = ashr i32 %1, 4 define i1 @test_icmp2(i32 %a, i32 %b) { %1 = and i32 %a, 255 %2 = ashr i32 %1, 4 %3 = icmp eq i32 %1, %2 ret i1 %3 } + +; CHECK-LABEL: 'test_icmp3' +; CHECK-DAG: DemandedBits: 0xFFFFFFFF for %1 = and i32 %a, 255 +; CHECK-DAG: DemandedBits: 0x1 for %2 = icmp eq i32 -1, %1 +define i1 @test_icmp3(i32 %a) { + %1 = and i32 %a, 255 + %2 = icmp eq i32 -1, %1 + ret i1 %2 +} From 6a2c84c86170286f811d0676949d5a168ef24b77 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 25 Jan 
2016 22:08:25 +0000 Subject: [PATCH 0078/1132] X86ISelLowering: Fix cmov(cmov) special lowering bug There's a special case in EmitLoweredSelect() that produces an improved lowering for cmov(cmov) patterns. However this special lowering is currently broken if the inner cmov has multiple users so this patch stops using it in this case. If you wonder why this wasn't fixed by continuing to use the special lowering and inserting a 2nd PHI for the inner cmov: I believe this would incur additional copies/register pressure so the special lowering does not improve upon the normal one anymore in this case. This fixes http://llvm.org/PR26256 (= rdar://24329747) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258729 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 64d9b9e7834039a41e68e2105377467fc0c8b764) --- lib/Target/X86/X86ISelLowering.cpp | 3 +- test/CodeGen/X86/cmovcmov.ll | 49 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9b5920ab605..8571311a79b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21955,7 +21955,8 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, if (LastCMOV == MI && NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() && NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() && - NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg()) { + NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg() && + NextMIIt->getOperand(1).isKill()) { CascadedCMOV = &*NextMIIt; } diff --git a/test/CodeGen/X86/cmovcmov.ll b/test/CodeGen/X86/cmovcmov.ll index d3d9748d653..9363d31866d 100644 --- a/test/CodeGen/X86/cmovcmov.ll +++ b/test/CodeGen/X86/cmovcmov.ll @@ -224,3 +224,52 @@ entry: } attributes #0 = { nounwind } + +@g8 = global i8 0 + +; The following test failed because llvm had a bug where a structure like: +; +; %vreg12 = CMOV_GR8 
%vreg7, %vreg11 ... (lt) +; %vreg13 = CMOV_GR8 %vreg12, %vreg11 ... (gt) +; +; was lowered to: +; +; The first two cmovs got expanded to: +; BB#0: +; JL_1 BB#9 +; BB#7: +; JG_1 BB#9 +; BB#8: +; BB#9: +; vreg12 = phi(vreg7, BB#8, vreg11, BB#0, vreg12, BB#7) +; vreg13 = COPY vreg12 +; Which was invalid as %vreg12 is not the same value as %vreg13 + +; CHECK-LABEL: no_cascade_opt: +; CMOV-DAG: cmpl %edx, %esi +; CMOV-DAG: movb $20, %al +; CMOV-DAG: movb $20, %dl +; CMOV: jl [[BB0:.LBB[0-9_]+]] +; CMOV: movb %cl, %dl +; CMOV: [[BB0]]: +; CMOV: jg [[BB1:.LBB[0-9_]+]] +; CMOV: movb %dl, %al +; CMOV: [[BB1]]: +; CMOV: testl %edi, %edi +; CMOV: je [[BB2:.LBB[0-9_]+]] +; CMOV: movb %dl, %al +; CMOV: [[BB2]]: +; CMOV: movb %al, g8(%rip) +; CMOV: retq +define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) { +entry: + %c0 = icmp eq i32 %v0, 0 + %c1 = icmp slt i32 %v1, %v2 + %c2 = icmp sgt i32 %v1, %v2 + %trunc = trunc i32 %v3 to i8 + %sel0 = select i1 %c1, i8 20, i8 %trunc + %sel1 = select i1 %c2, i8 20, i8 %sel0 + %sel2 = select i1 %c0, i8 %sel1, i8 %sel0 + store volatile i8 %sel2, i8* @g8 + ret void +} From c7c252754abb22da2879bad10cc3a5c9624dfc18 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 26 Jan 2016 00:43:50 +0000 Subject: [PATCH 0079/1132] LiveIntervalAnalysis: Cleanup handleMove{Down|Up}() functions, NFC These two functions are hard to reason about. This commit makes the code more comprehensible: - Use four distinct variables (OldIdxIn, OldIdxOut, NewIdxIn, NewIdxOut) with a fixed value instead of a changing iterator I that points to different things during the function. - Remove the early explanation before the function in favor of more detailed comments inside the function. Should have more/clearer comments now stating which conditions are tested and which invariants hold at different points in the functions. The behaviour of the code was not changed. 
I hope that this will make it easier to review the changes in http://reviews.llvm.org/D9067 which I will adapt next. Differential Revision: http://reviews.llvm.org/D16379 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258756 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 1b9e9d8523378ec098b029cf01f229f5c43a629d) --- include/llvm/CodeGen/SlotIndexes.h | 6 + lib/CodeGen/LiveIntervalAnalysis.cpp | 272 ++++++++++++++------------- 2 files changed, 147 insertions(+), 131 deletions(-) diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 7b621bee259..71982e81fb1 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -213,6 +213,12 @@ namespace llvm { return A.listEntry()->getIndex() < B.listEntry()->getIndex(); } + /// Return true if A referes to the same or an earlier instruction as B. + /// This is equivalent to !isEarlierInstr(B, A). + static bool isEarlierEqualInstr(SlotIndex A, SlotIndex B) { + return !isEarlierInstr(B, A); + } + /// Return the distance from this index to the given one. int distance(SlotIndex other) const { return other.getIndex() - getIndex(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index a6dd48913dd..bcc276f0e37 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1021,172 +1021,182 @@ class LiveIntervals::HMEditor { } /// Update LR to reflect an instruction has been moved downwards from OldIdx - /// to NewIdx. - /// - /// 1. Live def at OldIdx: - /// Move def to NewIdx, assert endpoint after NewIdx. - /// - /// 2. Live def at OldIdx, killed at NewIdx: - /// Change to dead def at NewIdx. - /// (Happens when bundling def+kill together). - /// - /// 3. Dead def at OldIdx: - /// Move def to NewIdx, possibly across another live value. - /// - /// 4. Def at OldIdx AND at NewIdx: - /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx. 
- /// (Happens when bundling multiple defs together). - /// - /// 5. Value read at OldIdx, killed before NewIdx: - /// Extend kill to NewIdx. - /// + /// to NewIdx (OldIdx < NewIdx). void handleMoveDown(LiveRange &LR) { - // First look for a kill at OldIdx. - LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); - // Is LR even live at OldIdx? - if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + // Segment going into OldIdx. + LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex()); + + // No value live before or after OldIdx? Nothing to do. + if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start)) return; - // Handle a live-in value. - if (!SlotIndex::isSameInstr(I->start, OldIdx)) { - bool isKill = SlotIndex::isSameInstr(OldIdx, I->end); + LiveRange::iterator OldIdxOut; + // Do we have a value live-in to OldIdx? + if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) { // If the live-in value already extends to NewIdx, there is nothing to do. - if (!SlotIndex::isEarlierInstr(I->end, NewIdx)) + if (SlotIndex::isEarlierEqualInstr(NewIdx, OldIdxIn->end)) return; // Aggressively remove all kill flags from the old kill point. // Kill flags shouldn't be used while live intervals exist, they will be // reinserted by VirtRegRewriter. - if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end)) + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end)) for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) if (MO->isReg() && MO->isUse()) MO->setIsKill(false); - // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by - // overlapping ranges. Case 5 above. - I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); - // If this was a kill, there may also be a def. Otherwise we're done. + // Adjust OldIdxIn->end to reach NewIdx. This may temporarily make LR + // invalid by overlapping ranges. Case 5 above. 
+ bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end); + OldIdxIn->end = NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber()); + // If this was not a kill, then there was no def and we're done. if (!isKill) return; - ++I; + + // Did we have a Def at OldIdx? + OldIdxOut = std::next(OldIdxIn); + if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start)) + return; + } else { + OldIdxOut = OldIdxIn; } - // Check for a def at OldIdx. - if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start)) - return; - // We have a def at OldIdx. - VNInfo *DefVNI = I->valno; - assert(DefVNI->def == I->start && "Inconsistent def"); - DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); - // If the defined value extends beyond NewIdx, just move the def down. - // This is case 1 above. - if (SlotIndex::isEarlierInstr(NewIdx, I->end)) { - I->start = DefVNI->def; + // If we are here then there is a Definition at OldIdx. OldIdxOut points + // to the segment starting there. + assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) && + "No def?"); + VNInfo *OldIdxVNI = OldIdxOut->valno; + assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def"); + + // If the defined value extends beyond NewIdx, just move the beginning + // of the segment to NewIdx. + SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber()); + if (SlotIndex::isEarlierInstr(NewIdxDef, OldIdxOut->end)) { + OldIdxVNI->def = NewIdxDef; + OldIdxOut->start = OldIdxVNI->def; return; } - // The remaining possibilities are now: - // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx). - // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot(). - // In either case, it is possible that there is an existing def at NewIdx. - assert((I->end == OldIdx.getDeadSlot() || - SlotIndex::isSameInstr(I->end, NewIdx)) && + + // If we are here then we have a Definition at OldIdx which ends before + // NewIdx. 
Moving across unrelated defs is not allowed; That means we either + // had a dead-def at OldIdx or the OldIdxOut segment ends at NewIdx. + assert((OldIdxOut->end == OldIdx.getDeadSlot() || + SlotIndex::isSameInstr(OldIdxOut->end, NewIdxDef)) && "Cannot move def below kill"); - LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot()); - if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { - // There is an existing def at NewIdx, case 4 above. The def at OldIdx is - // coalesced into that value. - assert(NewI->valno != DefVNI && "Multiple defs of value?"); - LR.removeValNo(DefVNI); - return; + // Is there an existing Def at NewIdx? + LiveRange::iterator AfterNewIdx + = LR.advanceTo(OldIdxOut, NewIdx.getRegSlot()); + if (AfterNewIdx != E && + SlotIndex::isSameInstr(AfterNewIdx->start, NewIdxDef)) { + // There is an existing def at NewIdx. The def at OldIdx is coalesced into + // that value. + assert(AfterNewIdx->valno != OldIdxVNI && "Multiple defs of value?"); + LR.removeValNo(OldIdxVNI); + } else { + // There was no existing def at NewIdx. We need to create a dead def + // at NewIdx. Shift segments over the old OldIdxOut segment, this frees + // a new segment at the place where we want to construct the dead def. + // |- OldIdxOut -| |- X0 -| ... |- Xn -| |- AfterNewIdx -| + // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS. -| |- AfterNewIdx -| + assert(AfterNewIdx != OldIdxOut && "Inconsistent iterators"); + std::copy(std::next(OldIdxOut), AfterNewIdx, OldIdxOut); + // We can reuse OldIdxVNI now. + LiveRange::iterator NewSegment = std::prev(AfterNewIdx); + VNInfo *NewSegmentVNI = OldIdxVNI; + NewSegmentVNI->def = NewIdxDef; + *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(), + NewSegmentVNI); } - // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. - // If the def at OldIdx was dead, we allow it to be moved across other LR - // values. 
The new range should be placed immediately before NewI, move any - // intermediate ranges up. - assert(NewI != I && "Inconsistent iterators"); - std::copy(std::next(I), NewI, I); - *std::prev(NewI) - = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } /// Update LR to reflect an instruction has been moved upwards from OldIdx - /// to NewIdx. - /// - /// 1. Live def at OldIdx: - /// Hoist def to NewIdx. - /// - /// 2. Dead def at OldIdx: - /// Hoist def+end to NewIdx, possibly move across other values. - /// - /// 3. Dead def at OldIdx AND existing def at NewIdx: - /// Remove value defined at OldIdx, coalescing it with existing value. - /// - /// 4. Live def at OldIdx AND existing def at NewIdx: - /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx. - /// (Happens when bundling multiple defs together). - /// - /// 5. Value killed at OldIdx: - /// Hoist kill to NewIdx, then scan for last kill between NewIdx and - /// OldIdx. - /// + /// to NewIdx (NewIdx < OldIdx). void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { - // First look for a kill at OldIdx. - LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); - // Is LR even live at OldIdx? - if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + // Segment going into OldIdx. + LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex()); + + // No value live before or after OldIdx? Nothing to do. + if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start)) return; - // Handle a live-in value. - if (!SlotIndex::isSameInstr(I->start, OldIdx)) { - // If the live-in value isn't killed here, there is nothing to do. - if (!SlotIndex::isSameInstr(OldIdx, I->end)) + LiveRange::iterator OldIdxOut; + // Do we have a value live-in to OldIdx? 
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) { + // If the live-in value isn't killed here, then we have no Def at + // OldIdx, moreover the value must be live at NewIdx so there is nothing + // to do. + bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end); + if (!isKill) return; - // Adjust I->end to end at NewIdx. If we are hoisting a kill above - // another use, we need to search for that use. Case 5 above. - I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); - ++I; - // If OldIdx also defines a value, there couldn't have been another use. - if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { - // No def, search for the new kill. + + // At this point we have to move OldIdxIn->end back to the nearest + // previous use but no further than NewIdx. Moreover OldIdx is a Def then + // we cannot have any intermediate uses or the move would be illegal. + + OldIdxOut = std::next(OldIdxIn); + // Did we have a Def at OldIdx? + if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start)) { + // No def, search for the nearest previous use. // This can never be an early clobber kill since there is no def. - std::prev(I)->end = findLastUseBefore(Reg, LaneMask).getRegSlot(); + OldIdxIn->end = findLastUseBefore(Reg, LaneMask).getRegSlot(); + // We are done if there is no def at OldIdx. return; + } else { + // There can't have been any intermediate uses or defs, so move + // OldIdxIn->end to NewIdx. + OldIdxIn->end = NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber()); } + } else { + OldIdxOut = OldIdxIn; } - // Now deal with the def at OldIdx. - assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?"); - VNInfo *DefVNI = I->valno; - assert(DefVNI->def == I->start && "Inconsistent def"); - DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); - - // Check for an existing def at NewIdx. 
- LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot()); - if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { - assert(NewI->valno != DefVNI && "Same value defined more than once?"); - // There is an existing def at NewIdx. - if (I->end.isDead()) { - // Case 3: Remove the dead def at OldIdx. - LR.removeValNo(DefVNI); - return; + // If we are here then there is a Definition at OldIdx. OldIdxOut points + // to the segment starting there. + assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) && + "No def?"); + VNInfo *OldIdxVNI = OldIdxOut->valno; + assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def"); + bool OldIdxDefIsDead = OldIdxOut->end.isDead(); + + // Is there an existing def at NewIdx? + SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber()); + LiveRange::iterator NewIdxOut = LR.find(NewIdx.getRegSlot()); + if (SlotIndex::isSameInstr(NewIdxOut->start, NewIdx)) { + assert(NewIdxOut->valno != OldIdxVNI && + "Same value defined more than once?"); + // If OldIdx was a dead def remove it. + if (!OldIdxDefIsDead) { + // Case 3: Remove segment starting at NewIdx and move begin of OldIdxOut + // to NewIdx so it can take its place. + OldIdxVNI->def = NewIdxDef; + OldIdxOut->start = NewIdxDef; + LR.removeValNo(NewIdxOut->valno); + } else { + // Case 4: Remove the dead def at OldIdx. + LR.removeValNo(OldIdxVNI); + } + } else { + // Previously nothing was live after NewIdx, so all we have to do now is + // move the begin of OldIdxOut to NewIdx. + if (!OldIdxDefIsDead) { + // Leave the end point of a live def. + OldIdxVNI->def = NewIdxDef; + OldIdxOut->start = NewIdxDef; + } else { + // OldIdxVNI is a dead def. It may have been moved across other values + // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut) + // down one position. + // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - | + // => |- undef/NewIdxOut -| |- X0 -| ... 
|- Xn-1 -| |- next -| + std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut)); + // OldIdxVNI can be reused now to build a new dead def segment. + LiveRange::iterator NewSegment = NewIdxOut; + VNInfo *NewSegmentVNI = OldIdxVNI; + *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(), + NewSegmentVNI); + NewSegmentVNI->def = NewIdxDef; } - // Case 4: Replace def at NewIdx with live def at OldIdx. - I->start = DefVNI->def; - LR.removeValNo(NewI->valno); - return; - } - - // There is no existing def at NewIdx. Hoist DefVNI. - if (!I->end.isDead()) { - // Leave the end point of a live def. - I->start = DefVNI->def; - return; } - - // DefVNI is a dead def. It may have been moved across other values in LR, - // so move I up to NewI. Slide [NewI;I) down one position. - std::copy_backward(NewI, I, std::next(I)); - *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } void updateRegMaskSlots() { From 3f6b575d51d99337740ec4e736484239e76df46a Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 26 Jan 2016 01:40:48 +0000 Subject: [PATCH 0080/1132] LiveIntervalAnalysis: Improve some comments As recommended by Justin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258771 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 632580f369b4abbda80ed248cfe4086cfea3ecc5) --- include/llvm/CodeGen/SlotIndexes.h | 2 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 71982e81fb1..a83e1587c7c 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -213,7 +213,7 @@ namespace llvm { return A.listEntry()->getIndex() < B.listEntry()->getIndex(); } - /// Return true if A referes to the same or an earlier instruction as B. + /// Return true if A refers to the same instruction as B or an earlier one. /// This is equivalent to !isEarlierInstr(B, A). 
static bool isEarlierEqualInstr(SlotIndex A, SlotIndex B) { return !isEarlierInstr(B, A); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index bcc276f0e37..bab3d0d01bd 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1045,7 +1045,7 @@ class LiveIntervals::HMEditor { if (MO->isReg() && MO->isUse()) MO->setIsKill(false); // Adjust OldIdxIn->end to reach NewIdx. This may temporarily make LR - // invalid by overlapping ranges. Case 5 above. + // invalid by overlapping ranges. bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end); OldIdxIn->end = NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber()); // If this was not a kill, then there was no def and we're done. @@ -1166,13 +1166,13 @@ class LiveIntervals::HMEditor { "Same value defined more than once?"); // If OldIdx was a dead def remove it. if (!OldIdxDefIsDead) { - // Case 3: Remove segment starting at NewIdx and move begin of OldIdxOut - // to NewIdx so it can take its place. + // Remove segment starting at NewIdx and move begin of OldIdxOut to + // NewIdx so it can take its place. OldIdxVNI->def = NewIdxDef; OldIdxOut->start = NewIdxDef; LR.removeValNo(NewIdxOut->valno); } else { - // Case 4: Remove the dead def at OldIdx. + // Simply remove the dead def at OldIdx. LR.removeValNo(OldIdxVNI); } } else { From c056180f81525e2faa395274970fc46091ec47ca Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Jan 2016 06:10:15 +0000 Subject: [PATCH 0081/1132] [X86] Mark LDS/LES as not being allowed in 64-bit mode. Their opcodes are used as part of the VEX prefix in 64-bit mode. Clearly the disassembler implicitly decoded them as AVX instructions in 64-bit mode, but I think the AsmParser would have encoded them. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258793 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit a57544039bc403cc069fc41cdec2eb986a172db3) --- lib/Target/X86/X86InstrSystem.td | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index a97d1e5c86d..97dcd40d212 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -339,9 +339,11 @@ def POPGS64 : I<0xa9, RawFrm, (outs), (ins), def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16; + "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16, + Requires<[Not64BitMode]>; def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32; + "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32, + Requires<[Not64BitMode]>; def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; @@ -351,9 +353,11 @@ def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), "lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16; + "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16, + Requires<[Not64BitMode]>; def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32; + "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32, + Requires<[Not64BitMode]>; def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; From aaaae03076beb1eed5fe726ecb231f78b52ce5b1 Mon Sep 17 00:00:00 2001 From: Igor Laevsky Date: Tue, 26 Jan 2016 13:31:11 +0000 Subject: 
[PATCH 0082/1132] [DebugInfo] Fix DWARFDebugFrame instruction operand ordering We can't rely on the evaluation order of function arguments. Differential Revision: http://reviews.llvm.org/D16509 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258806 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 622ed26f8a0ada8e164db8941fbfacaa41e3476b) --- lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 1aa31be71fe..72dc95e977a 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -160,18 +160,26 @@ void FrameEntry::parseInstructions(DataExtractor Data, uint32_t *Offset, case DW_CFA_offset_extended: case DW_CFA_register: case DW_CFA_def_cfa: - case DW_CFA_val_offset: + case DW_CFA_val_offset: { // Operands: ULEB128, ULEB128 - addInstruction(Opcode, Data.getULEB128(Offset), - Data.getULEB128(Offset)); + // Note: We can not embed getULEB128 directly into function + // argument list. getULEB128 changes Offset and order of evaluation + // for arguments is unspecified. 
+ auto op1 = Data.getULEB128(Offset); + auto op2 = Data.getULEB128(Offset); + addInstruction(Opcode, op1, op2); break; + } case DW_CFA_offset_extended_sf: case DW_CFA_def_cfa_sf: - case DW_CFA_val_offset_sf: + case DW_CFA_val_offset_sf: { // Operands: ULEB128, SLEB128 - addInstruction(Opcode, Data.getULEB128(Offset), - Data.getSLEB128(Offset)); + // Note: see comment for the previous case + auto op1 = Data.getULEB128(Offset); + auto op2 = (uint64_t)Data.getSLEB128(Offset); + addInstruction(Opcode, op1, op2); break; + } case DW_CFA_def_cfa_expression: case DW_CFA_expression: case DW_CFA_val_expression: From 2aa3b13560c0dec06072fa3ee4affc9d81ad4dbe Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 26 Jan 2016 16:44:37 +0000 Subject: [PATCH 0083/1132] Reflect the MC/MCDisassembler split on the include/ level. No functional change, just moving code around. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258818 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b6242a88c246e87d54413d32d69afead139c38e3) --- include/llvm/MC/{ => MCDisassembler}/MCDisassembler.h | 6 +++--- include/llvm/MC/{ => MCDisassembler}/MCExternalSymbolizer.h | 6 +++--- include/llvm/MC/{ => MCDisassembler}/MCRelocationInfo.h | 4 ++-- include/llvm/MC/{ => MCDisassembler}/MCSymbolizer.h | 6 +++--- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp | 2 +- lib/MC/CMakeLists.txt | 1 - lib/MC/MCDisassembler/CMakeLists.txt | 5 +++-- lib/MC/MCDisassembler/Disassembler.cpp | 6 +++--- lib/MC/MCDisassembler/MCDisassembler.cpp | 4 ++-- lib/MC/MCDisassembler/MCExternalSymbolizer.cpp | 2 +- lib/MC/MCDisassembler/MCRelocationInfo.cpp | 2 +- lib/MC/{ => MCDisassembler}/MCSymbolizer.cpp | 2 +- lib/Target/AArch64/Disassembler/AArch64Disassembler.h | 2 +- lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h | 2 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp | 2 +- 
lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 2 +- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 2 +- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 2 +- lib/Target/Sparc/Disassembler/SparcDisassembler.cpp | 2 +- lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp | 2 +- lib/Target/X86/Disassembler/X86Disassembler.cpp | 2 +- lib/Target/X86/Disassembler/X86Disassembler.h | 2 +- lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 2 +- tools/llvm-mc/Disassembler.cpp | 2 +- tools/llvm-objdump/MachODump.cpp | 2 +- tools/llvm-objdump/llvm-objdump.cpp | 6 +++--- tools/llvm-rtdyld/llvm-rtdyld.cpp | 2 +- tools/sancov/sancov.cc | 2 +- 30 files changed, 43 insertions(+), 43 deletions(-) rename include/llvm/MC/{ => MCDisassembler}/MCDisassembler.h (96%) rename include/llvm/MC/{ => MCDisassembler}/MCExternalSymbolizer.h (92%) rename include/llvm/MC/{ => MCDisassembler}/MCRelocationInfo.h (93%) rename include/llvm/MC/{ => MCDisassembler}/MCSymbolizer.h (95%) rename lib/MC/{ => MCDisassembler}/MCSymbolizer.cpp (89%) diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler/MCDisassembler.h similarity index 96% rename from include/llvm/MC/MCDisassembler.h rename to include/llvm/MC/MCDisassembler/MCDisassembler.h index 57c40d660f6..e8821337412 100644 --- a/include/llvm/MC/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -6,12 +6,12 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCDISASSEMBLER_H -#define LLVM_MC_MCDISASSEMBLER_H +#ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H +#define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H #include "llvm-c/Disassembler.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/MC/MCSymbolizer.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" #include "llvm/Support/DataTypes.h" namespace llvm { diff --git a/include/llvm/MC/MCExternalSymbolizer.h b/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h similarity index 92% rename from include/llvm/MC/MCExternalSymbolizer.h rename to include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h index 2c7d23707c9..bd3e5d4638e 100644 --- a/include/llvm/MC/MCExternalSymbolizer.h +++ b/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h @@ -13,11 +13,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCEXTERNALSYMBOLIZER_H -#define LLVM_MC_MCEXTERNALSYMBOLIZER_H +#ifndef LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H +#define LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H #include "llvm-c/Disassembler.h" -#include "llvm/MC/MCSymbolizer.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" #include namespace llvm { diff --git a/include/llvm/MC/MCRelocationInfo.h b/include/llvm/MC/MCDisassembler/MCRelocationInfo.h similarity index 93% rename from include/llvm/MC/MCRelocationInfo.h rename to include/llvm/MC/MCDisassembler/MCRelocationInfo.h index 02ff1921b07..25334f755ee 100644 --- a/include/llvm/MC/MCRelocationInfo.h +++ b/include/llvm/MC/MCDisassembler/MCRelocationInfo.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCRELOCATIONINFO_H -#define LLVM_MC_MCRELOCATIONINFO_H +#ifndef LLVM_MC_MCDISASSEMBLER_MCRELOCATIONINFO_H +#define LLVM_MC_MCDISASSEMBLER_MCRELOCATIONINFO_H #include "llvm/Support/Compiler.h" diff --git a/include/llvm/MC/MCSymbolizer.h 
b/include/llvm/MC/MCDisassembler/MCSymbolizer.h similarity index 95% rename from include/llvm/MC/MCSymbolizer.h rename to include/llvm/MC/MCDisassembler/MCSymbolizer.h index 2ef17673f09..713467c0a3e 100644 --- a/include/llvm/MC/MCSymbolizer.h +++ b/include/llvm/MC/MCDisassembler/MCSymbolizer.h @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCSYMBOLIZER_H -#define LLVM_MC_MCSYMBOLIZER_H +#ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H +#define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H -#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 58ce88a68f2..5371f983962 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -12,7 +12,7 @@ #include "RuntimeDyldImpl.h" #include "llvm/ExecutionEngine/RuntimeDyldChecker.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Path.h" #include diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 8c015644d8a..d1aac8104a3 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -38,7 +38,6 @@ add_llvm_library(LLVMMC MCSubtargetInfo.cpp MCSymbol.cpp MCSymbolELF.cpp - MCSymbolizer.cpp MCTargetOptions.cpp MCValue.cpp MCWin64EH.cpp diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt index f266f8fcd30..e940afc56f5 100644 --- a/lib/MC/MCDisassembler/CMakeLists.txt +++ b/lib/MC/MCDisassembler/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMMCDisassembler Disassembler.cpp - MCRelocationInfo.cpp - MCExternalSymbolizer.cpp MCDisassembler.cpp + MCExternalSymbolizer.cpp + 
MCRelocationInfo.cpp + MCSymbolizer.cpp ) diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 82063fb7469..21e8748b797 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -11,14 +11,14 @@ #include "llvm-c/Disassembler.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCRelocationInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbolizer.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/MC/MCDisassembler/MCDisassembler.cpp b/lib/MC/MCDisassembler/MCDisassembler.cpp index 1084e5ea766..3a4f7382bd3 100644 --- a/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCExternalSymbolizer.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp index 5fc2ca44f5d..1969c5dc66a 100644 --- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp +++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCExternalSymbolizer.h" +#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" #include 
"llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp index 08158e7f737..904553bff54 100644 --- a/lib/MC/MCDisassembler/MCRelocationInfo.cpp +++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm-c/Disassembler.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/MC/MCSymbolizer.cpp b/lib/MC/MCDisassembler/MCSymbolizer.cpp similarity index 89% rename from lib/MC/MCSymbolizer.cpp rename to lib/MC/MCDisassembler/MCSymbolizer.cpp index 4080e40b3f1..c0f707d356c 100644 --- a/lib/MC/MCSymbolizer.cpp +++ b/lib/MC/MCDisassembler/MCSymbolizer.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCSymbolizer.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" using namespace llvm; diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h index 7fb57adfeeb..e475e505e7d 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H #define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" namespace llvm { diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h index 12b8450b13c..49e84496379 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h @@ -14,7 +14,7 @@ #ifndef 
LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64EXTERNALSYMBOLIZER_H #define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64EXTERNALSYMBOLIZER_H -#include "llvm/MC/MCExternalSymbolizer.h" +#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" namespace llvm { diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 5a63f04d6ab..52e1e91203e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -20,7 +20,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e63defed228..21ad775ae33 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 4468132588c..482bcf90251 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -12,7 +12,7 @@ #include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" using namespace llvm; using namespace object; diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp 
b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 4a9c3413cb2..fc39d096007 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -16,7 +16,7 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 3c1a771f97e..ca149a19d63 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -15,7 +15,7 @@ #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 1fc84fb7655..fd3c813acf4 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index 51751ec511c..8585d5c4392 100644 --- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -14,7 +14,7 @@ 
#include "Sparc.h" #include "SparcRegisterInfo.h" #include "SparcSubtarget.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCContext.h" diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index bf67b75d533..70fa6888fab 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "SystemZ.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index ce8fcf16466..b9985675ffd 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -17,7 +17,7 @@ #include "X86Disassembler.h" #include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index d7f426b2641..a43bbcd4354 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -75,7 +75,7 @@ #define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H #include "X86DisassemblerDecoderCommon.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" namespace llvm { diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp 
b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index aaf267af531..2e8f762458a 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -15,7 +15,7 @@ #include "XCore.h" #include "XCoreRegisterInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp index 5ffeffc0768..8185947fc5e 100644 --- a/tools/llvm-mc/Disassembler.cpp +++ b/tools/llvm-mc/Disassembler.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 258c0b520a3..6917f705de5 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -22,7 +22,7 @@ #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index d5ae5de4b5a..87ea52ace61 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -24,18 +24,18 @@ #include "llvm/CodeGen/FaultMaps.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCInst.h" 
#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCRelocationInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/Archive.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp index 58c56f5cae4..81b9c7dc502 100644 --- a/tools/llvm-rtdyld/llvm-rtdyld.cpp +++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp @@ -19,7 +19,7 @@ #include "llvm/ExecutionEngine/RuntimeDyldChecker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/tools/sancov/sancov.cc b/tools/sancov/sancov.cc index 7daabe5eb3e..39471460760 100644 --- a/tools/sancov/sancov.cc +++ b/tools/sancov/sancov.cc @@ -14,7 +14,7 @@ #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrAnalysis.h" From 95ccd89cf80ef21e0bbf7f28238823d8f8b6b39e Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 26 Jan 2016 18:20:49 +0000 Subject: [PATCH 0084/1132] Update the comments for the macho-invalid-zero-ncmds test and fix llvm-objdump when printing the Mach Header to print the unknown cputype and cpusubtype fields as decimal instead of not printing them at all. And change the test to check for that. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258826 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit ce2bf8c364b33386bdb75ff87b4f69fe5ff53cfb) --- test/Object/macho-invalid.test | 8 ++++++-- tools/llvm-objdump/MachODump.cpp | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/test/Object/macho-invalid.test b/test/Object/macho-invalid.test index 781c9367c1e..77a3944bdd6 100644 --- a/test/Object/macho-invalid.test +++ b/test/Object/macho-invalid.test @@ -1,5 +1,9 @@ -// No crash, might not be totally invalid -RUN: llvm-objdump -private-headers %p/Inputs/macho-invalid-zero-ncmds +// An odd Mach-O file, with just a mach header with all but the magic field +// and filetype zeros. The cputype and cpusubtype fields being zero are invalid, +// but that does not mater for the most part to display some of the contents. +RUN: llvm-objdump -private-headers %p/Inputs/macho-invalid-zero-ncmds -macho \ +RUN: | FileCheck -check-prefix ZERO-NCMDS %s +ZERO-NCMDS: MH_MAGIC_64 0 0 0x00 OBJECT 0 0 0x00000000 RUN: not llvm-objdump -private-headers %p/Inputs/macho64-invalid-incomplete-load-command 2>&1 \ RUN: | FileCheck -check-prefix INCOMPLETE-LOADC %s diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 6917f705de5..6fd6e3f448b 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -7145,6 +7145,10 @@ static void PrintMachHeader(uint32_t magic, uint32_t cputype, break; } break; + default: + outs() << format(" %7d", cputype); + outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); + break; } if ((cpusubtype & MachO::CPU_SUBTYPE_MASK) == MachO::CPU_SUBTYPE_LIB64) { outs() << " LIB64"; From 1aab97969b08e7071d88e6ab645c61b98cce2381 Mon Sep 17 00:00:00 2001 From: Aditya Nandakumar Date: Tue, 26 Jan 2016 18:42:36 +0000 Subject: [PATCH 0085/1132] Reassociate: Reprocess RedoInsts after each inst Previously the RedoInsts was processed at the end of the block. 
However it was possible that it left behind some instructions that were not canonicalized. This should guarantee that any previous instruction in the basic block is canonicalized before we process a new instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258830 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit a2fd503e0ee45f26c0cd670c527e278b3d84bcbb) --- lib/Transforms/Scalar/Reassociate.cpp | 67 +++++++++++-------- .../Reassociate/prev_insts_canonicalized.ll | 57 ++++++++++++++++ .../Reassociate/reassoc-intermediate-fnegs.ll | 6 +- test/Transforms/Reassociate/xor_reassoc.ll | 4 +- 4 files changed, 101 insertions(+), 33 deletions(-) create mode 100644 test/Transforms/Reassociate/prev_insts_canonicalized.ll diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index bcadd4e2bee..a6fe51cc872 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -163,7 +163,8 @@ namespace { AU.addPreserved(); } private: - void BuildRankMap(Function &F); + void BuildRankMap(Function &F, ReversePostOrderTraversal &RPOT); + unsigned getRank(Value *V); void canonicalizeOperands(Instruction *I); void ReassociateExpression(BinaryOperator *I); @@ -246,7 +247,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, return nullptr; } -void Reassociate::BuildRankMap(Function &F) { +void Reassociate::BuildRankMap(Function &F, + ReversePostOrderTraversal &RPOT) { unsigned i = 2; // Assign distinct ranks to function arguments. 
@@ -255,7 +257,6 @@ void Reassociate::BuildRankMap(Function &F) { DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n"); } - ReversePostOrderTraversal RPOT(&F); for (ReversePostOrderTraversal::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { BasicBlock *BB = *I; @@ -2259,13 +2260,28 @@ bool Reassociate::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; - // Calculate the rank map for F - BuildRankMap(F); + // Reassociate needs for each instruction to have its operands already + // processed, so we first perform a RPOT of the basic blocks so that + // when we process a basic block, all its dominators have been processed + // before. + ReversePostOrderTraversal RPOT(&F); + BuildRankMap(F, RPOT); MadeChange = false; - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + for (BasicBlock *BI : RPOT) { + // Use a worklist to keep track of which instructions have been processed + // (and which insts won't be optimized again) so when redoing insts, + // optimize insts rightaway which won't be processed later. + SmallSet Worklist; + + // Insert all instructions in the BB + for (Instruction &I : *BI) + Worklist.insert(&I); + // Optimize every instruction in the basic block. - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ) + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) { + // This instruction has been processed. + Worklist.erase(&*II); if (isInstructionTriviallyDead(&*II)) { EraseInst(&*II++); } else { @@ -2274,27 +2290,22 @@ bool Reassociate::runOnFunction(Function &F) { ++II; } - // Make a copy of all the instructions to be redone so we can remove dead - // instructions. - SetVector> ToRedo(RedoInsts); - // Iterate over all instructions to be reevaluated and remove trivially dead - // instructions. If any operand of the trivially dead instruction becomes - // dead mark it for deletion as well. 
Continue this process until all - // trivially dead instructions have been removed. - while (!ToRedo.empty()) { - Instruction *I = ToRedo.pop_back_val(); - if (isInstructionTriviallyDead(I)) - RecursivelyEraseDeadInsts(I, ToRedo); - } - - // Now that we have removed dead instructions, we can reoptimize the - // remaining instructions. - while (!RedoInsts.empty()) { - Instruction *I = RedoInsts.pop_back_val(); - if (isInstructionTriviallyDead(I)) - EraseInst(I); - else - OptimizeInst(I); + // If the above optimizations produced new instructions to optimize or + // made modifications which need to be redone, do them now if they won't + // be handled later. + while (!RedoInsts.empty()) { + Instruction *I = RedoInsts.pop_back_val(); + // Process instructions that won't be processed later, either + // inside the block itself or in another basic block (based on rank), + // since these will be processed later. + if ((I->getParent() != BI || !Worklist.count(I)) && + RankMap[I->getParent()] <= RankMap[BI]) { + if (isInstructionTriviallyDead(I)) + EraseInst(I); + else + OptimizeInst(I); + } + } } } diff --git a/test/Transforms/Reassociate/prev_insts_canonicalized.ll b/test/Transforms/Reassociate/prev_insts_canonicalized.ll new file mode 100644 index 00000000000..649761e57c9 --- /dev/null +++ b/test/Transforms/Reassociate/prev_insts_canonicalized.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -reassociate -S | FileCheck %s + +; These tests make sure that before processing insts +; any previous instructions are already canonicalized. 
+define i32 @foo(i32 %in) { +; CHECK-LABEL: @foo +; CHECK-NEXT: %factor = mul i32 %in, -4 +; CHECK-NEXT: %factor1 = mul i32 %in, 2 +; CHECK-NEXT: %_3 = add i32 %factor, 1 +; CHECK-NEXT: %_5 = add i32 %_3, %factor1 +; CHECK-NEXT: ret i32 %_5 + %_0 = add i32 %in, 1 + %_1 = mul i32 %in, -2 + %_2 = add i32 %_0, %_1 + %_3 = add i32 %_1, %_2 + %_4 = add i32 %_3, 1 + %_5 = add i32 %in, %_3 + ret i32 %_5 +} + +; CHECK-LABEL: @foo1 +define void @foo1(float %in, i1 %cmp) { +wrapper_entry: + br label %foo1 + +for.body: + %0 = fadd float %in1, %in1 + br label %foo1 + +foo1: + %_0 = fmul fast float %in, -3.000000e+00 + %_1 = fmul fast float %_0, 3.000000e+00 + %in1 = fadd fast float -3.000000e+00, %_1 + %in1use = fadd fast float %in1, %in1 + br label %for.body + + +} + +; CHECK-LABEL: @foo2 +define void @foo2(float %in, i1 %cmp) { +wrapper_entry: + br label %for.body + +for.body: +; If the operands of the phi are sheduled for processing before +; foo1 is processed, the invariant of reassociate are not preserved + %unused = phi float [%in1, %foo1], [undef, %wrapper_entry] + br label %foo1 + +foo1: + %_0 = fmul fast float %in, -3.000000e+00 + %_1 = fmul fast float %_0, 3.000000e+00 + %in1 = fadd fast float -3.000000e+00, %_1 + %in1use = fadd fast float %in1, %in1 + br label %for.body +} diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll index c2cdffce61e..7d82ef7e7a2 100644 --- a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll +++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -reassociate -S | FileCheck %s ; CHECK-LABEL: faddsubAssoc1 -; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500 -; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500 -; CHECK: fsub fast half [[TMP2]], [[TMP1]] +; CHECK: [[TMP1:%.*]] = fsub fast half 0xH8000, %a +; CHECK: [[TMP2:%.*]] = fadd fast half %b, [[TMP1]] +; CHECK: fmul fast half [[TMP2]], 0xH4500 ; CHECK: ret ; 
Input is A op (B op C) define half @faddsubAssoc1(half %a, half %b) { diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll index 0bed6f35880..a22689805fb 100644 --- a/test/Transforms/Reassociate/xor_reassoc.ll +++ b/test/Transforms/Reassociate/xor_reassoc.ll @@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) { %xor1 = xor i32 %xor, %and ret i32 %xor1 ; CHECK-LABEL: @xor_special2( -; CHECK: %xor = xor i32 %x, 123 -; CHECK: %xor1 = xor i32 %xor, %y +; CHECK: %xor = xor i32 %y, 123 +; CHECK: %xor1 = xor i32 %xor, %x ; CHECK: ret i32 %xor1 } From 8de8bd50dea7f03e81b22977f24c2b926018a870 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 26 Jan 2016 23:43:37 +0000 Subject: [PATCH 0086/1132] Fix identify_magic() to check that a file that starts with MH_MAGIC is at least as big as the mach header to be identified as a Mach-O file and make sure smaller files are not identified as a Mach-O files but as unknown files. Also fix identify_magic() so it looks at all 4 bytes of the filetype field when determining the type of the Mach-O file. Then fix the macho-invalid-header test case to check that it is an unknown file and make sure it does not get the error for object_error::parse_failed. And also update the unit tests. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258883 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 6bc5c395e182d3abd5ee113f1c2f5d6649870a8f) --- lib/Support/Path.cpp | 17 +++++++++++++++-- test/Object/macho-invalid.test | 3 ++- unittests/Support/Path.cpp | 30 +++++++++++++++++++----------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 4952f59fc24..875bf75b1f0 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/COFF.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" @@ -1040,12 +1041,24 @@ file_magic identify_magic(StringRef Magic) { Magic[2] == char(0xFA) && (Magic[3] == char(0xCE) || Magic[3] == char(0xCF))) { /* Native endian */ - if (Magic.size() >= 16) type = Magic[14] << 8 | Magic[15]; + size_t MinSize; + if (Magic[3] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; } else if ((Magic[0] == char(0xCE) || Magic[0] == char(0xCF)) && Magic[1] == char(0xFA) && Magic[2] == char(0xED) && Magic[3] == char(0xFE)) { /* Reverse endian */ - if (Magic.size() >= 14) type = Magic[13] << 8 | Magic[12]; + size_t MinSize; + if (Magic[0] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[15] << 24 | Magic[14] << 12 |Magic[13] << 8 | Magic[12]; } switch (type) { default: break; diff --git a/test/Object/macho-invalid.test b/test/Object/macho-invalid.test index 77a3944bdd6..cbd378a0332 100644 --- a/test/Object/macho-invalid.test +++ b/test/Object/macho-invalid.test @@ -56,7 +56,8 @@ RUN: | FileCheck -check-prefix 
INVALID-SECTION-IDX-SYMBOL-SEC-pax %s INVALID-SECTION-IDX-SYMBOL-SEC-pax: 0000000100000000 0f 42 0010 00000065 __mh_execute_header RUN: not llvm-objdump -private-headers %p/Inputs/macho-invalid-header 2>&1 | FileCheck -check-prefix INVALID-HEADER %s -INVALID-HEADER: Invalid data was encountered while parsing the file +INVALID-HEADER: The file was not recognized as a valid object file. +NOT-INVALID-HEADER: Invalid data was encountered while parsing the file. RUN: not llvm-objdump -private-headers %p/Inputs/macho64-invalid-incomplete-segment-load-command 2>&1 | FileCheck -check-prefix INCOMPLETE-SEGMENT-LOADC %s INCOMPLETE-SEGMENT-LOADC: Invalid data was encountered while parsing the file diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp index 3f626f87888..7bbac7ae9df 100644 --- a/unittests/Support/Path.cpp +++ b/unittests/Support/Path.cpp @@ -726,21 +726,29 @@ const char coff_import_library[] = "\x00\x00\xff\xff...."; const char elf_relocatable[] = { 0x7f, 'E', 'L', 'F', 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; const char macho_universal_binary[] = "\xca\xfe\xba\xbe...\0x00"; -const char macho_object[] = "\xfe\xed\xfa\xce..........\x00\x01"; -const char macho_executable[] = "\xfe\xed\xfa\xce..........\x00\x02"; +const char macho_object[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x01............"; +const char macho_executable[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x02............"; const char macho_fixed_virtual_memory_shared_lib[] = - "\xfe\xed\xfa\xce..........\x00\x03"; -const char macho_core[] = "\xfe\xed\xfa\xce..........\x00\x04"; -const char macho_preload_executable[] = "\xfe\xed\xfa\xce..........\x00\x05"; + "\xfe\xed\xfa\xce........\x00\x00\x00\x03............"; +const char macho_core[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x04............"; +const char macho_preload_executable[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x05............"; const char macho_dynamically_linked_shared_lib[] = - "\xfe\xed\xfa\xce..........\x00\x06"; 
-const char macho_dynamic_linker[] = "\xfe\xed\xfa\xce..........\x00\x07"; -const char macho_bundle[] = "\xfe\xed\xfa\xce..........\x00\x08"; -const char macho_dsym_companion[] = "\xfe\xed\xfa\xce..........\x00\x0a"; -const char macho_kext_bundle[] = "\xfe\xed\xfa\xce..........\x00\x0b"; + "\xfe\xed\xfa\xce........\x00\x00\x00\x06............"; +const char macho_dynamic_linker[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x07............"; +const char macho_bundle[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x08............"; +const char macho_dsym_companion[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x0a............"; +const char macho_kext_bundle[] = + "\xfe\xed\xfa\xce........\x00\x00\x00\x0b............"; const char windows_resource[] = "\x00\x00\x00\x00\x020\x00\x00\x00\xff"; const char macho_dynamically_linked_shared_lib_stub[] = - "\xfe\xed\xfa\xce..........\x00\x09"; + "\xfe\xed\xfa\xce........\x00\x00\x00\x09............"; TEST_F(FileSystemTest, Magic) { struct type { From 9ed96396f9e668d25d2e00b576ccca9eb4e19777 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Wed, 27 Jan 2016 02:43:28 +0000 Subject: [PATCH 0087/1132] [SimplifyCFG] Don't mistake icmp of and for a tree of comparisons SimplifyCFG tries to turn complex branch conditions into a switch. Some of it's logic attempts to reason about bitwise arithmetic produced by InstCombine. InstCombine can turn things like (X == 2) || (X == 3) into (X & 1) == 2 and so SimplifyCFG tries to detect when this occurs so that it can produce a switch instruction. However, the legality checking was not sufficient to determine whether or not this had occured. Correctly check this case by requiring that the right-hand side of the comparison be a power of two. This fixes PR26323. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258904 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit bc3672991cf112d5b1061345dfa8693400a9f823) --- lib/Transforms/Utils/SimplifyCFG.cpp | 5 ++-- test/Transforms/SimplifyCFG/switch_create.ll | 25 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3125a2c359b..93ec53e9a5e 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -420,13 +420,14 @@ struct ConstantComparesGatherer { ConstantInt *RHSC; // Pattern match a special case - // (x & ~2^x) == y --> x == y || x == y|2^x + // (x & ~2^z) == y --> x == y || x == y|2^z // This undoes a transformation done by instcombine to fuse 2 compares. if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { if (match(ICI->getOperand(0), m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) { APInt Not = ~RHSC->getValue(); - if (Not.isPowerOf2()) { + if (Not.isPowerOf2() && C->getValue().isPowerOf2() && + Not != C->getValue()) { // If we already have a value for the switch, it has to match! 
if(!setValueOnce(RHSVal)) return false; diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll index 490b7513a94..f895a9f0284 100644 --- a/test/Transforms/SimplifyCFG/switch_create.ll +++ b/test/Transforms/SimplifyCFG/switch_create.ll @@ -554,3 +554,28 @@ bb20: ; preds = %bb19, %bb8 ; CHECK: %arg.off = add i32 %arg, -8 ; CHECK: icmp ult i32 %arg.off, 11 } + +define void @PR26323(i1 %tobool23, i32 %tmp3) { +entry: + %tobool5 = icmp ne i32 %tmp3, 0 + %neg14 = and i32 %tmp3, -2 + %cmp17 = icmp ne i32 %neg14, -1 + %or.cond = and i1 %tobool5, %tobool23 + %or.cond1 = and i1 %cmp17, %or.cond + br i1 %or.cond1, label %if.end29, label %if.then27 + +if.then27: ; preds = %entry + call void @foo1() + unreachable + +if.end29: ; preds = %entry + ret void +} + +; CHECK-LABEL: define void @PR26323( +; CHECK: %tobool5 = icmp ne i32 %tmp3, 0 +; CHECK: %neg14 = and i32 %tmp3, -2 +; CHECK: %cmp17 = icmp ne i32 %neg14, -1 +; CHECK: %or.cond = and i1 %tobool5, %tobool23 +; CHECK: %or.cond1 = and i1 %cmp17, %or.cond +; CHECK: br i1 %or.cond1, label %if.end29, label %if.then27 From d1e77ddac414abbeca92914e2452fb72b1fd2624 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 27 Jan 2016 03:45:25 +0000 Subject: [PATCH 0088/1132] Function: Slightly simplify code by using existing hasFnAttribute() convenience function git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258907 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 46cf99c555105b914ea086ce5be66c294b2577e9) --- include/llvm/IR/Function.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 4afe44c1bef..fae7b3931b8 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -262,8 +262,7 @@ class Function : public GlobalObject, public ilist_node { /// @brief Determine if the function does not access memory. 
bool doesNotAccessMemory() const { - return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReadNone); + return hasFnAttribute(Attribute::ReadNone); } void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); @@ -271,9 +270,7 @@ class Function : public GlobalObject, public ilist_node { /// @brief Determine if the function does not access or only reads memory. bool onlyReadsMemory() const { - return doesNotAccessMemory() || - AttributeSets.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ReadOnly); + return doesNotAccessMemory() || hasFnAttribute(Attribute::ReadOnly); } void setOnlyReadsMemory() { addFnAttr(Attribute::ReadOnly); @@ -282,16 +279,14 @@ class Function : public GlobalObject, public ilist_node { /// @brief Determine if the call can access memmory only using pointers based /// on its arguments. bool onlyAccessesArgMemory() const { - return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ArgMemOnly); + return hasFnAttribute(Attribute::ArgMemOnly); } void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } /// @brief Determine if the function may only access memory that is /// inaccessible from the IR. bool onlyAccessesInaccessibleMemory() const { - return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, - Attribute::InaccessibleMemOnly); + return hasFnAttribute(Attribute::InaccessibleMemOnly); } void setOnlyAccessesInaccessibleMemory() { addFnAttr(Attribute::InaccessibleMemOnly); @@ -300,8 +295,7 @@ class Function : public GlobalObject, public ilist_node { /// @brief Determine if the function may only access memory that is // either inaccessible from the IR or pointed to by its arguments. 
bool onlyAccessesInaccessibleMemOrArgMem() const { - return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, - Attribute::InaccessibleMemOrArgMemOnly); + return hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly); } void setOnlyAccessesInaccessibleMemOrArgMem() { addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); @@ -309,8 +303,7 @@ class Function : public GlobalObject, public ilist_node { /// @brief Determine if the function cannot return. bool doesNotReturn() const { - return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoReturn); + return hasFnAttribute(Attribute::NoReturn); } void setDoesNotReturn() { addFnAttr(Attribute::NoReturn); From 69986923646dac5090697e361b96718be6c24c5b Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 27 Jan 2016 04:20:24 +0000 Subject: [PATCH 0089/1132] SmallPtrSet: Inline the part of insert_imp in the small case Most of the time we only hit the small case, so it is beneficial to pull it out of the insert_imp() implementation. This improves compile time at least for non-LTO builds. Differential Revision: http://reviews.llvm.org/D16619 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258908 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 903845e2b08ff1f67d5c376e22108f749943daf9) --- include/llvm/ADT/SmallPtrSet.h | 20 +++++++++++++++++++- lib/Support/SmallPtrSet.cpp | 17 +---------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 3d98e8fac43..7e126d4fdd8 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -102,7 +102,23 @@ class SmallPtrSetImplBase { /// insert_imp - This returns true if the pointer was new to the set, false if /// it was already in the set. This is hidden from the client so that the /// derived class can check that the right type of pointer is passed in. 
- std::pair insert_imp(const void *Ptr); + std::pair insert_imp(const void *Ptr) { + if (isSmall()) { + // Check to see if it is already in the set. + for (const void **APtr = SmallArray, **E = SmallArray+NumElements; + APtr != E; ++APtr) + if (*APtr == Ptr) + return std::make_pair(APtr, false); + + // Nope, there isn't. If we stay small, just 'pushback' now. + if (NumElements < CurArraySize) { + SmallArray[NumElements++] = Ptr; + return std::make_pair(SmallArray + (NumElements - 1), true); + } + // Otherwise, hit the big set case, which will call grow. + } + return insert_imp_big(Ptr); + } /// erase_imp - If the set contains the specified pointer, remove it and /// return true, otherwise return false. This is hidden from the client so @@ -127,6 +143,8 @@ class SmallPtrSetImplBase { private: bool isSmall() const { return CurArray == SmallArray; } + std::pair insert_imp_big(const void *Ptr); + const void * const *FindBucketFor(const void *Ptr) const; void shrink_and_clear(); diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 358c8e8abbe..e674fc5948c 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -35,22 +35,7 @@ void SmallPtrSetImplBase::shrink_and_clear() { } std::pair -SmallPtrSetImplBase::insert_imp(const void *Ptr) { - if (isSmall()) { - // Check to see if it is already in the set. - for (const void **APtr = SmallArray, **E = SmallArray+NumElements; - APtr != E; ++APtr) - if (*APtr == Ptr) - return std::make_pair(APtr, false); - - // Nope, there isn't. If we stay small, just 'pushback' now. - if (NumElements < CurArraySize) { - SmallArray[NumElements++] = Ptr; - return std::make_pair(SmallArray + (NumElements - 1), true); - } - // Otherwise, hit the big set case, which will call grow. - } - +SmallPtrSetImplBase::insert_imp_big(const void *Ptr) { if (LLVM_UNLIKELY(NumElements * 4 >= CurArraySize * 3)) { // If more than 3/4 of the array is full, grow. Grow(CurArraySize < 64 ? 
128 : CurArraySize*2); From d79e3bf035640ef94913de6def635145b488949e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 27 Jan 2016 10:01:28 +0000 Subject: [PATCH 0090/1132] Move MCTargetAsmParser.h to llvm/MC/MCParser where it belongs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258917 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 030c9dc3089849d52ce9c412008fef77d3da74e8) --- include/llvm/MC/MCObjectWriter.h | 1 + include/llvm/MC/{ => MCParser}/MCTargetAsmParser.h | 4 ++-- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 2 +- lib/LTO/LTOModule.cpp | 2 +- lib/MC/MCParser/AsmParser.cpp | 2 +- lib/MC/MCParser/COFFAsmParser.cpp | 2 +- lib/MC/MCParser/MCAsmParser.cpp | 2 +- lib/MC/MCParser/MCTargetAsmParser.cpp | 2 +- lib/Object/IRObjectFile.cpp | 2 +- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 10 +++++----- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 4 ++-- lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 10 +++++----- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2 +- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 6 +++--- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 4 ++-- lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 2 +- lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 4 ++-- lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +- tools/llvm-mc/llvm-mc.cpp | 2 +- 20 files changed, 34 insertions(+), 33 deletions(-) rename include/llvm/MC/{ => MCParser}/MCTargetAsmParser.h (98%) diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index 63c833ac20d..0ecebe42a0b 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -22,6 +22,7 @@ class MCAsmLayout; class MCAssembler; class MCFixup; class MCFragment; +class MCSymbol; class MCSymbolRefExpr; class MCValue; diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCParser/MCTargetAsmParser.h similarity index 98% rename from 
include/llvm/MC/MCTargetAsmParser.h rename to include/llvm/MC/MCParser/MCTargetAsmParser.h index 03b2dc9a282..61e27cf877e 100644 --- a/include/llvm/MC/MCTargetAsmParser.h +++ b/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_MC_MCTARGETASMPARSER_H -#define LLVM_MC_MCTARGETASMPARSER_H +#ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H +#define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCParser/MCAsmParserExtension.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 4171657b528..9d040407b6a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -23,10 +23,10 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 4806f903bdf..987da3e3210 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -26,10 +26,10 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index f01b285b3fa..d34dce38db6 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ 
-28,11 +28,11 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCAsmParserUtils.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index a4b2b195f71..653627ad8dc 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -15,10 +15,10 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/COFF.h" using namespace llvm; diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index 290dcb29774..9bd7d9eebef 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -11,7 +11,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp index 4e4b47805cd..14a22c6b8a2 100644 --- a/lib/MC/MCParser/MCTargetAsmParser.cpp +++ b/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCContext.h" -#include 
"llvm/MC/MCTargetAsmParser.h" using namespace llvm; MCTargetAsmParser::MCTargetAsmParser(MCTargetOptions const &MCOptions, diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index c35c413b3c3..9602d4daeee 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -24,9 +24,9 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 394c8e78581..4938b38b3b0 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -24,11 +24,11 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index d9f753f4013..0b430ffd05f 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "AMDKernelCodeT.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" -#include "Utils/AMDGPUBaseInfo.h" -#include 
"AMDKernelCodeT.h" #include "SIDefines.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" @@ -25,16 +25,16 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 52e1e91203e..899480c6d6c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -31,20 +31,20 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCAsmParserUtils.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" 
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index a8622a96527..df20be7a2a4 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -13,12 +13,12 @@ #include "HexagonRegisterInfo.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" +#include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCExpr.h" #include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" -#include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonShuffler.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -31,16 +31,16 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" -#include "llvm/Support/SourceMgr.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index d4e061f00d3..a59a4361c43 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -22,10 +22,10 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include 
"llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 220c70a4854..5c64d7818ac 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -22,11 +22,11 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index a55274744fd..036e9ffc652 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/SparcMCTargetDesc.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectFileInfo.h" #include 
"llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 9c995bf42b0..a894bea91b6 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -13,9 +13,9 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 09cc53a8e6d..c38a7d1dd44 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86AsmInstrumentation.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86Operand.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" @@ -18,9 +18,9 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CommandLine.h" #include diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 
4d8ffac1a82..e1b44778465 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -24,12 +24,12 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 96e3f7c21a5..7803e35a44c 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -20,11 +20,11 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" From 502087d3cf8c53ae7a66f0c8aa0dcb108c7fcacd Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 20 Jan 2016 22:02:07 +0000 Subject: [PATCH 0091/1132] Fix build warning. 
error: field 'CCMgr' will be initialized after field 'IndirectStubsMgr' [-Werror,-Wreorder] : DL(TM.createDataLayout()), CCMgr(std::move(CCMgr)), git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258354 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 2da1416c6eeb6ba615bcff37faa6a06828f76384) --- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 3b2f9ab1cda..5978b95e068 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -89,8 +89,9 @@ class OrcCBindingsStack { OrcCBindingsStack(TargetMachine &TM, std::unique_ptr CCMgr, IndirectStubsManagerBuilder IndirectStubsMgrBuilder) - : DL(TM.createDataLayout()), CCMgr(std::move(CCMgr)), + : DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()), + CCMgr(std::move(CCMgr)), ObjectLayer(), CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)), CODLayer(CompileLayer, From 49df593ccf9c1ad9a721f96c4c8f5dec6fbd9222 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 23 Jan 2016 20:45:50 +0000 Subject: [PATCH 0092/1132] ObjectTransformLayerTest.cpp: Fix a warning. 
[-Winconsistent-missing-override] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258633 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 550eae18a98249a7239831cf9e129ba71f50628d) --- unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp index fda596f6a70..61e3db8826c 100644 --- a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp @@ -301,7 +301,7 @@ TEST(ObjectTransformLayerTest, Main) { } void registerEHFrames(uint8_t *, uint64_t, size_t) override {} void deregisterEHFrames(uint8_t *, uint64_t, size_t) override {} - bool finalizeMemory(std::string *) { return false; } + virtual bool finalizeMemory(std::string *) { return false; } }; // Construct the jit layers. From 70cdf899c8e683ed1a10eba9559a1433965ffb26 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 23 Jan 2016 20:45:55 +0000 Subject: [PATCH 0093/1132] ObjectTransformLayerTest.cpp: Fix a warning. 
[-Wredundant-move] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258634 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 3dd226c84b9966a06df8978d5331178018991738) --- unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp index 61e3db8826c..a6766c901de 100644 --- a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp @@ -308,7 +308,7 @@ TEST(ObjectTransformLayerTest, Main) { ObjectLinkingLayer<> BaseLayer; auto IdentityTransform = []( std::unique_ptr> - Obj) { return std::move(Obj); }; + Obj) { return Obj; }; ObjectTransformLayer TransformLayer(BaseLayer, IdentityTransform); auto NullCompiler = [](llvm::Module &) { From 3f0559aedc3879f6e0768e8aa6f4a547f3945d14 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sat, 23 Jan 2016 20:48:50 +0000 Subject: [PATCH 0094/1132] ObjectTransformLayerTest.cpp: Rework r258633. [-Winconsistent-missing-override] Sorry for the noise. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258635 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit d755861f7e272c6118dc2cec82dbd53f9a1a37cc) --- unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp index a6766c901de..e91194be6fe 100644 --- a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp +++ b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp @@ -301,7 +301,7 @@ TEST(ObjectTransformLayerTest, Main) { } void registerEHFrames(uint8_t *, uint64_t, size_t) override {} void deregisterEHFrames(uint8_t *, uint64_t, size_t) override {} - virtual bool finalizeMemory(std::string *) { return false; } + bool finalizeMemory(std::string *) override { return false; } }; // Construct the jit layers. From 2b8d07242ec014f84a040b94c1d7d07ad48f039d Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 29 Jan 2016 01:08:41 +0000 Subject: [PATCH 0095/1132] [AArch64] Fix i64 nontemporal high-half extraction. Since we only have pair - not single - nontemporal store instructions, we have to extract the high part into a separate register to be able to use them. When the initial nontemporal codegen support was added, I wrote the extract using the nonsensical UBFX [0,32[. Use the correct LSR form instead. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259134 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 3ea0d92811bd4483f4a5039de4efc4aa105de5cb) --- lib/Target/AArch64/AArch64InstrInfo.td | 2 +- test/CodeGen/AArch64/nontemporal.ll | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index d02bc9ff394..4f052e81de2 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -5982,7 +5982,7 @@ def : NTStore64Pat; def : Pat<(nontemporalstore GPR64:$Rt, (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), - (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32), + (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32), GPR64sp:$Rn, simm7s4:$offset)>; } // AddedComplexity=10 } // Predicates = [IsLE] diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll index db9779e0319..d8785f845c2 100644 --- a/test/CodeGen/AArch64/nontemporal.ll +++ b/test/CodeGen/AArch64/nontemporal.ll @@ -112,7 +112,7 @@ define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 { define void @test_stnp_i64(i64* %p, i64 %v) #0 { ; CHECK-LABEL: test_stnp_i64: -; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32 ; CHECK-NEXT: stnp w1, w[[HI]], [x0] ; CHECK-NEXT: ret store i64 %v, i64* %p, align 1, !nontemporal !0 @@ -162,7 +162,7 @@ define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 { define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { ; CHECK-LABEL: test_stnp_i64_offset: -; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32 ; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8] ; CHECK-NEXT: ret %tmp0 = getelementptr i64, i64* %p, i32 1 @@ -172,7 +172,7 @@ define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 { ; 
CHECK-LABEL: test_stnp_i64_offset_neg: -; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32 ; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8] ; CHECK-NEXT: ret %tmp0 = getelementptr i64, i64* %p, i32 -1 From 5a395607ac6a5c6508dae3e716006af77b62256d Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Fri, 29 Jan 2016 07:51:15 +0000 Subject: [PATCH 0096/1132] Minor bugfix in AAResults::getModRefInfo. Also removed a few redundant `else`s. Bug was found by a test I wrote for MemorySSA (in review at http://reviews.llvm.org/D7864; shiny update coming soon). So, assuming that lands at some point, this should be covered by that. If anyone feels this deserves its own explicit test case, please let me know. I'll write one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259179 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit ca9ee4390ae35cecabc52b29ddd2069b54fdd972) --- include/llvm/Analysis/AliasAnalysis.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 4bf656a2f2a..3255a9b8beb 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -450,11 +450,11 @@ class AAResults { ModRefInfo getModRefInfo(const Instruction *I) { if (auto CS = ImmutableCallSite(I)) { auto MRB = getModRefBehavior(CS); - if (MRB & MRI_ModRef) + if ((MRB & MRI_ModRef) == MRI_ModRef) return MRI_ModRef; - else if (MRB & MRI_Ref) + if (MRB & MRI_Ref) return MRI_Ref; - else if (MRB & MRI_Mod) + if (MRB & MRI_Mod) return MRI_Mod; return MRI_NoModRef; } From 35845299cac484f1abd1bc2010359acdc787a252 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 28 Jan 2016 18:20:05 +0000 Subject: [PATCH 0097/1132] [X86] Don't transform X << 1 to X + X during type legalization While legalizing a 64-bit shift left by 1, the following occurs: We split the shift operand in half: a high half and a low half. 
We then create an ADDC with the low half and a ADDE with the high half + the carry bit from the ADDC. This is problematic if X is any_ext'd because the high half computation is now undef + undef + carry bit and there is no way to ensure that the two undef values had the same bitwise representation. This results in the lowest bit in the high half turning into garbage. Instead, do not try to turn shifts into arithmetic during type legalization. This fixes PR26350. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259065 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 98232aa5c9b83687a120583d421d3bba4d690cca) --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 9 -------- test/CodeGen/X86/fold-tied-op.ll | 5 +---- test/CodeGen/X86/pr26350.ll | 21 +++++++++++++++++++ 3 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/X86/pr26350.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 74f80db6d01..abbfb1fdc1a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1442,15 +1442,6 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, } else if (Amt == NVTBits) { Lo = DAG.getConstant(0, DL, NVT); Hi = InL; - } else if (Amt == 1 && - TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { - // Emit this X << 1 as X+X. 
- SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); - SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps); - SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, diff --git a/test/CodeGen/X86/fold-tied-op.ll b/test/CodeGen/X86/fold-tied-op.ll index 62fed421938..ac6ac524915 100644 --- a/test/CodeGen/X86/fold-tied-op.ll +++ b/test/CodeGen/X86/fold-tied-op.ll @@ -6,10 +6,7 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386--netbsd" ; CHECK-LABEL: fn1 -; CHECK: shldl {{.*#+}} 4-byte Folded Spill -; CHECK: orl {{.*#+}} 4-byte Folded Reload -; CHECK: shldl {{.*#+}} 4-byte Folded Spill -; CHECK: orl {{.*#+}} 4-byte Folded Reload +; CHECK addl {{.*#+}} 4-byte Folded Reload ; CHECK: addl {{.*#+}} 4-byte Folded Reload ; CHECK: imull {{.*#+}} 4-byte Folded Reload ; CHECK: orl {{.*#+}} 4-byte Folded Reload diff --git a/test/CodeGen/X86/pr26350.ll b/test/CodeGen/X86/pr26350.ll new file mode 100644 index 00000000000..6e87cb3e8b7 --- /dev/null +++ b/test/CodeGen/X86/pr26350.ll @@ -0,0 +1,21 @@ +; RUN: llc -disable-constant-hoisting < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +@d = global i32 8, align 4 + +define i32 @main() { +entry: + %load = load i32, i32* @d, align 4 + %conv1 = zext i32 %load to i64 + %shl = shl i64 %conv1, 1 + %mul = and i64 %shl, 4294967312 + %cmp = icmp ugt i64 4294967295, %mul + %zext = zext i1 %cmp to i32 + ret i32 %zext +} +; CHECK: main: +; CHECK: movl d, %[[load:.*]] +; CHECK: movl %[[load]], %[[copy:.*]] +; CHECK: shrl $31, %[[copy]] +; CHECK: addl %[[load]], %[[load]] From 635692835bb61a88cf252b97b3c60c05de8f34c4 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 28 Jan 2016 18:59:04 +0000 Subject: [PATCH 0098/1132] Address 
buildbot fallout from r259065 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259074 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 2404e4b0255806df3537174e1e1ecbca1505d1d2) --- test/CodeGen/ARM/carry.ll | 7 ++++--- test/CodeGen/PowerPC/ppcf128-endian.ll | 8 +++++--- test/CodeGen/Thumb2/carry.ll | 8 ++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index 7ea9be2c61e..558e2b0e43f 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -11,9 +11,10 @@ entry: define i64 @f2(i64 %a, i64 %b) { ; CHECK-LABEL: f2: -; CHECK: adc r -; CHECK: subs r -; CHECK: sbc r +; CHECK: lsl r +; CHECK: orr r +; CHECK: rsbs r +; CHECK: sbc r entry: %tmp1 = shl i64 %a, 1 %tmp2 = sub i64 %tmp1, %b diff --git a/test/CodeGen/PowerPC/ppcf128-endian.ll b/test/CodeGen/PowerPC/ppcf128-endian.ll index ee314c1db58..49dea37a2da 100644 --- a/test/CodeGen/PowerPC/ppcf128-endian.ll +++ b/test/CodeGen/PowerPC/ppcf128-endian.ll @@ -104,9 +104,10 @@ entry: %0 = bitcast i128 %x to ppc_fp128 ret ppc_fp128 %0 } -; CHECK: @convert_to +; CHECK: convert_to: ; CHECK: std 3, [[OFF1:.*]](1) ; CHECK: std 4, [[OFF2:.*]](1) +; CHECK: ori 2, 2, 0 ; CHECK: lfd 1, [[OFF1]](1) ; CHECK: lfd 2, [[OFF2]](1) ; CHECK: blr @@ -118,9 +119,10 @@ entry: ret ppc_fp128 %0 } -; CHECK: @convert_to +; CHECK: convert_to2: ; CHECK: std 3, [[OFF1:.*]](1) -; CHECK: std 4, [[OFF2:.*]](1) +; CHECK: std 5, [[OFF2:.*]](1) +; CHECK: ori 2, 2, 0 ; CHECK: lfd 1, [[OFF1]](1) ; CHECK: lfd 2, [[OFF2]](1) ; CHECK: blr diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll index 26622e23dd4..1e2b332be98 100644 --- a/test/CodeGen/Thumb2/carry.ll +++ b/test/CodeGen/Thumb2/carry.ll @@ -12,10 +12,10 @@ entry: define i64 @f2(i64 %a, i64 %b) { entry: ; CHECK-LABEL: f2: -; CHECK: adds r0, r0, r0 -; CHECK: adcs r1, r1 -; CHECK: subs r0, r0, r2 -; CHECK: sbcs r1, r3 +; CHECK: lsls r1, r1, #1 +; CHECK: orr.w r1, r1, r0, 
lsr #31 +; CHECK: rsbs r0, r2, r0, lsl #1 +; CHECK: sbcs r1, r3 %tmp1 = shl i64 %a, 1 %tmp2 = sub i64 %tmp1, %b ret i64 %tmp2 From d26caa5802d1ab9cbd9650a0e05284001d39a19a Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 29 Jan 2016 19:18:46 +0000 Subject: [PATCH 0099/1132] ARM: don't mangle DAG constant if it has more than one use The basic optimisation was to convert (mul $LHS, $complex_constant) into roughly "(shl (mul $LHS, $simple_constant), $simple_amt)" when it was expected to be cheaper. The original logic checks that the mul only has one use (since we're mangling $complex_constant), but when used in even more complex addressing modes there may be an outer addition that can pick up the wrong value too. I *think* the ARM addressing-mode problem is actually unreachable at the moment, but that depends on complex assessments of the profitability of pre-increment addressing modes so I've put a real check in there instead of an assertion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259228 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 4 ++-- test/CodeGen/ARM/shifter_operand.ll | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 4cb80da4839..f89212ef1ba 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -747,7 +747,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // If Offset is a multiply-by-constant and it's profitable to extract a shift // and use it in a shifted operand do so. 
- if (Offset.getOpcode() == ISD::MUL) { + if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { @@ -1422,7 +1422,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, // If OffReg is a multiply-by-constant and it's profitable to extract a shift // and use it in a shifted operand do so. - if (OffReg.getOpcode() == ISD::MUL) { + if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 5d44eb0f11d..e5f9b118527 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -239,3 +239,20 @@ define void @test_well_formed_dag(i32 %in1, i32 %in2, i32* %addr) { store i32 %add, i32* %addr ret void } + +define { i32, i32 } @test_multi_use_add(i32 %base, i32 %offset) { +; CHECK-LABEL: test_multi_use_add: +; CHECK-THUMB: movs [[CONST:r[0-9]+]], #28 +; CHECK-THUMB: movt [[CONST]], #1 + + %prod = mul i32 %offset, 65564 + %sum = add i32 %base, %prod + + %ptr = inttoptr i32 %sum to i32* + %loaded = load i32, i32* %ptr + + %ret.tmp = insertvalue { i32, i32 } undef, i32 %sum, 0 + %ret = insertvalue { i32, i32 } %ret.tmp, i32 %loaded, 1 + + ret { i32, i32 } %ret +} From 932ffd0a88cea3c494d2befc5b4626231096d1a4 Mon Sep 17 00:00:00 2001 From: Frederic Riss Date: Sun, 31 Jan 2016 04:29:22 +0000 Subject: [PATCH 0100/1132] [dsymutil] Allow debug map mappings with no object file address. NFC This change just changes the data structure that ties symbol names, object file address and linked binary addresses to accept mappings with no object file address. Such symbol mappings are not fed into the debug map yet, so this patch is NFC. A subsequent patch will make use of this functionality for common symbols. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259317 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit c5413fcc11958f06d26fbaaae62d28edff38c0ef) --- tools/dsymutil/DebugMap.cpp | 19 ++++++++++++------- tools/dsymutil/DebugMap.h | 13 ++++++++----- tools/dsymutil/DwarfLinker.cpp | 18 ++++++++++-------- tools/dsymutil/MachODebugMapParser.cpp | 2 +- 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/tools/dsymutil/DebugMap.cpp b/tools/dsymutil/DebugMap.cpp index 4717085f432..d2d5b615a32 100644 --- a/tools/dsymutil/DebugMap.cpp +++ b/tools/dsymutil/DebugMap.cpp @@ -24,13 +24,13 @@ DebugMapObject::DebugMapObject(StringRef ObjectFilename, sys::TimeValue Timestamp) : Filename(ObjectFilename), Timestamp(Timestamp) {} -bool DebugMapObject::addSymbol(StringRef Name, uint64_t ObjectAddress, +bool DebugMapObject::addSymbol(StringRef Name, Optional ObjectAddress, uint64_t LinkedAddress, uint32_t Size) { auto InsertResult = Symbols.insert( std::make_pair(Name, SymbolMapping(ObjectAddress, LinkedAddress, Size))); - if (InsertResult.second) - AddressToMapping[ObjectAddress] = &*InsertResult.first; + if (ObjectAddress && InsertResult.second) + AddressToMapping[*ObjectAddress] = &*InsertResult.first; return InsertResult.second; } @@ -47,8 +47,11 @@ void DebugMapObject::print(raw_ostream &OS) const { Entries.begin(), Entries.end(), [](const Entry &LHS, const Entry &RHS) { return LHS.first < RHS.first; }); for (const auto &Sym : Entries) { - OS << format("\t%016" PRIx64 " => %016" PRIx64 "+0x%x\t%s\n", - uint64_t(Sym.second.ObjectAddress), + if (Sym.second.ObjectAddress) + OS << format("\t%016" PRIx64, uint64_t(*Sym.second.ObjectAddress)); + else + OS << "\t????????????????"; + OS << format(" => %016" PRIx64 "+0x%x\t%s\n", uint64_t(Sym.second.BinaryAddress), uint32_t(Sym.second.Size), Sym.first.data()); } @@ -136,7 +139,7 @@ struct MappingTraits::YamlDMO { void MappingTraits>:: mapping(IO &io, std::pair &s) { io.mapRequired("sym", s.first); - 
io.mapRequired("objAddr", s.second.ObjectAddress); + io.mapOptional("objAddr", s.second.ObjectAddress); io.mapRequired("binAddr", s.second.BinaryAddress); io.mapOptional("size", s.second.Size); } @@ -237,7 +240,9 @@ MappingTraits::YamlDMO::denormalize(IO &IO) { dsymutil::DebugMapObject Res(Path, TV); for (auto &Entry : Entries) { auto &Mapping = Entry.second; - uint64_t ObjAddress = Mapping.ObjectAddress; + Optional ObjAddress; + if (Mapping.ObjectAddress) + ObjAddress = *Mapping.ObjectAddress; auto AddressIt = SymbolAddresses.find(Entry.first); if (AddressIt != SymbolAddresses.end()) ObjAddress = AddressIt->getValue(); diff --git a/tools/dsymutil/DebugMap.h b/tools/dsymutil/DebugMap.h index 4907b8f1a72..1a3d62b67b7 100644 --- a/tools/dsymutil/DebugMap.h +++ b/tools/dsymutil/DebugMap.h @@ -117,12 +117,15 @@ class DebugMap { class DebugMapObject { public: struct SymbolMapping { - yaml::Hex64 ObjectAddress; + Optional ObjectAddress; yaml::Hex64 BinaryAddress; yaml::Hex32 Size; - SymbolMapping(uint64_t ObjectAddress, uint64_t BinaryAddress, uint32_t Size) - : ObjectAddress(ObjectAddress), BinaryAddress(BinaryAddress), - Size(Size) {} + SymbolMapping(Optional ObjectAddr, uint64_t BinaryAddress, + uint32_t Size) + : BinaryAddress(BinaryAddress), Size(Size) { + if (ObjectAddr) + ObjectAddress = *ObjectAddr; + } /// For YAML IO support SymbolMapping() = default; }; @@ -132,7 +135,7 @@ class DebugMapObject { /// \brief Adds a symbol mapping to this DebugMapObject. /// \returns false if the symbol was already registered. The request /// is discarded in this case. - bool addSymbol(llvm::StringRef SymName, uint64_t ObjectAddress, + bool addSymbol(llvm::StringRef SymName, Optional ObjectAddress, uint64_t LinkedAddress, uint32_t Size); /// \brief Lookup a symbol mapping. 
diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp index 37dd02851dc..fe971e99607 100644 --- a/tools/dsymutil/DwarfLinker.cpp +++ b/tools/dsymutil/DwarfLinker.cpp @@ -1854,10 +1854,10 @@ void DwarfLinker::startDebugObject(DWARFContext &Dwarf, DebugMapObject &Obj) { // -gline-tables-only on Darwin. for (const auto &Entry : Obj.symbols()) { const auto &Mapping = Entry.getValue(); - if (Mapping.Size) - Ranges[Mapping.ObjectAddress] = std::make_pair( - Mapping.ObjectAddress + Mapping.Size, - int64_t(Mapping.BinaryAddress) - Mapping.ObjectAddress); + if (Mapping.Size && Mapping.ObjectAddress) + Ranges[*Mapping.ObjectAddress] = std::make_pair( + *Mapping.ObjectAddress + Mapping.Size, + int64_t(Mapping.BinaryAddress) - *Mapping.ObjectAddress); } } @@ -1988,14 +1988,16 @@ hasValidRelocation(uint32_t StartOffset, uint32_t EndOffset, const auto &ValidReloc = ValidRelocs[NextValidReloc++]; const auto &Mapping = ValidReloc.Mapping->getValue(); + uint64_t ObjectAddress = + Mapping.ObjectAddress ? 
uint64_t(*Mapping.ObjectAddress) : UINT64_MAX; if (Linker.Options.Verbose) outs() << "Found valid debug map entry: " << ValidReloc.Mapping->getKey() - << " " << format("\t%016" PRIx64 " => %016" PRIx64, - uint64_t(Mapping.ObjectAddress), + << " " << format("\t%016" PRIx64 " => %016" PRIx64, ObjectAddress, uint64_t(Mapping.BinaryAddress)); - Info.AddrAdjust = int64_t(Mapping.BinaryAddress) + ValidReloc.Addend - - Mapping.ObjectAddress; + Info.AddrAdjust = int64_t(Mapping.BinaryAddress) + ValidReloc.Addend; + if (Mapping.ObjectAddress) + Info.AddrAdjust -= ObjectAddress; Info.InDebugMap = true; return true; } diff --git a/tools/dsymutil/MachODebugMapParser.cpp b/tools/dsymutil/MachODebugMapParser.cpp index 4412db25426..33845f40cba 100644 --- a/tools/dsymutil/MachODebugMapParser.cpp +++ b/tools/dsymutil/MachODebugMapParser.cpp @@ -391,7 +391,7 @@ void MachODebugMapParser::handleStabSymbolTableEntry(uint32_t StringIndex, Twine(Name)); if (!ObjectSymIt->getValue()) return; - if (!CurrentDebugMapObject->addSymbol(Name, *ObjectSymIt->getValue(), Value, + if (!CurrentDebugMapObject->addSymbol(Name, ObjectSymIt->getValue(), Value, Size)) return Warning(Twine("failed to insert symbol '") + Name + "' in the debug map."); From 879034799e46d2a7054a57531164e083f170ccf1 Mon Sep 17 00:00:00 2001 From: Frederic Riss Date: Sun, 31 Jan 2016 04:29:34 +0000 Subject: [PATCH 0101/1132] [dsymutil] Fix handling of common symbols. llvm-dsymutil was misinterpreting the value of common symbols as their address when it actually contains their size. This didn't impact llvm-dsymutil's ability to link the debug information for common symbols because these are always found by name and not by address. Things could however go wrong when the size of a common object matched the object file address of another symbol. 
Depending on the link order of the symbols the common object might incorrectly evict this other object from the address to symbol mapping, and then link the evicted symbol with a wrong binary address. Use the new ability to have symbols without an object file address to fix this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259318 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 99a07120d47e1881a14a7b36bf7cf6bbd54954af) --- .../tools/dsymutil/Inputs/common.macho.x86_64 | Bin 0 -> 4592 bytes .../dsymutil/Inputs/common.macho.x86_64.o | Bin 0 -> 2404 bytes test/tools/dsymutil/X86/common-sym.test | 24 ++++++++++++++++++ test/tools/dsymutil/absolute_symbol.test | 3 ++- test/tools/dsymutil/basic-linking.test | 4 +-- test/tools/dsymutil/debug-map-parsing.test | 4 +-- .../dsymutil/yaml-object-address-rewrite.test | 4 +-- tools/dsymutil/DebugMap.cpp | 3 ++- tools/dsymutil/MachODebugMapParser.cpp | 17 +++++++------ 9 files changed, 43 insertions(+), 16 deletions(-) create mode 100755 test/tools/dsymutil/Inputs/common.macho.x86_64 create mode 100644 test/tools/dsymutil/Inputs/common.macho.x86_64.o create mode 100644 test/tools/dsymutil/X86/common-sym.test diff --git a/test/tools/dsymutil/Inputs/common.macho.x86_64 b/test/tools/dsymutil/Inputs/common.macho.x86_64 new file mode 100755 index 0000000000000000000000000000000000000000..c5c090ea1ad47c03bba50e74b580d3e080817f86 GIT binary patch literal 4592 zcmeHLziU%b6u!?yTWYjXM3f2*Qv5Z%*ivc+9U=`TRBEZQXh9G8@j?Q5NzD&Tow9V$ zDVqhs|G>rBq2M66baixb6r78Q-}iEF^Ab{Vba)5Oz3)5s-0zK;={O~dy9c2o>sN?nAf;Ua6vd#TOT`U6IX%^HSd5qT0w6v?I@XNOa~{n6o=(ipX= z<8YMZnMlj&v^2mp%GbpO$ah|+5Qhip1YJKAY1dvEHg_AxN!@ngBFo8a(w6~!#_Fb1s2h2`9Lyqxy6e_!dih8` z|1$JL-`!0^`nMT8d;@Av?bd+LV%|Cf z0?a2FgVRoFXZ=_nIuG!l>@>1s@CZEKZZ_i8No5GjP`N{O!Tc=7;(0$FJ{J z_OHI)m_rPup`a>A4DKK?HK#&Z&IuJMl6)EAU*4bH~-54W2w$F=X;sH2ic z66^|`=cO@BV!p01^$9FcAW$GsAW$GsAW$GsAW$GsAW-1{Qs7bg)%W?mN5>*=Q+ zF{U1Sq)WO~|IFTiy~84A`WxLx@SFW!1U*A^(D7L35TjWy*Ok?YTG7bFq(|NlV?oeg 
zhs7<6({T#zmLp{+S9BW6c5{`QTFB=b>Xz;uZ}aq!>ds>5ap=YjO?S56a4^I4NL*uI z=Etn36tdJz`~3#jc~2Vy@5Ju~_?!W+Rzo+-JhvI&Nxz7D3;51_H zH~0>43~r3;MGo3hn5V7&X$(K{@PPjraTo(qxL(rh<~~(m#19!`DxRrrwOh@&>WgX@ w4kJ3?<_hJy-AOFT(xP2g;=P0L9ZU}|W1ed`!P~=jRhq4KUh?>d15XP31L)tSZ~y=R literal 0 HcmV?d00001 diff --git a/test/tools/dsymutil/Inputs/common.macho.x86_64.o b/test/tools/dsymutil/Inputs/common.macho.x86_64.o new file mode 100644 index 0000000000000000000000000000000000000000..491009bc866e2a3c4acff3923817fada6df0bc80 GIT binary patch literal 2404 zcmb7G&u<$=6n2jub8}#e9DQC66Zi%3{0U0OhU9CG%gQfKaS5WzIEP<)Gji3{&}zNXsBNt2{-9^lS}&ZRUw(C1;&3W9Q?dMp1tO^te~8!W)J-~x z*BUK`|MfocssYpL(|9j(F?gb7ps@<#8?)@=R>Z=svb!Bf>UQN=*(=Kh6EloX3zC?-Wc{WlJ!* ziD3^PKuA%QGRM^C9CPM8;W>C!d4u`1@|Iz}jL6d2`6cq=yQrj0VNOZ>Nua+2`SzV{ z|Kf+&x4yaa>tE!^ADe)K8wg 03 00 10 00 00 01 00 00 00 ) + +CHECK: DW_TAG_subprogram +CHECK-NEXT: DW_AT_low_pc{{.*}}(0x0000000100000f80) +CHECK-NOT: {{NULL|DW_TAG}} +CHECK: DW_AT_name {{.*}} "main" + +CHECK: DW_TAG_subprogram +CHECK-NEXT: DW_AT_low_pc{{.*}}(0x0000000100000f90) +CHECK-NOT: {{NULL|DW_TAG}} +CHECK: DW_AT_name {{.*}} "bar" diff --git a/test/tools/dsymutil/absolute_symbol.test b/test/tools/dsymutil/absolute_symbol.test index cdd6ae83213..65eb0de3b91 100644 --- a/test/tools/dsymutil/absolute_symbol.test +++ b/test/tools/dsymutil/absolute_symbol.test @@ -13,4 +13,5 @@ compiled for i386. This create an absolute symbol .objc_class_name_Foo We must not consider this symbol for debug info linking as its address might conflict with other real symbols in the same file. 
-CHECK-NOT: objc_class_name_Foo +CHECK: objc_class_name_Foo +CHECK-SAME-NOT: objAddr diff --git a/test/tools/dsymutil/basic-linking.test b/test/tools/dsymutil/basic-linking.test index bff5b5df9e6..5be90105cb7 100644 --- a/test/tools/dsymutil/basic-linking.test +++ b/test/tools/dsymutil/basic-linking.test @@ -44,7 +44,7 @@ CHECK-NEXT: TAG_compile_unit CHECK-NOT: TAG CHECK: AT_name {{.*}}basic3.c CHECK-NOT: Found valid debug map entry -CHECK: Found valid debug map entry: _val 0000000000000004 => 0000000100001004 +CHECK: Found valid debug map entry: _val ffffffffffffffff => 0000000100001004 CHECK-NEXT: DW_TAG_variable CHECK-NEXT: DW_AT_name {{.*}}"val" CHECK-NOT: Found valid debug map entry @@ -137,7 +137,7 @@ CHECK-ARCHIVE-NEXT: TAG_compile_unit CHECK-ARCHIVE-NOT: TAG CHECK-ARCHIVE: AT_name {{.*}}basic3.c CHECK-ARCHIVE-NOT: Found valid debug map entry -CHECK-ARCHIVE: Found valid debug map entry: _val 0000000000000004 => 0000000100001008 +CHECK-ARCHIVE: Found valid debug map entry: _val ffffffffffffffff => 0000000100001008 CHECK-ARCHIVE-NEXT: DW_TAG_variable CHECK-ARCHIVE-NEXT: DW_AT_name {{.*}}"val" CHECK-ARCHIVE-NOT: Found valid debug map entry diff --git a/test/tools/dsymutil/debug-map-parsing.test b/test/tools/dsymutil/debug-map-parsing.test index 2b9d0917609..05beb8e9bcc 100644 --- a/test/tools/dsymutil/debug-map-parsing.test +++ b/test/tools/dsymutil/debug-map-parsing.test @@ -21,7 +21,7 @@ CHECK-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x000000010 CHECK: filename{{.*}}/Inputs/basic3.macho.x86_64.o CHECK-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 CHECK-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019 -CHECK-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001004, size: 0x00000000 +CHECK-DAG: sym: _val, binAddr: 0x0000000100001004, size: 0x00000000 CHECK: ... 
@@ -65,7 +65,7 @@ CHECK-ARCHIVE-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0 CHECK-ARCHIVE: /Inputs/./libbasic.a(basic3.macho.x86_64.o) CHECK-ARCHIVE-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 CHECK-ARCHIVE-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019 -CHECK-ARCHIVE-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000 +CHECK-ARCHIVE-DAG: sym: _val, binAddr: 0x0000000100001008, size: 0x00000000 CHECK-ARCHIVE: ... Check that we warn about missing object files (this presumes that the files aren't diff --git a/test/tools/dsymutil/yaml-object-address-rewrite.test b/test/tools/dsymutil/yaml-object-address-rewrite.test index 749719fc5bd..a108d63ce18 100644 --- a/test/tools/dsymutil/yaml-object-address-rewrite.test +++ b/test/tools/dsymutil/yaml-object-address-rewrite.test @@ -23,7 +23,7 @@ # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)' # CHECK-NEXT: timestamp: 0 # CHECK-NEXT: symbols: -# CHECK-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000 +# CHECK-DAG: sym: _val, binAddr: 0x0000000100001008, size: 0x00000000 # CHECK-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 # CHECK-DAG: sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019 # CHECK-NOT: { sym: @@ -42,7 +42,7 @@ objects: - { sym: _baz, objAddr: 0x0, binAddr: 0x0000000100001000, size: 0x00000000 } - filename: /Inputs/./libbasic.a(basic3.macho.x86_64.o) symbols: - - { sym: _val, objAddr: 0x0, binAddr: 0x0000000100001008, size: 0x00000000 } + - { sym: _val, binAddr: 0x0000000100001008, size: 0x00000000 } - { sym: _bar, objAddr: 0x0, binAddr: 0x0000000100000F40, size: 0x00000050 } - { sym: _inc, objAddr: 0x0, binAddr: 0x0000000100000F90, size: 0x00000019 } ... 
diff --git a/tools/dsymutil/DebugMap.cpp b/tools/dsymutil/DebugMap.cpp index d2d5b615a32..114e22c0745 100644 --- a/tools/dsymutil/DebugMap.cpp +++ b/tools/dsymutil/DebugMap.cpp @@ -229,7 +229,8 @@ MappingTraits::YamlDMO::denormalize(IO &IO) { for (const auto &Sym : ErrOrObjectFile->symbols()) { uint64_t Address = Sym.getValue(); ErrorOr Name = Sym.getName(); - if (!Name) + if (!Name || + (Sym.getFlags() & (SymbolRef::SF_Absolute | SymbolRef::SF_Common))) continue; SymbolAddresses[*Name] = Address; } diff --git a/tools/dsymutil/MachODebugMapParser.cpp b/tools/dsymutil/MachODebugMapParser.cpp index 33845f40cba..02c3ab07f6a 100644 --- a/tools/dsymutil/MachODebugMapParser.cpp +++ b/tools/dsymutil/MachODebugMapParser.cpp @@ -389,8 +389,6 @@ void MachODebugMapParser::handleStabSymbolTableEntry(uint32_t StringIndex, if (ObjectSymIt == CurrentObjectAddresses.end()) return Warning("could not find object file symbol for symbol " + Twine(Name)); - if (!ObjectSymIt->getValue()) - return; if (!CurrentDebugMapObject->addSymbol(Name, ObjectSymIt->getValue(), Value, Size)) return Warning(Twine("failed to insert symbol '") + Name + @@ -407,12 +405,15 @@ void MachODebugMapParser::loadCurrentObjectFileSymbols( ErrorOr Name = Sym.getName(); if (!Name) continue; - // Objective-C on i386 uses artificial absolute symbols to - // perform some link time checks. Those symbols have a fixed 0 - // address that might conflict with real symbols in the object - // file. As I cannot see a way for absolute symbols to find - // their way into the debug information, let's just ignore those. - if (Sym.getFlags() & SymbolRef::SF_Absolute) + // The value of some categories of symbols isn't meaningful. For + // example common symbols store their size in the value field, not + // their address. Absolute symbols have a fixed address that can + // conflict with standard symbols. These symbols (especially the + // common ones), might still be referenced by relocations. 
These + // relocations will use the symbol itself, and won't need an + // object file address. The object file address field is optional + // in the DebugMap, leave it unassigned for these symbols. + if (Sym.getFlags() & (SymbolRef::SF_Absolute | SymbolRef::SF_Common)) CurrentObjectAddresses[*Name] = None; else CurrentObjectAddresses[*Name] = Addr; From 2875e282c8f9a3a28f65e22ae6341c27e8164da8 Mon Sep 17 00:00:00 2001 From: Frederic Riss Date: Sun, 31 Jan 2016 04:39:16 +0000 Subject: [PATCH 0102/1132] [dsymutil] Fix FileCheck command. Damn case-insensitive filesystem... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259319 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 79c213f615de77eb4b72279e2e3f654a977462ab) --- test/tools/dsymutil/X86/common-sym.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tools/dsymutil/X86/common-sym.test b/test/tools/dsymutil/X86/common-sym.test index a3ba419479a..a5e4e73bf8b 100644 --- a/test/tools/dsymutil/X86/common-sym.test +++ b/test/tools/dsymutil/X86/common-sym.test @@ -1,4 +1,4 @@ -RUN: llvm-dsymutil -oso-prepend-path %p/.. %p/../Inputs/common.macho.x86_64 -f -o - | llvm-dwarfdump -debug-dump=info - | Filecheck %s +RUN: llvm-dsymutil -oso-prepend-path %p/.. %p/../Inputs/common.macho.x86_64 -f -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s The test was compiled from a single source: $ cat common.c From 1148ec300fdd35f25d8eedf1ac0649d19419e6e0 Mon Sep 17 00:00:00 2001 From: Frederic Riss Date: Sun, 31 Jan 2016 22:06:35 +0000 Subject: [PATCH 0103/1132] [MCDwarf] Fix encoding of line tables with weird custom parameters With poorly chosen custom parameters, the line table encoding logic would sometimes end up generating a special opcode bigger than 255, which is wrong. The set of default parameters that LLVM uses isn't subject to this bug. When carefully chosing the line table parameters, it's impossible to fall into the corner case that this patch fixes. 
The standard however doesn't require that these parameters be carefully chosen. And even if it did, we shouldn't generate broken encoding. Add a unittest for this specific encoding bug, and while at it, create some unit tests for the encoding logic using different sets of parameters. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259334 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit f87dc438fd28550abfe9692daa50aaa331e7cf1a) --- lib/MC/MCDwarf.cpp | 7 +- unittests/MC/CMakeLists.txt | 1 + unittests/MC/DwarfLineTables.cpp | 179 +++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 2 deletions(-) create mode 100644 unittests/MC/DwarfLineTables.cpp diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index dafa7683b1a..f815c9995e1 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -452,7 +452,8 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params, // If the line increment is out of range of a special opcode, we must encode // it with DW_LNS_advance_line. - if (Temp >= Params.DWARF2LineRange) { + if (Temp >= Params.DWARF2LineRange || + Temp + Params.DWARF2LineOpcodeBase > 255) { OS << char(dwarf::DW_LNS_advance_line); encodeSLEB128(LineDelta, OS); @@ -494,8 +495,10 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params, if (NeedCopy) OS << char(dwarf::DW_LNS_copy); - else + else { + assert(Temp <= 255 && "Buggy special opcode encoding."); OS << char(Temp); + } } // Utility function to write a tuple for .debug_abbrev. 
diff --git a/unittests/MC/CMakeLists.txt b/unittests/MC/CMakeLists.txt index f83eaf4779f..9d68e783a55 100644 --- a/unittests/MC/CMakeLists.txt +++ b/unittests/MC/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(MCTests Disassembler.cpp + DwarfLineTables.cpp StringTableBuilderTest.cpp YAMLTest.cpp ) diff --git a/unittests/MC/DwarfLineTables.cpp b/unittests/MC/DwarfLineTables.cpp new file mode 100644 index 00000000000..4bfb5acea03 --- /dev/null +++ b/unittests/MC/DwarfLineTables.cpp @@ -0,0 +1,179 @@ +//===- llvm/unittest/MC/DwarfLineTables.cpp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Dwarf.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { +struct Context { + const char *Triple = "x86_64-pc-linux"; + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr Ctx; + + Context() { + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllDisassemblers(); + + // If we didn't build x86, do not run the test. 
+ std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); + if (!TheTarget) + return; + + MRI.reset(TheTarget->createMCRegInfo(Triple)); + MAI.reset(TheTarget->createMCAsmInfo(*MRI, Triple)); + Ctx = llvm::make_unique(MAI.get(), MRI.get(), nullptr); + } + + operator bool() { return Ctx.get(); } + operator MCContext &() { return *Ctx; }; +}; + +Context Ctxt; +} + +void verifyEncoding(MCDwarfLineTableParams Params, int LineDelta, int AddrDelta, + ArrayRef ExpectedEncoding) { + SmallString<16> Buffer; + raw_svector_ostream EncodingOS(Buffer); + MCDwarfLineAddr::Encode(Ctxt, Params, LineDelta, AddrDelta, EncodingOS); + ArrayRef Encoding(reinterpret_cast(Buffer.data()), + Buffer.size()); + EXPECT_EQ(ExpectedEncoding, Encoding); +} + +TEST(DwarfLineTables, TestDefaultParams) { + if (!Ctxt) + return; + + MCDwarfLineTableParams Params; + + // Minimal line offset expressible through extended opcode, 0 addr delta + const uint8_t Encoding0[] = {13}; // Special opcode Addr += 0, Line += -5 + verifyEncoding(Params, -5, 0, Encoding0); + + // Maximal line offset expressible through extended opcode, + const uint8_t Encoding1[] = {26}; // Special opcode Addr += 0, Line += +8 + verifyEncoding(Params, 8, 0, Encoding1); + + // Random value in the middle of the special ocode range + const uint8_t Encoding2[] = {146}; // Special opcode Addr += 9, Line += 2 + verifyEncoding(Params, 2, 9, Encoding2); + + // Minimal line offset expressible through extended opcode, max addr delta + const uint8_t Encoding3[] = {251}; // Special opcode Addr += 17, Line += -5 + verifyEncoding(Params, -5, 17, Encoding3); + + // Biggest special opcode + const uint8_t Encoding4[] = {255}; // Special opcode Addr += 17, Line += -1 + verifyEncoding(Params, -1, 17, Encoding4); + + // Line delta outside of the special opcode range, address delta in range + const uint8_t Encoding5[] = {dwarf::DW_LNS_advance_line, 9, + 158}; // Special opcode Addr += 10, Line += 0 + 
verifyEncoding(Params, 9, 10, Encoding5); + + // Address delta outside of the special opcode range, but small + // enough to do DW_LNS_const_add_pc + special opcode. + const uint8_t Encoding6[] = {dwarf::DW_LNS_const_add_pc, // pc += 17 + 62}; // Special opcode Addr += 3, Line += 2 + verifyEncoding(Params, 2, 20, Encoding6); + + // Address delta big enough to require the use of DW_LNS_advance_pc + // Line delta in special opcode range + const uint8_t Encoding7[] = {dwarf::DW_LNS_advance_pc, 100, + 20}; // Special opcode Addr += 0, Line += 2 + verifyEncoding(Params, 2, 100, Encoding7); + + // No special opcode possible. + const uint8_t Encoding8[] = {dwarf::DW_LNS_advance_line, 20, + dwarf::DW_LNS_advance_pc, 100, + dwarf::DW_LNS_copy}; + verifyEncoding(Params, 20, 100, Encoding8); +} + +TEST(DwarfLineTables, TestCustomParams) { + if (!Ctxt) + return; + + // Some tests against the example values given in the standard. + MCDwarfLineTableParams Params; + Params.DWARF2LineOpcodeBase = 13; + Params.DWARF2LineBase = -3; + Params.DWARF2LineRange = 12; + + // Minimal line offset expressible through extended opcode, 0 addr delta + const uint8_t Encoding0[] = {13}; // Special opcode Addr += 0, Line += -5 + verifyEncoding(Params, -3, 0, Encoding0); + + // Maximal line offset expressible through extended opcode, + const uint8_t Encoding1[] = {24}; // Special opcode Addr += 0, Line += +8 + verifyEncoding(Params, 8, 0, Encoding1); + + // Random value in the middle of the special ocode range + const uint8_t Encoding2[] = {126}; // Special opcode Addr += 9, Line += 2 + verifyEncoding(Params, 2, 9, Encoding2); + + // Minimal line offset expressible through extended opcode, max addr delta + const uint8_t Encoding3[] = {253}; // Special opcode Addr += 20, Line += -3 + verifyEncoding(Params, -3, 20, Encoding3); + + // Biggest special opcode + const uint8_t Encoding4[] = {255}; // Special opcode Addr += 17, Line += -1 + verifyEncoding(Params, -1, 20, Encoding4); + + // Line delta 
outside of the special opcode range, address delta in range + const uint8_t Encoding5[] = {dwarf::DW_LNS_advance_line, 9, + 136}; // Special opcode Addr += 10, Line += 0 + verifyEncoding(Params, 9, 10, Encoding5); + + // Address delta outside of the special opcode range, but small + // enough to do DW_LNS_const_add_pc + special opcode. + const uint8_t Encoding6[] = {dwarf::DW_LNS_const_add_pc, // pc += 20 + 138}; // Special opcode Addr += 10, Line += 2 + verifyEncoding(Params, 2, 30, Encoding6); + + // Address delta big enough to require the use of DW_LNS_advance_pc + // Line delta in special opcode range + const uint8_t Encoding7[] = {dwarf::DW_LNS_advance_pc, 100, + 18}; // Special opcode Addr += 0, Line += 2 + verifyEncoding(Params, 2, 100, Encoding7); + + // No special opcode possible. + const uint8_t Encoding8[] = {dwarf::DW_LNS_advance_line, 20, + dwarf::DW_LNS_advance_pc, 100, + dwarf::DW_LNS_copy}; + verifyEncoding(Params, 20, 100, Encoding8); +} + +TEST(DwarfLineTables, TestCustomParams2) { + if (!Ctxt) + return; + + // Corner case param values. + MCDwarfLineTableParams Params; + Params.DWARF2LineOpcodeBase = 13; + Params.DWARF2LineBase = 1; + Params.DWARF2LineRange = 255; + + const uint8_t Encoding0[] = {dwarf::DW_LNS_advance_line, 248, 1, + dwarf::DW_LNS_copy}; + verifyEncoding(Params, 248, 0, Encoding0); +} From e336f8eb18b1435c6a921719d26d01afd8734865 Mon Sep 17 00:00:00 2001 From: Frederic Riss Date: Mon, 1 Feb 2016 03:44:22 +0000 Subject: [PATCH 0104/1132] [dsymutil] Support scattered relocs. Although it seems like clang will never emit scattered relocations in the debug information (at least I couldn't find a way), we have too support them for the benefit of other compilers. As clang doesn't generate them, the included testcase was produced from hacked up assembly. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259339 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit f768b25cefd09f4c0b2adae72f47c9a15bae74ff) --- test/tools/dsymutil/ARM/dummy-debug-map.map | 15 ++ test/tools/dsymutil/ARM/scattered.c | 12 ++ .../tools/dsymutil/Inputs/scattered-reloc/1.o | Bin 0 -> 1528 bytes .../tools/dsymutil/Inputs/scattered-reloc/1.s | 186 ++++++++++++++++++ tools/dsymutil/DwarfLinker.cpp | 17 +- 5 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 test/tools/dsymutil/ARM/dummy-debug-map.map create mode 100644 test/tools/dsymutil/ARM/scattered.c create mode 100644 test/tools/dsymutil/Inputs/scattered-reloc/1.o create mode 100644 test/tools/dsymutil/Inputs/scattered-reloc/1.s diff --git a/test/tools/dsymutil/ARM/dummy-debug-map.map b/test/tools/dsymutil/ARM/dummy-debug-map.map new file mode 100644 index 00000000000..aa69aeafedb --- /dev/null +++ b/test/tools/dsymutil/ARM/dummy-debug-map.map @@ -0,0 +1,15 @@ +# This is a dummy debug map used for some tests where the contents of the +# map are just an implementation detail. The tests wanting to use that file +# should put all there object files in an explicitely named sub-directory +# of Inputs, and they should be named 1.o, 2.o, ... +# As not finding an object file or symbols isn't a fatal error for dsymutil, +# you can extend this file with as much object files and symbols as needed. + +--- +triple: 'thumbv7-apple-darwin' +objects: + - filename: 1.o + symbols: + - { sym: _bar, objAddr: 0x0, binAddr: 0x10000, size: 0x10 } +... 
+ diff --git a/test/tools/dsymutil/ARM/scattered.c b/test/tools/dsymutil/ARM/scattered.c new file mode 100644 index 00000000000..c84a602063f --- /dev/null +++ b/test/tools/dsymutil/ARM/scattered.c @@ -0,0 +1,12 @@ +RUN: llvm-dsymutil -y %p/dummy-debug-map.map -oso-prepend-path %p/../Inputs/scattered-reloc/ -f -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +// See Inputs/scattered-reloc/scattered.s to see how this test +// actually works. +int bar = 42; + +CHECK: DW_TAG_variable +CHECK-NOT: DW_TAG +CHECK: DW_AT_name{{.*}}"bar" +CHECK-NOT: DW_TAG +CHECK: DW_AT_location{{.*}}<0x05> 03 10 00 01 00 + diff --git a/test/tools/dsymutil/Inputs/scattered-reloc/1.o b/test/tools/dsymutil/Inputs/scattered-reloc/1.o new file mode 100644 index 0000000000000000000000000000000000000000..df5cc9bd3e0bc7e25f3cef8b29f52940cc82e8a8 GIT binary patch literal 1528 zcmbVM%Wl(95FN*HcsLD3Wr2i*45U(vP}DC?aP74afs!KCHw) z*6}{Z-XeJed0bCnpP1?9DHVCIqkfT6wXGMmzMsCaK4YEKS8B(O?9>xcYO`9a_Iy>L3S3 z;vXD#5HvjH+fB+C>E#1ryq9kibtD`uXD_~(td6g!F9TO_4jCIbj{zNl?PfU)G(wK8 zDLA4x!Ps6F!SM(j^@$wGoHpvdN3B^M0XR4xA183kV}F*#k|eRoi@*t7l*h0!MY-0M zyu=-DF9VvU89L8G=O4nr5N$uJZl_Ih Date: Mon, 1 Feb 2016 04:43:14 +0000 Subject: [PATCH 0105/1132] [dsymutil] Skip mach-o paired relocations Noticed while working on scattered relocations. I do not think these relocs can actually happen in the debug_info section, but if they happen the code would mishandle them. Explicitely skip them and warn if we encounter one. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259341 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 870f3ff0d284b38c18b38e90eb154c90bb283197) --- tools/dsymutil/DwarfLinker.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp index 1cabc9a66c3..5a7a80576c3 100644 --- a/tools/dsymutil/DwarfLinker.cpp +++ b/tools/dsymutil/DwarfLinker.cpp @@ -1875,6 +1875,26 @@ void DwarfLinker::endDebugObject() { DIEAlloc.Reset(); } +static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch) { + switch (Arch) { + case Triple::x86: + return RelocType == MachO::GENERIC_RELOC_SECTDIFF || + RelocType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF; + case Triple::x86_64: + return RelocType == MachO::X86_64_RELOC_SUBTRACTOR; + case Triple::arm: + case Triple::thumb: + return RelocType == MachO::ARM_RELOC_SECTDIFF || + RelocType == MachO::ARM_RELOC_LOCAL_SECTDIFF || + RelocType == MachO::ARM_RELOC_HALF || + RelocType == MachO::ARM_RELOC_HALF_SECTDIFF; + case Triple::aarch64: + return RelocType == MachO::ARM64_RELOC_SUBTRACTOR; + default: + return false; + } +} + /// \brief Iterate over the relocations of the given \p Section and /// store the ones that correspond to debug map entries into the /// ValidRelocs array. 
@@ -1885,10 +1905,24 @@ findValidRelocsMachO(const object::SectionRef &Section, StringRef Contents; Section.getContents(Contents); DataExtractor Data(Contents, Obj.isLittleEndian(), 0); + bool SkipNext = false; for (const object::RelocationRef &Reloc : Section.relocations()) { + if (SkipNext) { + SkipNext = false; + continue; + } + object::DataRefImpl RelocDataRef = Reloc.getRawDataRefImpl(); MachO::any_relocation_info MachOReloc = Obj.getRelocation(RelocDataRef); + + if (isMachOPairedReloc(Obj.getAnyRelocationType(MachOReloc), + Obj.getArch())) { + SkipNext = true; + Linker.reportWarning(" unsupported relocation in debug_info section."); + continue; + } + unsigned RelocSize = 1 << Obj.getAnyRelocationLength(MachOReloc); uint64_t Offset64 = Reloc.getOffset(); if ((RelocSize != 4 && RelocSize != 8)) { From 39b9976a280b14ebcf6916a14569c82e138d4d90 Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Fri, 22 Jan 2016 20:25:56 +0000 Subject: [PATCH 0106/1132] [PGO] Remove use of static variable. /NFC Make the variable a member of the writer trait object owned now by the writer. Also use a different generator interface to pass the infoObject from the writer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258544 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 5c32749d7b5cc03745a310e5917b811c3eeae711) --- include/llvm/ProfileData/InstrProfWriter.h | 7 +++++- lib/ProfileData/InstrProfWriter.cpp | 26 +++++++++++++--------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index 7e4f6011a39..5c21bd12d10 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -25,6 +25,8 @@ namespace llvm { /// Writer for instrumentation based profile data. 
class ProfOStream; +class InstrProfRecordWriterTrait; + class InstrProfWriter { public: typedef SmallDenseMap ProfilingData; @@ -32,9 +34,12 @@ class InstrProfWriter { private: StringMap FunctionData; uint64_t MaxFunctionCount; + // Use raw pointer here for the incomplete type object. + InstrProfRecordWriterTrait *InfoObj; public: - InstrProfWriter() : MaxFunctionCount(0) {} + InstrProfWriter(); + ~InstrProfWriter(); /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 4c7f5de26aa..8557d8083e2 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -71,12 +71,8 @@ class ProfOStream { raw_ostream &OS; support::endian::Writer LE; }; -} - -namespace { -static support::endianness ValueProfDataEndianness = support::little; -class InstrProfRecordTrait { +class InstrProfRecordWriterTrait { public: typedef StringRef key_type; typedef StringRef key_type_ref; @@ -87,6 +83,9 @@ class InstrProfRecordTrait { typedef uint64_t hash_value_type; typedef uint64_t offset_type; + support::endianness ValueProfDataEndianness; + + InstrProfRecordWriterTrait() : ValueProfDataEndianness(support::little) {} static hash_value_type ComputeHash(key_type_ref K) { return IndexedInstrProf::ComputeHash(K); } @@ -114,12 +113,11 @@ class InstrProfRecordTrait { return std::make_pair(N, M); } - static void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N){ + void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) { Out.write(K.data(), N); } - static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, - offset_type) { + void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) { using namespace llvm::support; endian::Writer LE(Out); for (const auto &ProfileData : *V) { @@ -141,10 +139,16 @@ class InstrProfRecordTrait { }; } 
+InstrProfWriter::InstrProfWriter() + : FunctionData(), MaxFunctionCount(0), + InfoObj(new InstrProfRecordWriterTrait()) {} + +InstrProfWriter::~InstrProfWriter() { delete InfoObj; } + // Internal interface for testing purpose only. void InstrProfWriter::setValueProfDataEndianness( support::endianness Endianness) { - ValueProfDataEndianness = Endianness; + InfoObj->ValueProfDataEndianness = Endianness; } std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, @@ -181,7 +185,7 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, } void InstrProfWriter::writeImpl(ProfOStream &OS) { - OnDiskChainedHashTableGenerator Generator; + OnDiskChainedHashTableGenerator Generator; // Populate the hash table generator. for (const auto &I : FunctionData) Generator.insert(I.getKey(), &I.getValue()); @@ -205,7 +209,7 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) { // Reserve the space for HashOffset field. OS.write(0); // Write the hash table. - uint64_t HashTableStart = Generator.Emit(OS.OS); + uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); // Now do the final patch: PatchItem PatchItems[1] = {{HashTableStartLoc, &HashTableStart, 1}}; From 361153a94877c34e1dcb8e7d760ce4f24c409293 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Fri, 29 Jan 2016 22:54:45 +0000 Subject: [PATCH 0107/1132] [Profiling] Add a -sparse mode to llvm-profdata merge Add an option to llvm-profdata merge for writing out sparse indexed profiles. These profiles omit InstrProfRecords for functions which are never executed. 
Differential Revision: http://reviews.llvm.org/D16727 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259258 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 0c94d7d4410f65d70b10709b98cbe5cc3208b1b0) --- docs/CommandGuide/llvm-profdata.rst | 6 ++ include/llvm/ProfileData/InstrProfWriter.h | 5 +- lib/ProfileData/InstrProfWriter.cpp | 30 +++++++-- test/tools/llvm-profdata/general.proftext | 18 +++--- tools/llvm-profdata/llvm-profdata.cpp | 10 ++- unittests/ProfileData/CoverageMappingTest.cpp | 37 +++++++---- unittests/ProfileData/InstrProfTest.cpp | 61 ++++++++++++++----- 7 files changed, 124 insertions(+), 43 deletions(-) diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst index 74fe4ee9d21..7f647ef1c47 100644 --- a/docs/CommandGuide/llvm-profdata.rst +++ b/docs/CommandGuide/llvm-profdata.rst @@ -90,6 +90,12 @@ OPTIONS Emit the profile using GCC's gcov format (Not yet supported). + .. option:: -sparse[=true|false] + + Do not emit function records with 0 execution count. Can only be used in + conjunction with -instr. Defaults to false, since it can inhibit compiler + optimization during PGO. + EXAMPLES ^^^^^^^^ Basic Usage diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index 5c21bd12d10..d751df33bd1 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -32,13 +32,14 @@ class InstrProfWriter { typedef SmallDenseMap ProfilingData; private: + bool Sparse; StringMap FunctionData; uint64_t MaxFunctionCount; // Use raw pointer here for the incomplete type object. InstrProfRecordWriterTrait *InfoObj; public: - InstrProfWriter(); + InstrProfWriter(bool Sparse = false); ~InstrProfWriter(); /// Add function counts for the given function. If there are already counts @@ -57,8 +58,10 @@ class InstrProfWriter { // Internal interface for testing purpose only. 
void setValueProfDataEndianness(support::endianness Endianness); + void setOutputSparse(bool Sparse); private: + bool shouldEncodeData(const ProfilingData &PD); void writeImpl(ProfOStream &OS); }; diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 8557d8083e2..204d3403c8c 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -139,8 +139,8 @@ class InstrProfRecordWriterTrait { }; } -InstrProfWriter::InstrProfWriter() - : FunctionData(), MaxFunctionCount(0), +InstrProfWriter::InstrProfWriter(bool Sparse) + : Sparse(Sparse), FunctionData(), MaxFunctionCount(0), InfoObj(new InstrProfRecordWriterTrait()) {} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -150,6 +150,9 @@ void InstrProfWriter::setValueProfDataEndianness( support::endianness Endianness) { InfoObj->ValueProfDataEndianness = Endianness; } +void InstrProfWriter::setOutputSparse(bool Sparse) { + this->Sparse = Sparse; +} std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) { @@ -184,11 +187,24 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, return Result; } +bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { + if (!Sparse) + return true; + for (const auto &Func : PD) { + const InstrProfRecord &IPR = Func.second; + if (std::any_of(IPR.Counts.begin(), IPR.Counts.end(), + [](uint64_t Count) { return Count > 0; })) + return true; + } + return false; +} + void InstrProfWriter::writeImpl(ProfOStream &OS) { OnDiskChainedHashTableGenerator Generator; // Populate the hash table generator. for (const auto &I : FunctionData) - Generator.insert(I.getKey(), &I.getValue()); + if (shouldEncodeData(I.getValue())) + Generator.insert(I.getKey(), &I.getValue()); // Write the header. 
IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; @@ -279,10 +295,12 @@ void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, void InstrProfWriter::writeText(raw_fd_ostream &OS) { InstrProfSymtab Symtab; for (const auto &I : FunctionData) - Symtab.addFuncName(I.getKey()); + if (shouldEncodeData(I.getValue())) + Symtab.addFuncName(I.getKey()); Symtab.finalizeSymtab(); for (const auto &I : FunctionData) - for (const auto &Func : I.getValue()) - writeRecordInText(Func.second, Symtab, OS); + if (shouldEncodeData(I.getValue())) + for (const auto &Func : I.getValue()) + writeRecordInText(Func.second, Symtab, OS); } diff --git a/test/tools/llvm-profdata/general.proftext b/test/tools/llvm-profdata/general.proftext index 574effde5e4..3c62c200899 100644 --- a/test/tools/llvm-profdata/general.proftext +++ b/test/tools/llvm-profdata/general.proftext @@ -1,6 +1,6 @@ +# RUN: llvm-profdata merge -sparse=true %s -o %t.profdata - -# RUN: llvm-profdata merge %s -o %t.profdata +# RUN: llvm-profdata merge -sparse=false %s -o %t.profdata.dense # RUN: llvm-profdata show %t.profdata --function function_count_only --counts | FileCheck %s -check-prefix=FUNC_COUNT_ONLY function_count_only @@ -12,7 +12,8 @@ function_count_only # FUNC_COUNT_ONLY-NEXT: Function count: 97531 # FUNC_COUNT_ONLY-NEXT: Block counts: [] -# RUN: llvm-profdata show %t.profdata --function "name with spaces" --counts | FileCheck %s -check-prefix=SPACES +# RUN: llvm-profdata show %t.profdata.dense --function "name with spaces" --counts | FileCheck %s -check-prefix=SPACES +# RUN: llvm-profdata show %t.profdata --function "name with spaces" --counts | FileCheck %s --check-prefix=SPARSE_SPACES name with spaces 1024 2 @@ -22,6 +23,7 @@ name with spaces # SPACES-NEXT: Counters: 2 # SPACES-NEXT: Function count: 0 # SPACES-NEXT: Block counts: [0] +# SPARSE_SPACES-NOT: Function count: 0 # RUN: llvm-profdata show %t.profdata --function large_numbers --counts | FileCheck %s 
-check-prefix=LARGENUM large_numbers @@ -38,7 +40,7 @@ large_numbers # LARGENUM-NEXT: Function count: 2305843009213693952 # LARGENUM-NEXT: Block counts: [1152921504606846976, 576460752303423488, 288230376151711744, 144115188075855872, 72057594037927936] -# RUN: llvm-profdata show %t.profdata --function hex_hash | FileCheck %s -check-prefix=HEX-HASH +# RUN: llvm-profdata show %t.profdata.dense --function hex_hash | FileCheck %s -check-prefix=HEX-HASH hex_hash 0x1234 1 @@ -51,19 +53,21 @@ hex_hash # NOSUCHFUNC: Functions shown: 0 # RUN: llvm-profdata show %t.profdata --function _ | FileCheck %s -check-prefix=SOMEFUNCS +# RUN: llvm-profdata show %t.profdata.dense --function _ | FileCheck %s -check-prefix=SOMEFUNCS_DENSE # SOMEFUNCS: Counters: # SOMEFUNCS: function_count_only: # SOMEFUNCS: large_numbers: -# SOMEFUNCS: Functions shown: 3 +# SOMEFUNCS: Functions shown: 2 +# SOMEFUNCS_DENSE: Functions shown: 3 -# RUN: llvm-profdata show %t.profdata | FileCheck %s -check-prefix=SUMMARY +# RUN: llvm-profdata show %t.profdata.dense | FileCheck %s -check-prefix=SUMMARY # SUMMARY-NOT: Counters: # SUMMARY-NOT: Functions shown: # SUMMARY: Total functions: 4 # SUMMARY: Maximum function count: 2305843009213693952 # SUMMARY: Maximum internal block count: 1152921504606846976 -# RUN: llvm-profdata show --detailed-summary %t.profdata | FileCheck %s -check-prefix=DETAILED-SUMMARY +# RUN: llvm-profdata show --detailed-summary %t.profdata.dense | FileCheck %s -check-prefix=DETAILED-SUMMARY # DETAILED-SUMMARY: Detailed summary: # DETAILED-SUMMARY: Total number of blocks: 10 # DETAILED-SUMMARY: Total count: 4539628424389557499 diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index c6efacb3554..9d926531503 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -107,7 +107,7 @@ typedef SmallVector WeightedFileVector; static void mergeInstrProfile(const WeightedFileVector &Inputs, StringRef OutputFilename, - 
ProfileFormat OutputFormat) { + ProfileFormat OutputFormat, bool OutputSparse) { if (OutputFilename.compare("-") == 0) exitWithError("Cannot write indexed profdata format to stdout."); @@ -119,7 +119,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, if (EC) exitWithErrorCode(EC, OutputFilename); - InstrProfWriter Writer; + InstrProfWriter Writer(OutputSparse); SmallSet WriterErrorCodes; for (const auto &Input : Inputs) { auto ReaderOrErr = InstrProfReader::create(Input.Filename); @@ -228,6 +228,9 @@ static int merge_main(int argc, const char *argv[]) { "GCC encoding (only meaningful for -sample)"), clEnumValEnd)); + cl::opt OutputSparse("sparse", cl::init(false), + cl::desc("Generate a sparse profile (only meaningful for -instr)")); + cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); if (InputFilenames.empty() && WeightedInputFilenames.empty()) @@ -241,7 +244,8 @@ static int merge_main(int argc, const char *argv[]) { WeightedInputs.push_back(parseWeightedFile(WeightedFilename)); if (ProfileKind == instr) - mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat); + mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat, + OutputSparse); else mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat); diff --git a/unittests/ProfileData/CoverageMappingTest.cpp b/unittests/ProfileData/CoverageMappingTest.cpp index 35b8626c494..c85da9a0f9c 100644 --- a/unittests/ProfileData/CoverageMappingTest.cpp +++ b/unittests/ProfileData/CoverageMappingTest.cpp @@ -92,6 +92,7 @@ struct CoverageMappingTest : ::testing::Test { void SetUp() override { NextFile = 0; + ProfileWriter.setOutputSparse(false); } unsigned getFile(StringRef Name) { @@ -154,7 +155,16 @@ struct CoverageMappingTest : ::testing::Test { } }; -TEST_F(CoverageMappingTest, basic_write_read) { +struct MaybeSparseCoverageMappingTest + : public CoverageMappingTest, + public ::testing::WithParamInterface { + void SetUp() { + CoverageMappingTest::SetUp(); + 
ProfileWriter.setOutputSparse(GetParam()); + } +}; + +TEST_P(MaybeSparseCoverageMappingTest, basic_write_read) { addCMR(Counter::getCounter(0), "foo", 1, 1, 1, 1); addCMR(Counter::getCounter(1), "foo", 2, 1, 2, 2); addCMR(Counter::getZero(), "foo", 3, 1, 3, 4); @@ -174,7 +184,7 @@ TEST_F(CoverageMappingTest, basic_write_read) { } } -TEST_F(CoverageMappingTest, expansion_gets_first_counter) { +TEST_P(MaybeSparseCoverageMappingTest, expansion_gets_first_counter) { addCMR(Counter::getCounter(1), "foo", 10, 1, 10, 2); // This starts earlier in "foo", so the expansion should get its counter. addCMR(Counter::getCounter(2), "foo", 1, 1, 20, 1); @@ -187,7 +197,7 @@ TEST_F(CoverageMappingTest, expansion_gets_first_counter) { ASSERT_EQ(3U, OutputCMRs[2].LineStart); } -TEST_F(CoverageMappingTest, basic_coverage_iteration) { +TEST_P(MaybeSparseCoverageMappingTest, basic_coverage_iteration) { InstrProfRecord Record("func", 0x1234, {30, 20, 10, 0}); ProfileWriter.addRecord(std::move(Record)); readProfCounts(); @@ -210,7 +220,7 @@ TEST_F(CoverageMappingTest, basic_coverage_iteration) { ASSERT_EQ(CoverageSegment(11, 11, false), Segments[6]); } -TEST_F(CoverageMappingTest, uncovered_function) { +TEST_P(MaybeSparseCoverageMappingTest, uncovered_function) { readProfCounts(); addCMR(Counter::getZero(), "file1", 1, 2, 3, 4); @@ -223,7 +233,7 @@ TEST_F(CoverageMappingTest, uncovered_function) { ASSERT_EQ(CoverageSegment(3, 4, false), Segments[1]); } -TEST_F(CoverageMappingTest, uncovered_function_with_mapping) { +TEST_P(MaybeSparseCoverageMappingTest, uncovered_function_with_mapping) { readProfCounts(); addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9); @@ -238,7 +248,7 @@ TEST_F(CoverageMappingTest, uncovered_function_with_mapping) { ASSERT_EQ(CoverageSegment(9, 9, false), Segments[2]); } -TEST_F(CoverageMappingTest, combine_regions) { +TEST_P(MaybeSparseCoverageMappingTest, combine_regions) { InstrProfRecord Record("func", 0x1234, {10, 20, 30}); 
ProfileWriter.addRecord(std::move(Record)); readProfCounts(); @@ -257,9 +267,11 @@ TEST_F(CoverageMappingTest, combine_regions) { ASSERT_EQ(CoverageSegment(9, 9, false), Segments[3]); } -TEST_F(CoverageMappingTest, dont_combine_expansions) { - InstrProfRecord Record("func", 0x1234, {10, 20}); - ProfileWriter.addRecord(std::move(Record)); +TEST_P(MaybeSparseCoverageMappingTest, dont_combine_expansions) { + InstrProfRecord Record1("func", 0x1234, {10, 20}); + InstrProfRecord Record2("func", 0x1234, {0, 0}); + ProfileWriter.addRecord(std::move(Record1)); + ProfileWriter.addRecord(std::move(Record2)); readProfCounts(); addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9); @@ -277,8 +289,8 @@ TEST_F(CoverageMappingTest, dont_combine_expansions) { ASSERT_EQ(CoverageSegment(9, 9, false), Segments[3]); } -TEST_F(CoverageMappingTest, strip_filename_prefix) { - InstrProfRecord Record("file1:func", 0x1234, {10}); +TEST_P(MaybeSparseCoverageMappingTest, strip_filename_prefix) { + InstrProfRecord Record("file1:func", 0x1234, {0}); ProfileWriter.addRecord(std::move(Record)); readProfCounts(); @@ -292,4 +304,7 @@ TEST_F(CoverageMappingTest, strip_filename_prefix) { ASSERT_EQ("func", Names[0]); } +INSTANTIATE_TEST_CASE_P(MaybeSparse, MaybeSparseCoverageMappingTest, + ::testing::Bool()); + } // end anonymous namespace diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp index 51f52f2a077..8c12f47dabe 100644 --- a/unittests/ProfileData/InstrProfTest.cpp +++ b/unittests/ProfileData/InstrProfTest.cpp @@ -37,6 +37,8 @@ struct InstrProfTest : ::testing::Test { InstrProfWriter Writer; std::unique_ptr Reader; + void SetUp() { Writer.setOutputSparse(false); } + void readProfile(std::unique_ptr Profile) { auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Profile)); ASSERT_TRUE(NoError(ReaderOrErr.getError())); @@ -44,13 +46,24 @@ struct InstrProfTest : ::testing::Test { } }; -TEST_F(InstrProfTest, write_and_read_empty_profile) { +struct 
SparseInstrProfTest : public InstrProfTest { + void SetUp() { Writer.setOutputSparse(true); } +}; + +struct MaybeSparseInstrProfTest : public InstrProfTest, + public ::testing::WithParamInterface { + void SetUp() { + Writer.setOutputSparse(GetParam()); + } +}; + +TEST_P(MaybeSparseInstrProfTest, write_and_read_empty_profile) { auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); ASSERT_TRUE(Reader->begin() == Reader->end()); } -TEST_F(InstrProfTest, write_and_read_one_function) { +TEST_P(MaybeSparseInstrProfTest, write_and_read_one_function) { InstrProfRecord Record("foo", 0x1234, {1, 2, 3, 4}); Writer.addRecord(std::move(Record)); auto Profile = Writer.writeBuffer(); @@ -68,7 +81,7 @@ TEST_F(InstrProfTest, write_and_read_one_function) { ASSERT_TRUE(++I == E); } -TEST_F(InstrProfTest, get_instr_prof_record) { +TEST_P(MaybeSparseInstrProfTest, get_instr_prof_record) { InstrProfRecord Record1("foo", 0x1234, {1, 2}); InstrProfRecord Record2("foo", 0x1235, {3, 4}); Writer.addRecord(std::move(Record1)); @@ -95,7 +108,7 @@ TEST_F(InstrProfTest, get_instr_prof_record) { ASSERT_TRUE(ErrorEquals(instrprof_error::unknown_function, R.getError())); } -TEST_F(InstrProfTest, get_function_counts) { +TEST_P(MaybeSparseInstrProfTest, get_function_counts) { InstrProfRecord Record1("foo", 0x1234, {1, 2}); InstrProfRecord Record2("foo", 0x1235, {3, 4}); Writer.addRecord(std::move(Record1)); @@ -122,7 +135,7 @@ TEST_F(InstrProfTest, get_function_counts) { ASSERT_TRUE(ErrorEquals(instrprof_error::unknown_function, EC)); } -TEST_F(InstrProfTest, get_icall_data_read_write) { +TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write) { InstrProfRecord Record1("caller", 0x1234, {1, 2}); InstrProfRecord Record2("callee1", 0x1235, {3, 4}); InstrProfRecord Record3("callee2", 0x1235, {3, 4}); @@ -169,7 +182,7 @@ TEST_F(InstrProfTest, get_icall_data_read_write) { ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1")); } -TEST_F(InstrProfTest, 
get_icall_data_read_write_with_weight) { +TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_with_weight) { InstrProfRecord Record1("caller", 0x1234, {1, 2}); InstrProfRecord Record2("callee1", 0x1235, {3, 4}); InstrProfRecord Record3("callee2", 0x1235, {3, 4}); @@ -215,7 +228,7 @@ TEST_F(InstrProfTest, get_icall_data_read_write_with_weight) { ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1")); } -TEST_F(InstrProfTest, get_icall_data_read_write_big_endian) { +TEST_P(MaybeSparseInstrProfTest, get_icall_data_read_write_big_endian) { InstrProfRecord Record1("caller", 0x1234, {1, 2}); InstrProfRecord Record2("callee1", 0x1235, {3, 4}); InstrProfRecord Record3("callee2", 0x1235, {3, 4}); @@ -267,7 +280,7 @@ TEST_F(InstrProfTest, get_icall_data_read_write_big_endian) { Writer.setValueProfDataEndianness(support::little); } -TEST_F(InstrProfTest, get_icall_data_merge1) { +TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1) { static const char caller[] = "caller"; static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; @@ -382,7 +395,7 @@ TEST_F(InstrProfTest, get_icall_data_merge1) { ASSERT_EQ(2U, VD_4[2].Count); } -TEST_F(InstrProfTest, get_icall_data_merge1_saturation) { +TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1_saturation) { static const char bar[] = "bar"; const uint64_t Max = std::numeric_limits::max(); @@ -436,7 +449,7 @@ TEST_F(InstrProfTest, get_icall_data_merge1_saturation) { // This test tests that when there are too many values // for a given site, the merged results are properly // truncated. 
-TEST_F(InstrProfTest, get_icall_data_merge_site_trunc) { +TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge_site_trunc) { static const char caller[] = "caller"; InstrProfRecord Record11(caller, 0x1234, {1, 2}); @@ -504,7 +517,7 @@ ValueProfNode Site4Values[2] = {{{uint64_t("callee2"), 1800}, &Site4Values[1]}, static ValueProfNode *ValueProfNodes[5] = {&Site1Values[0], &Site2Values[0], &Site3Values[0], &Site4Values[0], 0}; static uint16_t NumValueSites[IPVK_Last + 1] = {5}; -TEST_F(InstrProfTest, runtime_value_prof_data_read_write) { +TEST_P(MaybeSparseInstrProfTest, runtime_value_prof_data_read_write) { ValueProfRuntimeRecord RTRecord; initializeValueProfRuntimeRecord(&RTRecord, &NumValueSites[0], &ValueProfNodes[0]); @@ -574,7 +587,7 @@ TEST_F(InstrProfTest, runtime_value_prof_data_read_write) { free(VPData); } -TEST_F(InstrProfTest, get_max_function_count) { +TEST_P(MaybeSparseInstrProfTest, get_max_function_count) { InstrProfRecord Record1("foo", 0x1234, {1ULL << 31, 2}); InstrProfRecord Record2("bar", 0, {1ULL << 63}); InstrProfRecord Record3("baz", 0x5678, {0, 0, 0, 0}); @@ -587,7 +600,7 @@ TEST_F(InstrProfTest, get_max_function_count) { ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount()); } -TEST_F(InstrProfTest, get_weighted_function_counts) { +TEST_P(MaybeSparseInstrProfTest, get_weighted_function_counts) { InstrProfRecord Record1("foo", 0x1234, {1, 2}); InstrProfRecord Record2("foo", 0x1235, {3, 4}); Writer.addRecord(std::move(Record1), 3); @@ -607,7 +620,7 @@ TEST_F(InstrProfTest, get_weighted_function_counts) { ASSERT_EQ(20U, Counts[1]); } -TEST_F(InstrProfTest, instr_prof_symtab_test) { +TEST_P(MaybeSparseInstrProfTest, instr_prof_symtab_test) { std::vector FuncNames; FuncNames.push_back("func1"); FuncNames.push_back("func2"); @@ -658,7 +671,7 @@ TEST_F(InstrProfTest, instr_prof_symtab_test) { ASSERT_EQ(StringRef("bar3"), R); } -TEST_F(InstrProfTest, instr_prof_symtab_compression_test) { +TEST_P(MaybeSparseInstrProfTest, 
instr_prof_symtab_compression_test) { std::vector FuncNames1; std::vector FuncNames2; for (int I = 0; I < 10 * 1024; I++) { @@ -731,4 +744,22 @@ TEST_F(InstrProfTest, instr_prof_symtab_compression_test) { } } +TEST_F(SparseInstrProfTest, preserve_no_records) { + InstrProfRecord Record1("foo", 0x1234, {0}); + InstrProfRecord Record2("bar", 0x4321, {0, 0}); + InstrProfRecord Record3("bar", 0x4321, {0, 0, 0}); + + Writer.addRecord(std::move(Record1)); + Writer.addRecord(std::move(Record2)); + Writer.addRecord(std::move(Record3)); + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + auto I = Reader->begin(), E = Reader->end(); + ASSERT_TRUE(I == E); +} + +INSTANTIATE_TEST_CASE_P(MaybeSparse, MaybeSparseInstrProfTest, + ::testing::Bool()); + } // end anonymous namespace From f45551cab84750e767b6ae5863d74e81d985f59f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 19 Jan 2016 16:57:08 +0000 Subject: [PATCH 0108/1132] Simplify MCFillFragment. The value size was always 1 or 0, so we don't need to store it. In a no asserts build this takes the testcase of pr26208 from 11 to 10 seconds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258141 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 9f9435ed260c07bfe732e00217d80e1df840fff8) --- include/llvm/MC/MCFragment.h | 29 ++++++----------------------- lib/MC/MCAssembler.cpp | 16 +++------------- lib/MC/MCFragment.cpp | 3 +-- lib/MC/MCMachOStreamer.cpp | 2 +- lib/MC/MCObjectStreamer.cpp | 4 ++-- lib/MC/WinCOFFStreamer.cpp | 2 +- 6 files changed, 14 insertions(+), 42 deletions(-) diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index 7d6db525ce6..e51ee90e3e6 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -321,36 +321,19 @@ class MCAlignFragment : public MCFragment { class MCFillFragment : public MCFragment { - /// Value - Value to use for filling bytes. 
- int64_t Value; - - /// ValueSize - The size (in bytes) of \p Value to use when filling, or 0 if - /// this is a virtual fill fragment. - unsigned ValueSize; + /// Value to use for filling bytes. + uint8_t Value; - /// Size - The number of bytes to insert. + /// The number of bytes to insert. uint64_t Size; public: - MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size, - MCSection *Sec = nullptr) - : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize), - Size(Size) { - assert((!ValueSize || (Size % ValueSize) == 0) && - "Fill size must be a multiple of the value size!"); - } - - /// \name Accessors - /// @{ - - int64_t getValue() const { return Value; } - - unsigned getValueSize() const { return ValueSize; } + MCFillFragment(uint8_t Value, uint64_t Size, MCSection *Sec = nullptr) + : MCFragment(FT_Fill, false, 0, Sec), Value(Value), Size(Size) {} + uint8_t getValue() const { return Value; } uint64_t getSize() const { return Size; } - /// @} - static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Fill; } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 15e82fa4938..a88e3df88ff 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -489,17 +489,8 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, ++stats::EmittedFillFragments; const MCFillFragment &FF = cast(F); - assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!"); - - for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) { - switch (FF.getValueSize()) { - default: llvm_unreachable("Invalid size!"); - case 1: OW->write8 (uint8_t (FF.getValue())); break; - case 2: OW->write16(uint16_t(FF.getValue())); break; - case 4: OW->write32(uint32_t(FF.getValue())); break; - case 8: OW->write64(uint64_t(FF.getValue())); break; - } - } + for (uint64_t I = 0, E = FF.getSize(); I != E; ++I) + OW->write8(FF.getValue()); break; } @@ -578,8 +569,7 @@ void 
MCAssembler::writeSectionData(const MCSection *Sec, "Invalid align in virtual section!"); break; case MCFragment::FT_Fill: - assert((cast(F).getValueSize() == 0 || - cast(F).getValue() == 0) && + assert((cast(F).getValue() == 0) && "Invalid fill in virtual section!"); break; } diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp index efdb7049203..09570d7fbe4 100644 --- a/lib/MC/MCFragment.cpp +++ b/lib/MC/MCFragment.cpp @@ -386,8 +386,7 @@ void MCFragment::dump() { } case MCFragment::FT_Fill: { const MCFillFragment *FF = cast(this); - OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() - << " Size:" << FF->getSize(); + OS << " Value:" << FF->getValue() << " Size:" << FF->getSize(); break; } case MCFragment::FT_Relaxable: { diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 91cec6b5e03..5775245a5da 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -418,7 +418,7 @@ void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, if (ByteAlignment != 1) new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section); - MCFragment *F = new MCFillFragment(0, 0, Size, Section); + MCFragment *F = new MCFillFragment(0, Size, Section); Symbol->setFragment(F); // Update the maximum alignment on the zero fill section if necessary. diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 972610ac8d6..8ee24786967 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -436,9 +436,9 @@ bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name, void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { const MCSection *Sec = getCurrentSection().first; + (void)Sec; assert(Sec && "need a section"); - unsigned ItemSize = Sec->isVirtualSection() ? 
0 : 1; - insert(new MCFillFragment(FillValue, ItemSize, NumBytes)); + insert(new MCFillFragment(FillValue, NumBytes)); } void MCObjectStreamer::FinishImpl() { diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index a38b1a41a9b..f9d231921d5 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -258,7 +258,7 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, ByteAlignment, Section); MCFillFragment *Fragment = new MCFillFragment( - /*Value=*/0, /*ValueSize=*/0, Size, Section); + /*Value=*/0, Size, Section); Symbol->setFragment(Fragment); } From 1e175257bbd93805becab24ee90dfca92e3fd8da Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 19 Jan 2016 17:47:48 +0000 Subject: [PATCH 0109/1132] Use larger write sizes for MCFillFragment. This brings the pr26208 testcase down to 3.2 seconds. Not checking it in since it does create a 4GB .o file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258149 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit be7a41bf5d0e689c16e70e2a6b22c3c19b98f18b) --- lib/MC/MCAssembler.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index a88e3df88ff..6965b1b037f 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -488,9 +488,20 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, case MCFragment::FT_Fill: { ++stats::EmittedFillFragments; const MCFillFragment &FF = cast(F); - - for (uint64_t I = 0, E = FF.getSize(); I != E; ++I) - OW->write8(FF.getValue()); + uint8_t V = FF.getValue(); + const unsigned MaxChunkSize = 16; + char Data[MaxChunkSize]; + memcpy(Data, &V, 1); + for (unsigned I = 1; I < MaxChunkSize; ++I) + Data[I] = Data[0]; + + uint64_t Size = FF.getSize(); + for (unsigned ChunkSize = MaxChunkSize; ChunkSize; ChunkSize /= 2) { + StringRef Ref(Data, ChunkSize); + for (uint64_t I = 0, E = Size / ChunkSize; I 
!= E; ++I) + OW->writeBytes(Ref); + Size = Size % ChunkSize; + } break; } From 7a4fa64711250111fd0f93250556966140a0f8a8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 19 Jan 2016 22:24:12 +0000 Subject: [PATCH 0110/1132] [X86][SSE] Add INSERTPS target shuffle combines. As vector shuffles can only reference two inputs many (V)INSERTPS patterns end up being split over two targets shuffles. This patch adds combines to attempt to combine (V)INSERTPS nodes with input/output nodes that are just zeroing out these additional vector elements. Differential Revision: http://reviews.llvm.org/D16072 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258205 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 250273a9cef868e7fa5c3ad1cd88f4bdd17bb5e5) --- lib/Target/X86/X86ISelLowering.cpp | 137 ++++++++++++++++++ test/CodeGen/X86/insertps-combine.ll | 16 +- .../X86/merge-consecutive-loads-128.ll | 12 +- test/CodeGen/X86/vector-shuffle-128-v4.ll | 8 - 4 files changed, 145 insertions(+), 28 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8571311a79b..ebbb44695a0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23704,6 +23704,52 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, return true; } +/// Check a target shuffle mask's inputs to see if we can set any values to +/// SM_SentinelZero - this is for elements that are known to be zero +/// (not just zeroable) from their inputs. +static bool setTargetShuffleZeroElements(SDValue N, + SmallVectorImpl &Mask) { + bool IsUnary; + if (!isTargetShuffle(N.getOpcode())) + return false; + if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Mask, + IsUnary)) + return false; + + SDValue V1 = N.getOperand(0); + SDValue V2 = IsUnary ? 
V1 : N.getOperand(1); + + while (V1.getOpcode() == ISD::BITCAST) + V1 = V1->getOperand(0); + while (V2.getOpcode() == ISD::BITCAST) + V2 = V2->getOperand(0); + + for (int i = 0, Size = Mask.size(); i != Size; ++i) { + int M = Mask[i]; + + // Already decoded as SM_SentinelZero / SM_SentinelUndef. + if (M < 0) + continue; + + SDValue V = M < Size ? V1 : V2; + + // We are referencing an UNDEF input. + if (V.isUndef()) { + Mask[i] = SM_SentinelUndef; + continue; + } + + // TODO - handle the Size != (int)V.getNumOperands() cases in future. + if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands()) + continue; + if (!X86::isZeroNode(V.getOperand(M % Size))) + continue; + Mask[i] = SM_SentinelZero; + } + + return true; +} + /// \brief Try to combine x86 target specific shuffles. static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -23777,6 +23823,96 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask); } + // Attempt to merge blend(insertps(x,y),zero). + if (V0.getOpcode() == X86ISD::INSERTPS || + V1.getOpcode() == X86ISD::INSERTPS) { + assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); + + // Determine which elements are known to be zero. + SmallVector TargetMask; + if (!setTargetShuffleZeroElements(N, TargetMask)) + return SDValue(); + + // Helper function to take inner insertps node and attempt to + // merge the blend with zero into its zero mask. + auto MergeInsertPSAndBlend = [&](SDValue V, int Offset) { + if (V.getOpcode() != X86ISD::INSERTPS) + return SDValue(); + SDValue Op0 = V.getOperand(0); + SDValue Op1 = V.getOperand(1); + SDValue Op2 = V.getOperand(2); + unsigned InsertPSMask = cast(Op2)->getZExtValue(); + + // Check each element of the blend node's target mask - must either + // be zeroable (and update the zero mask) or selects the element from + // the inner insertps node. 
+ for (int i = 0; i != 4; ++i) + if (TargetMask[i] < 0) + InsertPSMask |= (1u << i); + else if (TargetMask[i] != (i + Offset)) + return SDValue(); + return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, Op0, Op1, + DAG.getConstant(InsertPSMask, DL, MVT::i8)); + }; + + if (SDValue V = MergeInsertPSAndBlend(V0, 0)) + return V; + if (SDValue V = MergeInsertPSAndBlend(V1, 4)) + return V; + } + return SDValue(); + } + case X86ISD::INSERTPS: { + assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); + SDValue Op2 = N.getOperand(2); + unsigned InsertPSMask = cast(Op2)->getZExtValue(); + unsigned DstIdx = (InsertPSMask >> 4) & 3; + + // Attempt to merge insertps with an inner target shuffle node. + SmallVector TargetMask; + if (!setTargetShuffleZeroElements(Op0, TargetMask)) + return SDValue(); + + bool Updated = false; + bool UseInput00 = false; + bool UseInput01 = false; + for (int i = 0; i != 4; ++i) { + int M = TargetMask[i]; + if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { + // No change if element is already zero or the inserted element. + continue; + } else if (M < 0) { + // If the target mask is undef/zero then we must zero the element. + InsertPSMask |= (1u << i); + Updated = true; + continue; + } + + // The input vector element must be inline. + if (M != i && M != (i + 4)) + return SDValue(); + + // Determine which inputs of the target shuffle we're using. + UseInput00 |= (0 <= M && M < 4); + UseInput01 |= (4 <= M); + } + + // If we're not using both inputs of the target shuffle then use the + // referenced input directly. 
+ if (UseInput00 && !UseInput01) { + Updated = true; + Op0 = Op0.getOperand(0); + } else if (!UseInput00 && UseInput01) { + Updated = true; + Op0 = Op0.getOperand(1); + } + + if (Updated) + return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, Op0, Op1, + DAG.getConstant(InsertPSMask, DL, MVT::i8)); + return SDValue(); } default: @@ -28134,6 +28270,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles + case X86ISD::INSERTPS: case X86ISD::PALIGNR: case X86ISD::BLENDI: case X86ISD::UNPCKH: diff --git a/test/CodeGen/X86/insertps-combine.ll b/test/CodeGen/X86/insertps-combine.ll index f2596b6347b..690707b6870 100644 --- a/test/CodeGen/X86/insertps-combine.ll +++ b/test/CodeGen/X86/insertps-combine.ll @@ -6,16 +6,12 @@ define <4 x float> @shuffle_v4f32_0z27(<4 x float> %x, <4 x float> %a) { ; SSE-LABEL: shuffle_v4f32_0z27: ; SSE: # BB#0: -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3] -; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2] +; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_0z27: ; AVX: # BB#0: -; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2] ; AVX-NEXT: retq %vecext = extractelement <4 x float> %x, i32 0 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 @@ -50,16 +46,12 @@ define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %xyzw, <4 x float> %abcd) { define <4 x float> @shuffle_v4f32_0z24(<4 x float> %xyzw, <4 x float> %abcd) { ; SSE-LABEL: shuffle_v4f32_0z24: ; SSE: # BB#0: -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: blendps {{.*#+}} xmm0 = 
xmm0[0],xmm2[1],xmm0[2,3] -; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_0z24: ; AVX: # BB#0: -; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] ; AVX-NEXT: retq %vecext = extractelement <4 x float> %xyzw, i32 0 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll index 2d6d3b6a0fb..5c6efe6eb2c 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -159,15 +159,13 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s ; ; SSE41-LABEL: merge_4f32_f32_012u: ; SSE41: # BB#0: -; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; SSE41-NEXT: retq ; ; AVX-LABEL: merge_4f32_f32_012u: ; AVX: # BB#0: -; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; AVX-NEXT: retq %ptr0 = getelementptr inbounds float, float* %ptr, i64 0 @@ -195,15 +193,13 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s ; ; SSE41-LABEL: merge_4f32_f32_019u: ; SSE41: # BB#0: -; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE41-NEXT: insertps {{.*#+}} xmm0 = 
xmm0[0,1],mem[0],xmm0[3] ; SSE41-NEXT: retq ; ; AVX-LABEL: merge_4f32_f32_019u: ; AVX: # BB#0: -; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; AVX-NEXT: retq %ptr0 = getelementptr inbounds float, float* %ptr, i64 0 diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index 53dbb32235a..9187f3513d6 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -1080,15 +1080,11 @@ define <4 x float> @shuffle_v4f32_0zz6(<4 x float> %a, <4 x float> %b) { ; SSE41-LABEL: shuffle_v4f32_0zz6: ; SSE41: # BB#0: ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2] -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_0zz6: ; AVX: # BB#0: ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2] -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> @@ -1129,15 +1125,11 @@ define <4 x float> @shuffle_v4f32_0z24(<4 x float> %a, <4 x float> %b) { ; SSE41-LABEL: shuffle_v4f32_0z24: ; SSE41: # BB#0: ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_0z24: ; AVX: # BB#0: ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> 
%shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> From 820d2fdf449ea421b5cd54261d190289206e3694 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 19 Jan 2016 23:04:56 +0000 Subject: [PATCH 0111/1132] [X86][SSE] Add VZEXT_MOVL target shuffle decoding. Add support for decoding VZEXT_MOVL target shuffle masks, allowing it to be used as a source in target shuffle combines. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258215 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit df369b21b9919ed30c10b5243202400f2c322c41) --- lib/Target/X86/X86ISelLowering.cpp | 5 +++++ test/CodeGen/X86/insertps-combine.ll | 8 ++------ test/CodeGen/X86/sse41.ll | 8 ++------ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ebbb44695a0..1e036451a31 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3939,6 +3939,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPERMI: case X86ISD::VPERMV: case X86ISD::VPERMV3: + case X86ISD::VZEXT_MOVL: return true; } } @@ -4886,6 +4887,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, DecodePSHUFLWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; + case X86ISD::VZEXT_MOVL: + DecodeZeroMoveLowMask(VT, Mask); + IsUnary = true; + break; case X86ISD::PSHUFB: { IsUnary = true; SDValue MaskNode = N->getOperand(1); diff --git a/test/CodeGen/X86/insertps-combine.ll b/test/CodeGen/X86/insertps-combine.ll index 690707b6870..78bae28762c 100644 --- a/test/CodeGen/X86/insertps-combine.ll +++ b/test/CodeGen/X86/insertps-combine.ll @@ -24,16 +24,12 @@ define <4 x float> @shuffle_v4f32_0z27(<4 x float> %x, <4 x float> %a) { define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %xyzw, <4 x float> %abcd) { ; SSE-LABEL: shuffle_v4f32_0zz4: ; SSE: # BB#0: -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: blendps {{.*#+}} xmm0 
= xmm0[0],xmm2[1,2,3] -; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_0zz4: ; AVX: # BB#0: -; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] ; AVX-NEXT: retq %vecext = extractelement <4 x float> %xyzw, i32 0 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index 0a83a9753b8..16e43f26b67 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -507,16 +507,12 @@ define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) { define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) { ; X32-LABEL: shuf_X00A: ; X32: ## BB#0: -; X32-NEXT: xorps %xmm2, %xmm2 -; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] -; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: shuf_X00A: ; X64: ## BB#0: -; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] -; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] ; X64-NEXT: retq %vecext = extractelement <4 x float> %x, i32 0 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 From cc981181e4ac775366cd41a44aa395bc5f494c98 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 29 Jan 2016 20:21:02 +0000 Subject: [PATCH 0112/1132] [InstCombine] avoid an insertelement transformation that induces the opposite extractelement fold (PR26354) We would infinite loop because we created a shufflevector that was wider than needed and then failed to combine that with the insertelement. 
When subsequently visiting the extractelement from that shuffle, we see that it's unnecessary, delete it, and trigger another visit to the insertelement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259236 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b17df8b4d7fcad4f1593e309e9a15d596977af20) --- .../InstCombine/InstCombineVectorOps.cpp | 18 ++++++++++- .../InstCombine/insert-extract-shuffle.ll | 30 +++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 5cde31a9162..bc4c0ebae79 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -380,6 +380,23 @@ static void replaceExtractElements(InsertElementInst *InsElt, ExtendMask.push_back(UndefValue::get(IntType)); Value *ExtVecOp = ExtElt->getVectorOperand(); + auto *ExtVecOpInst = dyn_cast(ExtVecOp); + BasicBlock *InsertionBlock = (ExtVecOpInst && !isa(ExtVecOpInst)) + ? ExtVecOpInst->getParent() + : ExtElt->getParent(); + + // TODO: This restriction matches the basic block check below when creating + // new extractelement instructions. If that limitation is removed, this one + // could also be removed. But for now, we just bail out to ensure that we + // will replace the extractelement instruction that is feeding our + // insertelement instruction. This allows the insertelement to then be + // replaced by a shufflevector. If the insertelement is not replaced, we can + // induce infinite looping because there's an optimization for extractelement + // that will delete our widening shuffle. This would trigger another attempt + // here to create that shuffle, and we spin forever. 
+ if (InsertionBlock != InsElt->getParent()) + return; + auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ConstantVector::get(ExtendMask)); @@ -387,7 +404,6 @@ static void replaceExtractElements(InsertElementInst *InsElt, // (as long as it's not a PHI) or at the start of the basic block of the // extract, so any subsequent extracts in the same basic block can use it. // TODO: Insert before the earliest ExtractElementInst that is replaced. - auto *ExtVecOpInst = dyn_cast(ExtVecOp); if (ExtVecOpInst && !isa(ExtVecOpInst)) WideVec->insertAfter(ExtVecOpInst); else diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll index 47c2a139a47..8ed4db8bbbc 100644 --- a/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -175,3 +175,33 @@ bb3: ret <4 x double> %tmp4 } +; PR26354: https://llvm.org/bugs/show_bug.cgi?id=26354 +; Don't create a shufflevector if we know that we're not going to replace the insertelement. 
+ +define double @pr26354(<2 x double>* %tmp, i1 %B) { +; CHECK-LABEL: @pr26354( +; CHECK: %ld = load <2 x double>, <2 x double>* %tmp +; CHECK-NEXT: %e1 = extractelement <2 x double> %ld, i32 0 +; CHECK-NEXT: br i1 %B, label %if, label %end +; CHECK: if: +; CHECK-NEXT: %e2 = extractelement <2 x double> %ld, i32 1 +; CHECK-NEXT: %i1 = insertelement <4 x double> +; CHECK-NEXT: br label %end + +entry: + %ld = load <2 x double>, <2 x double>* %tmp + %e1 = extractelement <2 x double> %ld, i32 0 + %e2 = extractelement <2 x double> %ld, i32 1 + br i1 %B, label %if, label %end + +if: + %i1 = insertelement <4 x double> zeroinitializer, double %e2, i32 3 + br label %end + +end: + %ph = phi <4 x double> [ undef, %entry ], [ %i1, %if ] + %e3 = extractelement <4 x double> %ph, i32 1 + %mu = fmul double %e1, %e3 + ret double %mu +} + From aa9b558fe8854e9d8178f2b2dd3006331b638853 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 23 Jan 2016 13:37:07 +0000 Subject: [PATCH 0113/1132] [X86][SSE] Remove INSERTPS dependencies from unreferenced operands. If the INSERTPS zeroes out all the referenced elements from either of the 2 input vectors (and the input is not already UNDEF), then set that input to UNDEF to reduce dependencies. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258622 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 36a34a0671033d29eaf1d7593976903a3a9c612d) --- lib/Target/X86/X86ISelLowering.cpp | 16 +++++++++++--- test/CodeGen/X86/insertps-combine.ll | 32 ++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1e036451a31..7c0b277e51b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23873,9 +23873,19 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, SDValue Op1 = N.getOperand(1); SDValue Op2 = N.getOperand(2); unsigned InsertPSMask = cast(Op2)->getZExtValue(); - unsigned DstIdx = (InsertPSMask >> 4) & 3; + unsigned DstIdx = (InsertPSMask >> 4) & 0x3; + unsigned ZeroMask = InsertPSMask & 0xF; + + // If we zero out all elements from Op0 then we don't need to reference it. + if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef()) + return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, + DAG.getConstant(InsertPSMask, DL, MVT::i8)); + + // If we zero out the element from Op1 then we don't need to reference it. + if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef()) + return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), + DAG.getConstant(InsertPSMask, DL, MVT::i8)); - // Attempt to merge insertps with an inner target shuffle node. 
SmallVector TargetMask; if (!setTargetShuffleZeroElements(Op0, TargetMask)) return SDValue(); @@ -23915,7 +23925,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, } if (Updated) - return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, Op0, Op1, + return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, DAG.getConstant(InsertPSMask, DL, MVT::i8)); return SDValue(); diff --git a/test/CodeGen/X86/insertps-combine.ll b/test/CodeGen/X86/insertps-combine.ll index 78bae28762c..54b5fe444ba 100644 --- a/test/CodeGen/X86/insertps-combine.ll +++ b/test/CodeGen/X86/insertps-combine.ll @@ -98,6 +98,38 @@ define <4 x float> @shuffle_v4f32_0z6z(<4 x float> %A, <4 x float> %B) { ret <4 x float> %vecinit4 } +define <4 x float> @insertps_undef_input0(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: insertps_undef_input0: +; SSE: # BB#0: +; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero +; SSE-NEXT: retq +; +; AVX-LABEL: insertps_undef_input0: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero +; AVX-NEXT: retq + %res0 = fadd <4 x float> %a0, + %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %res0, <4 x float> %a1, i8 21) + %res2 = shufflevector <4 x float> %res1, <4 x float> zeroinitializer, <4 x i32> + ret <4 x float> %res2 +} + +define <4 x float> @insertps_undef_input1(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: insertps_undef_input1: +; SSE: # BB#0: +; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3] +; SSE-NEXT: retq +; +; AVX-LABEL: insertps_undef_input1: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3] +; AVX-NEXT: retq + %res0 = fadd <4 x float> %a1, + %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %res0, i8 21) + %res2 = shufflevector <4 x float> %res1, <4 x float> zeroinitializer, <4 x i32> + ret <4 x float> %res2 +} + define float @extract_zero_insertps_z0z7(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: 
extract_zero_insertps_z0z7: ; SSE: # BB#0: From 24ad0984e1117e07c7b9f853d716ed6d391497f5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 1 Feb 2016 08:59:30 +0000 Subject: [PATCH 0114/1132] [X86][SSE] Find source of the inserted element of INSERTPS Minor patch to trace back through target shuffles to the source of the inserted element in a (V)INSERTPS shuffle. Differential Revision: http://reviews.llvm.org/D16652 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259343 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit dbf62e3ecd8895b965e31c42689ca8f467e47e50) --- lib/Target/X86/X86ISelLowering.cpp | 33 ++++++++++++++++++++++++++---- test/CodeGen/X86/sse41.ll | 19 ++++------------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7c0b277e51b..c771529d379 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4316,6 +4316,11 @@ static bool isUndefOrEqual(int Val, int CmpVal) { return (Val < 0 || Val == CmpVal); } +/// Val is either the undef or zero sentinel value. +static bool isUndefOrZero(int Val) { + return (Val == SM_SentinelUndef || Val == SM_SentinelZero); +} + /// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (Low, Low+Size]. or is undef. 
@@ -23873,6 +23878,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, SDValue Op1 = N.getOperand(1); SDValue Op2 = N.getOperand(2); unsigned InsertPSMask = cast(Op2)->getZExtValue(); + unsigned SrcIdx = (InsertPSMask >> 6) & 0x3; unsigned DstIdx = (InsertPSMask >> 4) & 0x3; unsigned ZeroMask = InsertPSMask & 0xF; @@ -23886,19 +23892,38 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), DAG.getConstant(InsertPSMask, DL, MVT::i8)); - SmallVector TargetMask; - if (!setTargetShuffleZeroElements(Op0, TargetMask)) + // Attempt to merge insertps Op1 with an inner target shuffle node. + SmallVector TargetMask1; + if (setTargetShuffleZeroElements(Op1, TargetMask1)) { + int M = TargetMask1[SrcIdx]; + if (isUndefOrZero(M)) { + // Zero/UNDEF insertion - zero out element and remove dependency. + InsertPSMask |= (1u << DstIdx); + return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), + DAG.getConstant(InsertPSMask, DL, MVT::i8)); + } + // Update insertps mask srcidx and reference the source input directly. + assert(0 <= M && M < 8 && "Shuffle index out of range"); + InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); + Op1 = Op1.getOperand(M < 4 ? 0 : 1); + return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, + DAG.getConstant(InsertPSMask, DL, MVT::i8)); + } + + // Attempt to merge insertps Op0 with an inner target shuffle node. + SmallVector TargetMask0; + if (!setTargetShuffleZeroElements(Op0, TargetMask0)) return SDValue(); bool Updated = false; bool UseInput00 = false; bool UseInput01 = false; for (int i = 0; i != 4; ++i) { - int M = TargetMask[i]; + int M = TargetMask0[i]; if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { // No change if element is already zero or the inserted element. continue; - } else if (M < 0) { + } else if (isUndefOrZero(M)) { // If the target mask is undef/zero then we must zero the element. 
InsertPSMask |= (1u << i); Updated = true; diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index 16e43f26b67..466dbdd67a6 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -846,16 +846,12 @@ define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocap ; X32: ## BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0] -; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X32-NEXT: retl ; ; X64-LABEL: insertps_from_broadcast_loadf32: ; X64: ## BB#0: -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0] -; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X64-NEXT: retq %1 = getelementptr inbounds float, float* %fb, i64 %index %2 = load float, float* %1, align 4 @@ -871,16 +867,12 @@ define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float ; X32-LABEL: insertps_from_broadcast_loadv4f32: ; X32: ## BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movups (%eax), %xmm1 -; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0] -; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X32-NEXT: retl ; ; X64-LABEL: insertps_from_broadcast_loadv4f32: ; X64: ## BB#0: -; X64-NEXT: movups (%rdi), %xmm1 -; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0] -; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; X64-NEXT: retq %1 = load <4 x float>, <4 x float>* %b, align 4 %2 = extractelement <4 x float> %1, i32 0 @@ -892,14 +884,12 @@ define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float ret <4 x float> %7 } -;; FIXME: We're emitting an 
extraneous pshufd/vbroadcast. define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) { ; X32-LABEL: insertps_from_broadcast_multiple_use: ; X32: ## BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero -; X32-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0,0,0] ; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0] ; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0] ; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0] @@ -912,7 +902,6 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl ; X64-LABEL: insertps_from_broadcast_multiple_use: ; X64: ## BB#0: ; X64-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero -; X64-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0,0,0] ; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0] ; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0] ; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0] From d7315fa4971479b1d97d07bec6d225fc35dfb821 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 1 Feb 2016 17:37:56 +0000 Subject: [PATCH 0115/1132] [InstCombine] Don't transform (X+INT_MAX)>=(Y+INT_MAX) -> (X<=Y) This miscompile came about because we tried to use a transform which was only appropriate for xor operators when addition was present. This fixes PR26407. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259375 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 2291a38a78381e92fbe8e6b93c980eba45c845f7) --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 2 +- test/Transforms/InstCombine/icmp.ll | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index f322e4ed7aa..051fd1084f7 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3877,7 +3877,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO1->getOperand(0)); } - if (CI->isMaxValue(true)) { + if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) { ICmpInst::Predicate Pred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll index 7d6ec96b532..1e64cd7f582 100644 --- a/test/Transforms/InstCombine/icmp.ll +++ b/test/Transforms/InstCombine/icmp.ll @@ -1672,3 +1672,15 @@ define i1 @cmp_slt_rhs_inc(float %x, i32 %i) { %cmp = icmp slt i32 %conv, %inc ret i1 %cmp } + +; CHECK-LABEL: @PR26407 +; CHECK-NEXT: %[[addx:.*]] = add i32 %x, 2147483647 +; CHECK-NEXT: %[[addy:.*]] = add i32 %y, 2147483647 +; CHECK-NEXT: %[[cmp:.*]] = icmp uge i32 %[[addx]], %[[addy]] +; CHECK-NEXT: ret i1 %[[cmp]] +define i1 @PR26407(i32 %x, i32 %y) { + %addx = add i32 %x, 2147483647 + %addy = add i32 %y, 2147483647 + %cmp = icmp uge i32 %addx, %addy + ret i1 %cmp +} From 99b712205fbf6a4010f98350170bda43b6b0ed08 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 20 Jan 2016 18:57:48 +0000 Subject: [PATCH 0116/1132] Accept subtractions involving a weak symbol. When a symbol S shows up in an expression in assembly there are two possible interpretations * The expression is referring to the value of S in this file. 
* The expression is referring to the value after symbol resolution. In the first case the assembler can reason about the value and try to produce a relocation. In the second case, that is only possible if the symbol cannot be preempted. Assemblers are not very consistent about which interpretation gets used. This changes MC to agree with GAS in the case of an expression of the form "Sym - WeakSym". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258329 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 6 ------ test/MC/AArch64/error-location.s | 3 --- test/MC/ARM/error-location.s | 3 --- test/MC/ELF/relocation.s | 6 ++++++ 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 7e3ddda1815..8183e8cbdf3 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -655,12 +655,6 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, return; } - if (::isWeak(SymB)) { - Ctx.reportError(Fixup.getLoc(), - "Cannot represent a subtraction with a weak symbol"); - return; - } - uint64_t SymBOffset = Layout.getSymbolOffset(SymB); uint64_t K = SymBOffset - FixupOffset; IsPCRel = true; diff --git a/test/MC/AArch64/error-location.s b/test/MC/AArch64/error-location.s index 02504368f00..c629e0a50de 100644 --- a/test/MC/AArch64/error-location.s +++ b/test/MC/AArch64/error-location.s @@ -16,9 +16,6 @@ // CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a difference across sections .word x_a - y_a -// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a subtraction with a weak symbol - .word a - w - // CHECK: :0: error: expression could not be evaluated .set v1, -undef diff --git a/test/MC/ARM/error-location.s b/test/MC/ARM/error-location.s index 112acf318ed..2f70f294b57 100644 --- a/test/MC/ARM/error-location.s +++ b/test/MC/ARM/error-location.s @@ -16,9 +16,6 @@ @ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a difference across sections .word 
x_a - y_a -@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a subtraction with a weak symbol - .word a - w - @ CHECK: :0: error: expression could not be evaluated .set v1, -undef diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s index 0fec7679281..e0313904563 100644 --- a/test/MC/ELF/relocation.s +++ b/test/MC/ELF/relocation.s @@ -63,6 +63,11 @@ pr24486: .code16 call pr23771 + .weak weak_sym +weak_sym: + .long pr23272-weak_sym + + // CHECK: Section { // CHECK: Name: .rela.text // CHECK: Relocations [ @@ -101,5 +106,6 @@ pr24486: // CHECK-NEXT: 0xDC R_X86_64_PLT32 foo 0x0 // CHECK-NEXT: 0xF0 R_X86_64_32 .text 0xF0 // CHECK-NEXT: 0xF5 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE +// CHECK-NEXT: 0xF7 R_X86_64_PC32 pr23272 0x0 // CHECK-NEXT: ] // CHECK-NEXT: } From 9cf0f969bd3063f8a136f2fe5986f7017d5aacfa Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 1 Feb 2016 20:36:49 +0000 Subject: [PATCH 0117/1132] Fix infinite recursion in MCAsmStreamer::EmitValueImpl. If a target can only emit 8-bits data, we would loop in EmitValueImpl since it will try to split a 32-bits data in 1 chunk of 32-bits. No test since all current targets can emit 32bits at a time. Patch by Alexandru Guduleasa! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259399 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit 74894f721ad0b8cbdb6692bbf490f964423c86dd) --- lib/MC/MCAsmStreamer.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 98a152f18cf..b7daebc27c7 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -709,17 +709,15 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, report_fatal_error("Don't know how to emit this value."); // We couldn't handle the requested integer size so we fallback by breaking - // the request down into several, smaller, integers. 
Since sizes greater - // than eight are invalid and size equivalent to eight should have been - // handled earlier, we use four bytes as our largest piece of granularity. + // the request down into several, smaller, integers. + // Since sizes greater or equal to "Size" are invalid, we use the greatest + // power of 2 that is less than "Size" as our largest piece of granularity. bool IsLittleEndian = MAI->isLittleEndian(); for (unsigned Emitted = 0; Emitted != Size;) { unsigned Remaining = Size - Emitted; // The size of our partial emission must be a power of two less than - // eight. - unsigned EmissionSize = PowerOf2Floor(Remaining); - if (EmissionSize > 4) - EmissionSize = 4; + // Size. + unsigned EmissionSize = PowerOf2Floor(std::min(Remaining, Size - 1)); // Calculate the byte offset of our partial emission taking into account // the endianness of the target. unsigned ByteOffset = From ff9546e8353f61319ab069306745307840e7019f Mon Sep 17 00:00:00 2001 From: Jun Bum Lim Date: Mon, 1 Feb 2016 20:55:11 +0000 Subject: [PATCH 0118/1132] Avoid inlining call sites in unreachable-terminated block Summary: If the normal destination of the invoke or the parent block of the call site is unreachable-terminated, there is little point in inlining the call site unless there is literally zero cost. Unlike my previous change (D15289), this change specifically handle the call sites followed by unreachable in the same basic block for call or in the normal destination for the invoke. This change could be a reasonable first step to conservatively inline call sites leading to an unreachable-terminated block while BFI / BPI is not yet available in inliner. 
Reviewers: manmanren, majnemer, hfinkel, davidxl, mcrosier, dblaikie, eraman Subscribers: dblaikie, davidxl, mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D16616 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259403 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit da7cbcd8d59c107376718d88f2e47f7b2344b49a) --- lib/Analysis/InlineCost.cpp | 23 +++- test/Transforms/Inline/inline_unreachable.ll | 130 +++++++++++++++++++ test/Transforms/JumpThreading/pr26096.ll | 11 +- 3 files changed, 155 insertions(+), 9 deletions(-) create mode 100644 test/Transforms/Inline/inline_unreachable.ll diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 2b9e06e4210..8852da6ec92 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -1215,15 +1215,26 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { if (OnlyOneCallAndLocalLinkage) Cost += InlineConstants::LastCallToStaticBonus; - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this unless there is literally zero - // cost. + // If the normal destination of the invoke or the parent block of the call + // site is unreachable-terminated, there is little point in inlining this + // unless there is literally zero cost. + // FIXME: Note that it is possible that an unreachable-terminated block has a + // hot entry. For example, in below scenario inlining hot_call_X() may be + // beneficial : + // main() { + // hot_call_1(); + // ... + // hot_call_N() + // exit(0); + // } + // For now, we are not handling this corner case here as it is rare in real + // code. In future, we should elaborate this based on BPI and BFI in more + // general threshold adjusting heuristics in updateThreshold(). 
Instruction *Instr = CS.getInstruction(); if (InvokeInst *II = dyn_cast(Instr)) { - if (isa(II->getNormalDest()->begin())) + if (isa(II->getNormalDest()->getTerminator())) Threshold = 0; - } else if (isa(++BasicBlock::iterator(Instr))) + } else if (isa(Instr->getParent()->getTerminator())) Threshold = 0; // If this function uses the coldcc calling convention, prefer not to inline diff --git a/test/Transforms/Inline/inline_unreachable.ll b/test/Transforms/Inline/inline_unreachable.ll new file mode 100644 index 00000000000..dbf0119113a --- /dev/null +++ b/test/Transforms/Inline/inline_unreachable.ll @@ -0,0 +1,130 @@ +; RUN: opt < %s -inline -S | FileCheck %s + +@a = global i32 4 +@_ZTIi = external global i8* + +; CHECK-LABEL: callSimpleFunction +; CHECK: call i32 @simpleFunction +define i32 @callSimpleFunction(i32 %idx, i32 %limit) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: + %s = call i32 @simpleFunction(i32 %idx) + store i32 %s, i32* @a + unreachable + +if.end: + ret i32 %idx +} + +; CHECK-LABEL: callSmallFunction +; CHECK-NOT: call i32 @smallFunction +define i32 @callSmallFunction(i32 %idx, i32 %limit) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: + %s = call i32 @smallFunction(i32 %idx) + store i32 %s, i32* @a + unreachable + +if.end: + ret i32 %idx +} + +; CHECK-LABEL: throwSimpleException +; CHECK: invoke i32 @simpleFunction +define i32 @throwSimpleException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %exception = call i8* @__cxa_allocate_exception(i64 1) #0 + invoke i32 @simpleFunction(i32 %idx) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %if.then + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 + unreachable + +lpad: ; 
preds = %if.then + %ll = landingpad { i8*, i32 } + cleanup + ret i32 %idx + +if.end: ; preds = %entry + ret i32 %idx +} + +; CHECK-LABEL: throwSmallException +; CHECK-NOT: invoke i32 @smallFunction +define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %exception = call i8* @__cxa_allocate_exception(i64 1) #0 + invoke i32 @smallFunction(i32 %idx) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %if.then + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 + unreachable + +lpad: ; preds = %if.then + %ll = landingpad { i8*, i32 } + cleanup + ret i32 %idx + +if.end: ; preds = %entry + ret i32 %idx +} + +define i32 @simpleFunction(i32 %a) #0 { +entry: + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +define i32 @smallFunction(i32 %a) { +entry: + %r = load volatile i32, i32* @a + ret i32 %r +} + +attributes #0 = { nounwind } +attributes #1 = { noreturn } + +declare i8* @__cxa_allocate_exception(i64) +declare i32 @__gxx_personality_v0(...) 
+declare void @__cxa_throw(i8*, i8*, i8*) + diff --git a/test/Transforms/JumpThreading/pr26096.ll b/test/Transforms/JumpThreading/pr26096.ll index 2671e82b617..096d43e24d2 100644 --- a/test/Transforms/JumpThreading/pr26096.ll +++ b/test/Transforms/JumpThreading/pr26096.ll @@ -10,19 +10,24 @@ entry: br i1 %B, label %if.end, label %if.then if.then: ; preds = %entry - call void @fn2() + call void @fn2(i1 %B) ret void if.end: ; preds = %entry - call void @fn2() + call void @fn2(i1 %B) ret void } -define internal void @fn2() unnamed_addr { +define internal void @fn2(i1 %B) unnamed_addr { entry: call void @fn1() call void @fn1() call void @fn1() + br i1 %B, label %if.end, label %if.then +if.then: + unreachable + +if.end: unreachable } From 0ff0ed1f5eac5049074cbee39c517816b9053637 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 2 Feb 2016 00:45:30 +0000 Subject: [PATCH 0119/1132] [LVI] Fix a latent bug in getValueAt This routine was returning Undefined for most queries. This was utterly wrong. Amusingly, we do not appear to have any callers of this which are actually trying to exploit unreachable code or this would have broken the world. A better approach would be to explicit describe the intersection of facts. That's blocked behind http://reviews.llvm.org/D14476 and I wanted to fix the current bug. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259446 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b4e775c587ff94d5924a464ab8e47470db1a3c05) --- lib/Analysis/LazyValueInfo.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 0d1d34e0cb4..c2bfd96d0da 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -1079,6 +1079,14 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) { Result = getFromRangeMetadata(I); mergeAssumeBlockValueConstantRange(V, Result, CxtI); + // Note: What's actually happening here is that we're starting at overdefined + // and then intersecting two different types of facts. The code is not + // structured that way (FIXME), and we need to take particular care to not + // let the undefined state escape since we have *not* proven the particular + // value to be unreachable at the context instruction. + if (Result.isUndefined()) + Result.markOverdefined(); + DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } From ba5db3d055f53534e8a20b2dea8b02f2c6fbc286 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Tue, 2 Feb 2016 01:32:50 +0000 Subject: [PATCH 0120/1132] [X86] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR. Officially, we don't acknowledge non-default configurations of MXCSR, as getting there would require usage of the FENV_ACCESS pragma (at least insofar as rounding mode is concerned). We don't support the pragma, so we can assume that the default rounding mode - round to nearest, ties to even - is always used. However, it's inconsistent with the rest of the instruction set, where MXCSR is always effective (unless otherwise specified). Also, it's an unnecessary obstacle to the few brave souls that use fenv.h with LLVM. Avoid the hard-coded rounding mode for fp_to_f16; use MXCSR instead. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259448 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit b080ff471d8ad7405050c8643abf0cbda5171c93) rdar://24376466 --- lib/Target/X86/X86InstrSSE.td | 9 +++++++-- test/CodeGen/X86/fastmath-float-half-conversion.ll | 4 ++-- test/CodeGen/X86/half.ll | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6a7c45665e9..b385a7d1102 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8276,9 +8276,14 @@ let Predicates = [HasF16C] in { // Patterns for matching conversions from float to half-float and vice versa. let Predicates = [HasF16C] in { + // Use MXCSR.RC for rounding instead of explicitly specifying the default + // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the + // configurations we support (the default). However, falling back to MXCSR is + // more consistent with other instructions, which are always controlled by it. + // It's encoded as 0b100. 
def : Pat<(fp_to_f16 FR32:$src), (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr - (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>; + (COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>; def : Pat<(f16_to_fp GR16:$src), (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr @@ -8286,7 +8291,7 @@ let Predicates = [HasF16C] in { def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))), (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr - (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >; + (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >; } //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/fastmath-float-half-conversion.ll b/test/CodeGen/X86/fastmath-float-half-conversion.ll index 29308735cca..637fcc21595 100644 --- a/test/CodeGen/X86/fastmath-float-half-conversion.ll +++ b/test/CodeGen/X86/fastmath-float-half-conversion.ll @@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d) #0 { ; ALL-LABEL: test1_fast: ; F16C-NOT: callq {{_+}}truncdfhf2 ; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0 -; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0 +; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; AVX: callq {{_+}}truncdfhf2 ; ALL: ret entry: @@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 { ; F16C: fldt ; F16C-NEXT: fstps ; F16C-NEXT: vmovss -; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0 +; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; AVX: callq {{_+}}truncxfhf2 ; ALL: ret entry: diff --git a/test/CodeGen/X86/half.ll b/test/CodeGen/X86/half.ll index 3b2518e28f5..531891f9cae 100644 --- a/test/CodeGen/X86/half.ll +++ b/test/CodeGen/X86/half.ll @@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 { ; CHECK_LIBCALL-NEXT: retq ; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]] -; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]] +; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]] ; CHECK-F16C-NEXT: vmovd [[REG0]], %eax ; CHECK-F16C-NEXT: movw %ax, (%rsi) ; CHECK-F16C-NEXT: retq @@ -175,7 +175,7 @@ define 
void @test_uitofp_i64(i64 %a, half* %p) #0 { ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee ; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]]) ; CHECK-LIBCALL-NEXT: popq [[ADDR]] -; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]] ; CHECK-F16C-NEXT: vmovd [[REG4]], %eax ; CHECK-F16C-NEXT: movw %ax, (%rsi) ; CHECK-NEXT: retq From 8c24dfa3b62ba6ac1dcd983f02019a2232ddbc25 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Tue, 2 Feb 2016 01:44:03 +0000 Subject: [PATCH 0121/1132] [X86][FastISel] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR. FastISel counterpart to r259448. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259449 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit ddad6c7a5df99ba9e49ec43b433b2865a57d53e9) rdar://24376466 --- lib/Target/X86/X86FastISel.cpp | 6 ++++-- test/CodeGen/X86/fast-isel-float-half-convertion.ll | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 01e33caeef7..39e5e715a55 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2320,8 +2320,10 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // register class VR128 by method 'constrainOperandRegClass' which is // directly called by 'fastEmitInst_ri'. // Instruction VCVTPS2PHrr takes an extra immediate operand which is - // used to provide rounding control. - InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0); + // used to provide rounding control: use MXCSR.RC, encoded as 0b100. + // It's consistent with the other FP instructions, which are usually + // controlled by MXCSR. + InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4); // Move the lower 32-bits of ResultReg to another register of class GR32. 
ResultReg = createResultReg(&X86::GR32RegClass); diff --git a/test/CodeGen/X86/fast-isel-float-half-convertion.ll b/test/CodeGen/X86/fast-isel-float-half-convertion.ll index 707a325bf41..acb85fd171f 100644 --- a/test/CodeGen/X86/fast-isel-float-half-convertion.ll +++ b/test/CodeGen/X86/fast-isel-float-half-convertion.ll @@ -4,7 +4,7 @@ define i16 @test_fp32_to_fp16(float %a) { ; CHECK-LABEL: test_fp32_to_fp16: -; CHECK: vcvtps2ph $0, %xmm0, %xmm0 +; CHECK: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-NEXT: vmovd %xmm0, %eax ; CHECK-NEXT: retq entry: From 02f4d6d864778a279f1b378d9fd5a808d3fe6947 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 2 Feb 2016 06:41:55 +0000 Subject: [PATCH 0122/1132] [RegisterCoalescer] Better DebugLoc for reMaterializeTrivialDef When rematerializing a computation by replacing the copy, use the copy's location. The location of the copy is more representative of the original program. This partially fixes PR10003. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259469 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit a101dc9a9c3dd6174cb22595dd9964bbcadb5d90) --- lib/CodeGen/RegisterCoalescer.cpp | 2 ++ test/CodeGen/X86/loc-remat.ll | 56 +++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 test/CodeGen/X86/loc-remat.ll diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index c1ff13ec7ca..95c772357c3 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -939,11 +939,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } } + DebugLoc DL = CopyMI->getDebugLoc(); MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = std::next(MachineBasicBlock::iterator(CopyMI)); TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); MachineInstr *NewMI = std::prev(MII); + NewMI->setDebugLoc(DL); // In a situation like the following: // %vreg0:subreg = instr ; DefMI, subreg = DstIdx 
diff --git a/test/CodeGen/X86/loc-remat.ll b/test/CodeGen/X86/loc-remat.ll new file mode 100644 index 00000000000..1536546146a --- /dev/null +++ b/test/CodeGen/X86/loc-remat.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = common global i32 0, align 4 + +define i32 @main() !dbg !4 { +entry: + %0 = load volatile i32, i32* @x, align 4, !dbg !9, !tbaa !10 + %add = add nsw i32 %0, 24, !dbg !9 + store volatile i32 %add, i32* @x, align 4, !dbg !9, !tbaa !10 + %1 = load volatile i32, i32* @x, align 4, !dbg !14, !tbaa !10 + %add1 = add nsw i32 %1, 2, !dbg !14 + store volatile i32 %add1, i32* @x, align 4, !dbg !14, !tbaa !10 + %2 = load volatile i32, i32* @x, align 4, !dbg !15, !tbaa !10 + %add2 = add nsw i32 %2, 3, !dbg !15 + store volatile i32 %add2, i32* @x, align 4, !dbg !15, !tbaa !10 + %3 = load volatile i32, i32* @x, align 4, !dbg !16, !tbaa !10 + %add3 = add nsw i32 %3, 4, !dbg !16 + store volatile i32 %add3, i32* @x, align 4, !dbg !16, !tbaa !10 + tail call void @exit(i32 24), !dbg !17 + unreachable, !dbg !17 +} + +; CHECK-LABEL: main: +; CHECK: .loc 1 3 +; CHECK: .loc 1 4 +; CHECK: .loc 1 5 +; CHECK: .loc 1 6 +; CHECK: .loc 1 7 +; CHECK: .loc 1 8 +; CHECK-NEXT: movl $24, %edi +; CHECK-NEXT: callq exit + +declare void @exit(i32) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!6, !7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 259383) (llvm/trunk 259385)", isOptimized: true, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "t.c", directory: "/home/majnemer/llvm/src") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, variables: !2) +!5 = !DISubroutineType(types: !2) +!6 = !{i32 2, !"Dwarf Version", i32 4} +!7 = !{i32 2, !"Debug Info 
Version", i32 3} +!9 = !DILocation(line: 4, column: 5, scope: !4) +!10 = !{!11, !11, i64 0} +!11 = !{!"int", !12, i64 0} +!12 = !{!"omnipotent char", !13, i64 0} +!13 = !{!"Simple C/C++ TBAA"} +!14 = !DILocation(line: 5, column: 5, scope: !4) +!15 = !DILocation(line: 6, column: 5, scope: !4) +!16 = !DILocation(line: 7, column: 5, scope: !4) +!17 = !DILocation(line: 8, column: 3, scope: !4) From dfe275e46550b4a5e532e80bb79b08c5bfb33faf Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 2 Feb 2016 20:11:17 +0000 Subject: [PATCH 0123/1132] [X86] Fix the merging of SP updates in prologue/epilogue insertions. When the merging was involving LEAs, we were taking the wrong immediate from the list of operands. rdar://problem/24446069 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259553 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 9 +++++-- test/CodeGen/X86/merge-sp-update-lea.ll | 32 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/merge-sp-update-lea.ll diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 2a587375afd..a7044d29dba 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -376,12 +376,17 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, int Offset = 0; if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || - Opc == X86::LEA32r || Opc == X86::LEA64_32r) && + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && PI->getOperand(0).getReg() == StackPtr){ Offset += PI->getOperand(2).getImm(); MBB.erase(PI); if (!doMergeWithPrevious) MBBI = NI; + } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && + PI->getOperand(0).getReg() == StackPtr) { + // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. 
+ Offset += PI->getOperand(4).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && PI->getOperand(0).getReg() == StackPtr) { diff --git a/test/CodeGen/X86/merge-sp-update-lea.ll b/test/CodeGen/X86/merge-sp-update-lea.ll new file mode 100644 index 00000000000..cd2be44e1c0 --- /dev/null +++ b/test/CodeGen/X86/merge-sp-update-lea.ll @@ -0,0 +1,32 @@ +; RUN: llc %s -o - | FileCheck %s +target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" +target triple = "i386-apple-macosx" + +; Check that the merging of SP updates, when LEAs are involved, happen +; correctly. +; CHECK-LABEL: useLEA: +; CHECK: calll L_realloc +; Make sure that the offset we get here is 8 + 16. +; We used to have 8 + 1 because we were not reading the right immediate form +; the LEA instruction. +; CHECK-NEXT: leal 24(%esp), %esp +define noalias i8* @useLEA(i8* nocapture %p, i32 %nbytes) #0 { +entry: + %cmp = icmp slt i32 %nbytes, 0 + br i1 %cmp, label %cond.end.3, label %cond.false + +cond.false: ; preds = %entry + %tobool = icmp ne i32 %nbytes, 0 + %cond = select i1 %tobool, i32 %nbytes, i32 1 + %call = tail call i8* @realloc(i8* %p, i32 %cond) + br label %cond.end.3 + +cond.end.3: ; preds = %entry, %cond.false + %cond4 = phi i8* [ %call, %cond.false ], [ null, %entry ] + ret i8* %cond4 +} + +; Function Attrs: nounwind optsize +declare noalias i8* @realloc(i8* nocapture, i32) + +attributes #0 = { nounwind optsize ssp "disable-tail-calls"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-features"="+lea-sp" } From 3e4d1e52d0a7263eaa5b63e52eb3bbba3ce17c70 Mon Sep 17 00:00:00 2001 From: Anna Zaks Date: Tue, 2 Feb 2016 01:03:11 +0000 Subject: [PATCH 0124/1132] [safestack] Make sure the unsafe stack pointer is popped in all cases The unsafe stack pointer is only popped in moveStaticAllocasToUnsafeStack so it won't happen if there are no static 
allocas. Fixes https://llvm.org/bugs/show_bug.cgi?id=26122 Differential Revision: http://reviews.llvm.org/D16339 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259447 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/SafeStack.cpp | 53 ++++++++++---------- test/Transforms/SafeStack/ARM/setjmp.ll | 4 +- test/Transforms/SafeStack/dynamic-alloca.ll | 3 +- test/Transforms/SafeStack/setjmp2.ll | 5 +- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/lib/Transforms/Instrumentation/SafeStack.cpp b/lib/Transforms/Instrumentation/SafeStack.cpp index ee8d4fad752..6ed1746155c 100644 --- a/lib/Transforms/Instrumentation/SafeStack.cpp +++ b/lib/Transforms/Instrumentation/SafeStack.cpp @@ -144,7 +144,8 @@ class SafeStack : public FunctionPass { Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F, ArrayRef StaticAllocas, ArrayRef ByValArguments, - ArrayRef Returns); + ArrayRef Returns, + Instruction *BasePointer); /// \brief Generate code to restore the stack after all stack restore points /// in \p StackRestorePoints. @@ -431,6 +432,8 @@ AllocaInst * SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, ArrayRef StackRestorePoints, Value *StaticTop, bool NeedDynamicTop) { + assert(StaticTop && "The stack top isn't set."); + if (StackRestorePoints.empty()) return nullptr; @@ -441,19 +444,13 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, // runtime itself. AllocaInst *DynamicTop = nullptr; - if (NeedDynamicTop) + if (NeedDynamicTop) { // If we also have dynamic alloca's, the stack pointer value changes // throughout the function. For now we store it in an alloca. DynamicTop = IRB.CreateAlloca(StackPtrTy, /*ArraySize=*/nullptr, "unsafe_stack_dynamic_ptr"); - - if (!StaticTop) - // We need the original unsafe stack pointer value, even if there are - // no unsafe static allocas. 
- StaticTop = IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr"); - - if (NeedDynamicTop) IRB.CreateStore(StaticTop, DynamicTop); + } // Restore current stack pointer after longjmp/exception catch. for (Instruction *I : StackRestorePoints) { @@ -467,29 +464,18 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, return DynamicTop; } +/// We explicitly compute and set the unsafe stack layout for all unsafe +/// static alloca instructions. We save the unsafe "base pointer" in the +/// prologue into a local variable and restore it in the epilogue. Value *SafeStack::moveStaticAllocasToUnsafeStack( IRBuilder<> &IRB, Function &F, ArrayRef StaticAllocas, - ArrayRef ByValArguments, ArrayRef Returns) { + ArrayRef ByValArguments, ArrayRef Returns, + Instruction *BasePointer) { if (StaticAllocas.empty() && ByValArguments.empty()) - return nullptr; + return BasePointer; DIBuilder DIB(*F.getParent()); - // We explicitly compute and set the unsafe stack layout for all unsafe - // static alloca instructions. We save the unsafe "base pointer" in the - // prologue into a local variable and restore it in the epilogue. - - // Load the current stack pointer (we'll also use it as a base pointer). - // FIXME: use a dedicated register for it ? - Instruction *BasePointer = - IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr"); - assert(BasePointer->getType() == StackPtrTy); - - for (ReturnInst *RI : Returns) { - IRB.SetInsertPoint(RI); - IRB.CreateStore(BasePointer, UnsafeStackPtr); - } - // Compute maximum alignment among static objects on the unsafe stack. unsigned MaxAlignment = 0; for (Argument *Arg : ByValArguments) { @@ -726,9 +712,16 @@ bool SafeStack::runOnFunction(Function &F) { IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); + // Load the current stack pointer (we'll also use it as a base pointer). + // FIXME: use a dedicated register for it ? 
+ Instruction *BasePointer = + IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr"); + assert(BasePointer->getType() == StackPtrTy); + // The top of the unsafe stack after all unsafe static allocas are allocated. Value *StaticTop = moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, - ByValArguments, Returns); + ByValArguments, Returns, + BasePointer); // Safe stack object that stores the current unsafe stack top. It is updated // as unsafe dynamic (non-constant-sized) allocas are allocated and freed. @@ -743,6 +736,12 @@ bool SafeStack::runOnFunction(Function &F) { moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop, DynamicAllocas); + // Restore the unsafe stack pointer before each return. + for (ReturnInst *RI : Returns) { + IRB.SetInsertPoint(RI); + IRB.CreateStore(BasePointer, UnsafeStackPtr); + } + DEBUG(dbgs() << "[SafeStack] safestack applied\n"); return true; } diff --git a/test/Transforms/SafeStack/ARM/setjmp.ll b/test/Transforms/SafeStack/ARM/setjmp.ll index 8c57908bbe4..20e46f8f0e2 100644 --- a/test/Transforms/SafeStack/ARM/setjmp.ll +++ b/test/Transforms/SafeStack/ARM/setjmp.ll @@ -6,8 +6,8 @@ define void @f(i32 %b) safestack { entry: ; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address() -; CHECK: %[[USDP:.*]] = alloca i8* ; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]] +; CHECK: %[[USDP:.*]] = alloca i8* ; CHECK: store i8* %[[USP]], i8** %[[USDP]] ; CHECK: call i32 @setjmp @@ -26,6 +26,8 @@ if.then: br label %if.end if.end: +; CHECK: store i8* %[[USP:.*]], i8** %[[SPA:.*]] + ret void } diff --git a/test/Transforms/SafeStack/dynamic-alloca.ll b/test/Transforms/SafeStack/dynamic-alloca.ll index bfec66f82a2..b0571f72f1a 100644 --- a/test/Transforms/SafeStack/dynamic-alloca.ll +++ b/test/Transforms/SafeStack/dynamic-alloca.ll @@ -8,7 +8,7 @@ ; Requires protector. 
define void @foo(i32 %n) nounwind uwtable safestack { entry: - ; CHECK: __safestack_unsafe_stack_ptr + ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr %n.addr = alloca i32, align 4 %a = alloca i32*, align 8 store i32 %n, i32* %n.addr, align 4 @@ -17,5 +17,6 @@ entry: %1 = alloca i8, i64 %conv %2 = bitcast i8* %1 to i32* store i32* %2, i32** %a, align 8 + ; CHECK: store i8* %[[SP:.*]], i8** @__safestack_unsafe_stack_ptr ret void } diff --git a/test/Transforms/SafeStack/setjmp2.ll b/test/Transforms/SafeStack/setjmp2.ll index bb15d7e03ac..dc83c482420 100644 --- a/test/Transforms/SafeStack/setjmp2.ll +++ b/test/Transforms/SafeStack/setjmp2.ll @@ -12,8 +12,8 @@ ; CHECK: @foo(i32 %[[ARG:.*]]) define i32 @foo(i32 %size) nounwind uwtable safestack { entry: - ; CHECK: %[[DYNPTR:.*]] = alloca i8* - ; CHECK-NEXT: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr + ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr + ; CHECK-NEXT: %[[DYNPTR:.*]] = alloca i8* ; CHECK-NEXT: store i8* %[[SP]], i8** %[[DYNPTR]] ; CHECK-NEXT: %[[ZEXT:.*]] = zext i32 %[[ARG]] to i64 @@ -35,6 +35,7 @@ entry: ; CHECK: call void @funcall(i32* %[[ALLOCA]]) call void @funcall(i32* %a) + ; CHECK-NEXT: store i8* %[[SP:.*]], i8** @__safestack_unsafe_stack_ptr ret i32 0 } From 46b3fcf06c90af7fbcd073a66fe8b675252b07f5 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 29 Jan 2016 23:51:00 +0000 Subject: [PATCH 0125/1132] [Objective-C] Support a new special module flag. "Objective-C Class Properties" will be put into the objc_imageinfo struct. 
rdar://23891898 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259270 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3c9026e3283..bcaad86a511 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -467,6 +467,7 @@ emitModuleFlags(MCStreamer &Streamer, } else if (Key == "Objective-C Garbage Collection" || Key == "Objective-C GC Only" || Key == "Objective-C Is Simulated" || + Key == "Objective-C Class Properties" || Key == "Objective-C Image Swift Version") { ImageInfoFlags |= mdconst::extract(Val)->getZExtValue(); } else if (Key == "Objective-C Image Info Section") { From b038db2be2d8d1883be461f4f43af48c053a3e08 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Wed, 3 Feb 2016 15:05:06 +0000 Subject: [PATCH 0126/1132] [DemandedBits] Revert r249687 due to PR26071 This regresses a test in LoopVectorize, so I'll need to go away and think about how to solve this in a way that isn't broken. From the writeup in PR26071: What's happening is that ComputeKnownZeroes is telling us that all bits except the LSB are zero. We're then deciding that only the LSB needs to be demanded from the icmp's inputs. This is where we're wrong - we're assuming that after simplification the bits that were known zero will continue to be known zero. But they're not - during trivialization the upper bits get changed (because an XOR isn't shrunk), so the icmp fails. The fault is in demandedbits - its contract does clearly state that a non-demanded bit may either be zero or one. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259649 91177308-0d34-0410-b5e6-96231b3b80d8 (cherry picked from commit c48890e1947d4a74125e35a8206dd2e6709d1c04) --- lib/Analysis/DemandedBits.cpp | 7 ---- test/Analysis/DemandedBits/basic.ll | 31 ----------------- .../AArch64/loop-vectorization-factors.ll | 34 ------------------- 3 files changed, 72 deletions(-) diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 143d0b79f18..6f92ba6289a 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -242,13 +242,6 @@ void DemandedBits::determineLiveOperandBits( if (OperandNo != 0) AB = AOut; break; - case Instruction::ICmp: - // Count the number of leading zeroes in each operand. - ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1)); - auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes); - break; } } diff --git a/test/Analysis/DemandedBits/basic.ll b/test/Analysis/DemandedBits/basic.ll index 9973edf79c1..3fd1b321288 100644 --- a/test/Analysis/DemandedBits/basic.ll +++ b/test/Analysis/DemandedBits/basic.ll @@ -10,34 +10,3 @@ define i8 @test_mul(i32 %a, i32 %b) { %3 = trunc i32 %2 to i8 ret i8 %3 } - -; CHECK-LABEL: 'test_icmp1' -; CHECK-DAG: DemandedBits: 0x1 for %3 = icmp eq i32 %1, %2 -; CHECK-DAG: DemandedBits: 0xFFF for %1 = and i32 %a, 255 -; CHECK-DAG: DemandedBits: 0xFFF for %2 = shl i32 %1, 4 -define i1 @test_icmp1(i32 %a, i32 %b) { - %1 = and i32 %a, 255 - %2 = shl i32 %1, 4 - %3 = icmp eq i32 %1, %2 - ret i1 %3 -} - -; CHECK-LABEL: 'test_icmp2' -; CHECK-DAG: DemandedBits: 0x1 for %3 = icmp eq i32 %1, %2 -; CHECK-DAG: DemandedBits: 0xFFF for %1 = and i32 %a, 255 -; CHECK-DAG: DemandedBits: 0xFF for %2 = ashr i32 %1, 4 -define i1 @test_icmp2(i32 %a, i32 %b) { - %1 = and i32 %a, 255 - %2 = ashr i32 %1, 4 - %3 = icmp eq i32 %1, %2 - ret i1 %3 -} - -; CHECK-LABEL: 'test_icmp3' -; 
CHECK-DAG: DemandedBits: 0xFFFFFFFF for %1 = and i32 %a, 255 -; CHECK-DAG: DemandedBits: 0x1 for %2 = icmp eq i32 -1, %1 -define i1 @test_icmp3(i32 %a) { - %1 = and i32 %a, 255 - %2 = icmp eq i32 -1, %1 - ret i1 %2 -} diff --git a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index eee31049180..51f899c2f64 100644 --- a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -205,39 +205,5 @@ for.body: ; preds = %for.body, %for.body br i1 %exitcond, label %for.cond.cleanup, label %for.body } -; CHECK-LABEL: @add_g -; CHECK: load <16 x i8> -; CHECK: xor <16 x i8> -; CHECK: icmp ult <16 x i8> -; CHECK: select <16 x i1> {{.*}}, <16 x i8> -; CHECK: store <16 x i8> -define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture %r, i8 %arg1, i32 %len) #0 { - %1 = icmp sgt i32 %len, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0 - %2 = sext i8 %arg1 to i64 - br label %3 - -._crit_edge: ; preds = %3, %0 - ret void - -;