From d0dbed10d5f08aa3efa31af7287e4c1b34735c57 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Tue, 17 Jun 2014 06:52:41 +0000 Subject: [PATCH 001/157] tools: Add a space between package version and LLVM_VERSION_INFO This reads a little strangely. Add a space to clean it up. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211090 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/CommandLine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index b2bb7466c112..e09d4b6e47b4 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -1699,7 +1699,7 @@ class VersionPrinter { OS << "LLVM (http://llvm.org/):\n" << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; #ifdef LLVM_VERSION_INFO - OS << LLVM_VERSION_INFO; + OS << " " << LLVM_VERSION_INFO; #endif OS << "\n "; #ifndef __OPTIMIZE__ From feacf821802eb294cc228d26bd3add4d918c7957 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Tue, 17 Jun 2014 06:52:47 +0000 Subject: [PATCH 002/157] Support: Inject LLVM_VERSION_INFO into the Support library Mimic r116632 in passing LLVM_VERSION_INFO from the Makefile build system to the build. This improves the -version output of tools that use llvm::cl under the configure+make system. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/Support/Makefile b/lib/Support/Makefile index 4a2185d589e5..39426aaaacee 100644 --- a/lib/Support/Makefile +++ b/lib/Support/Makefile @@ -17,3 +17,7 @@ include $(LEVEL)/Makefile.common CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts)) CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts)) + +ifdef LLVM_VERSION_INFO +CompileCommonOpts += -DLLVM_VERSION_INFO='"$(LLVM_VERSION_INFO)"' +endif From 792a17352302c8305368ac5e836e5b132c604898 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 17 Jun 2014 09:23:12 +0000 Subject: [PATCH 003/157] [msan] Fix handling of multiplication by a constant with a number of trailing zeroes. Multiplication by an integer with a number of trailing zero bits leaves the same number of lower bits of the result initialized to zero. This change makes MSan take this into account in the case of multiplication by a compile-time constant. We don't handle the general, non-constant, case because (a) it's not going to be cheap (computation-wise); (b) multiplication by a partially uninitialized value in user code is a bad idea anyway. Constant case must be handled because it appears from LLVM optimization of a completely valid user code, as the test case in compiler-rt demonstrates. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211092 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/MemorySanitizer.cpp | 50 +++++++++- .../MemorySanitizer/mul_by_constant.ll | 94 +++++++++++++++++++ 2 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 test/Instrumentation/MemorySanitizer/mul_by_constant.ll diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e890943c6a84..38a3fbb21f8c 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1397,13 +1397,61 @@ struct MemorySanitizerVisitor : public InstVisitor { SC.Done(&I); } + // \brief Handle multiplication by constant. 
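+  //
+  // Worked example of the rule described below (illustrative values only):
+  // for X * 48, the constant 48 = 3 * 2**4 has four trailing zero bits, so
+  // the low four bits of the product are always zero. The shadow of the
+  // result is therefore modeled as Sx * 16 (i.e. Sx << 4) rather than as a
+  // plain copy of Sx.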
+ // + // Handle a special case of multiplication by constant that may have one or + // more zeros in the lower bits. This makes corresponding number of lower bits + // of the result zero as well. We model it by shifting the other operand + // shadow left by the required number of bits. Effectively, we transform + // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B). + // We use multiplication by 2**N instead of shift to cover the case of + // multiplication by 0, which may occur in some elements of a vector operand. + void handleMulByConstant(BinaryOperator &I, Constant *ConstArg, + Value *OtherArg) { + Constant *ShadowMul; + Type *Ty = ConstArg->getType(); + if (Ty->isVectorTy()) { + unsigned NumElements = Ty->getVectorNumElements(); + Type *EltTy = Ty->getSequentialElementType(); + SmallVector Elements; + for (unsigned Idx = 0; Idx < NumElements; ++Idx) { + ConstantInt *Elt = + dyn_cast(ConstArg->getAggregateElement(Idx)); + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + Elements.push_back(ConstantInt::get(EltTy, V2)); + } + ShadowMul = ConstantVector::get(Elements); + } else { + ConstantInt *Elt = dyn_cast(ConstArg); + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + ShadowMul = ConstantInt::get(Elt->getType(), V2); + } + + IRBuilder<> IRB(&I); + setShadow(&I, + IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst")); + setOrigin(&I, getOrigin(OtherArg)); + } + + void visitMul(BinaryOperator &I) { + Constant *constOp0 = dyn_cast(I.getOperand(0)); + Constant *constOp1 = dyn_cast(I.getOperand(1)); + if (constOp0 && !constOp1) + handleMulByConstant(I, constOp0, I.getOperand(1)); + else if (constOp1 && !constOp0) + handleMulByConstant(I, constOp1, I.getOperand(0)); + else + handleShadowOr(I); + } + void visitFAdd(BinaryOperator &I) { handleShadowOr(I); } void visitFSub(BinaryOperator &I) { handleShadowOr(I); } void visitFMul(BinaryOperator &I) { handleShadowOr(I); } void visitAdd(BinaryOperator &I) { handleShadowOr(I); } void visitSub(BinaryOperator &I) { handleShadowOr(I); } void visitXor(BinaryOperator &I) { handleShadowOr(I); } - void visitMul(BinaryOperator &I) { handleShadowOr(I); } void handleDiv(Instruction &I) { IRBuilder<> IRB(&I); diff --git a/test/Instrumentation/MemorySanitizer/mul_by_constant.ll b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll new file mode 100644 index 000000000000..e068f69ae4ba --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll @@ -0,0 +1,94 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check instrumentation mul when one of the operands is a constant. 
+ +define i64 @MulConst(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, 42949672960000 + ret i64 %y +} + +; 42949672960000 = 2**32 * 10000 +; 36 trailing zero bits +; 68719476736 = 2**36 + +; CHECK-LABEL: @MulConst( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 68719476736 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulZero(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, 0 + ret i64 %y +} + +; CHECK-LABEL: @MulZero( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 0{{$}} +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulNeg(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, -16 + ret i64 %y +} + +; CHECK-LABEL: @MulNeg( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 16 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulNeg2(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, -48 + ret i64 %y +} + +; CHECK-LABEL: @MulNeg2( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 16 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulOdd(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, 12345 + ret i64 %y +} + +; CHECK-LABEL: @MulOdd( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 1 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulLarge(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, -9223372036854775808 + ret i64 %y +} + +; -9223372036854775808 = 0x7000000000000000 + +; CHECK-LABEL: @MulLarge( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], -9223372036854775808 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + +define <4 x i32> @MulVectorConst(<4 x i32> %x) sanitize_memory { +entry: + %y = mul <4 x i32> %x, + ret <4 x i32> %y +} + +; CHECK-LABEL: @MulVectorConst( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul <4 x i32> [[A]], +; CHECK: store <4 x i32> [[B]], <4 x i32>* {{.*}} @__msan_retval_tls From 9f27b050b03bbd4b06d7f7d0e24b899906b4e057 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 17 Jun 2014 09:33:24 +0000 Subject: [PATCH 004/157] ConvertUTF tests: remove uses of initializer lists to restore compatibility with MSVC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211093 91177308-0d34-0410-b5e6-96231b3b80d8 --- unittests/Support/ConvertUTFTest.cpp | 1803 ++++++++++++++++---------- 1 file changed, 1108 insertions(+), 695 deletions(-) diff --git a/unittests/Support/ConvertUTFTest.cpp b/unittests/Support/ConvertUTFTest.cpp index 3b71ed1b6a6e..16c9bebfde16 100644 --- a/unittests/Support/ConvertUTFTest.cpp +++ b/unittests/Support/ConvertUTFTest.cpp @@ -11,6 +11,7 @@ #include "gtest/gtest.h" #include #include +#include using namespace llvm; @@ -65,6 +66,39 @@ TEST(ConvertUTFTest, HasUTF16BOM) { EXPECT_FALSE(HasBOM); } +struct ConvertUTFResultContainer { + ConversionResult ErrorCode; + std::vector UnicodeScalars; + + ConvertUTFResultContainer(ConversionResult ErrorCode) + : ErrorCode(ErrorCode) {} + + ConvertUTFResultContainer + withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000, + unsigned US2 = 0x110000, unsigned US3 = 0x110000, + unsigned US4 = 0x110000, unsigned US5 = 0x110000, + unsigned US6 = 0x110000, unsigned US7 = 0x110000) { + ConvertUTFResultContainer Result(*this); + if (US0 != 0x110000) + 
Result.UnicodeScalars.push_back(US0); + if (US1 != 0x110000) + Result.UnicodeScalars.push_back(US1); + if (US2 != 0x110000) + Result.UnicodeScalars.push_back(US2); + if (US3 != 0x110000) + Result.UnicodeScalars.push_back(US3); + if (US4 != 0x110000) + Result.UnicodeScalars.push_back(US4); + if (US5 != 0x110000) + Result.UnicodeScalars.push_back(US5); + if (US6 != 0x110000) + Result.UnicodeScalars.push_back(US6); + if (US7 != 0x110000) + Result.UnicodeScalars.push_back(US7); + return Result; + } +}; + std::pair> ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { const UTF8 *SourceStart = reinterpret_cast(S.data()); @@ -73,17 +107,55 @@ ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { std::vector Decoded(S.size(), 0); UTF32 *TargetStart = Decoded.data(); - auto Result = + auto ErrorCode = ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart, Decoded.data() + Decoded.size(), lenientConversion); Decoded.resize(TargetStart - Decoded.data()); - return std::make_pair(Result, Decoded); + return std::make_pair(ErrorCode, Decoded); +} + +std::pair> +ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { + const UTF8 *SourceStart = reinterpret_cast(S.data()); + + const UTF8 *SourceNext = SourceStart; + std::vector Decoded(S.size(), 0); + UTF32 *TargetStart = Decoded.data(); + + auto ErrorCode = ConvertUTF8toUTF32Partial( + &SourceNext, SourceStart + S.size(), &TargetStart, + Decoded.data() + Decoded.size(), lenientConversion); + + Decoded.resize(TargetStart - Decoded.data()); + + return std::make_pair(ErrorCode, Decoded); } -#define R0(RESULT) std::make_pair(RESULT, std::vector{}) -#define R(RESULT, ...) std::make_pair(RESULT, std::vector{ __VA_ARGS__ }) +::testing::AssertionResult +CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected, + StringRef S, bool Partial = false) { + ConversionResult ErrorCode; + std::vector Decoded; + if (!Partial) + std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S); + else + + std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S); + if (Expected.ErrorCode != ErrorCode) + return ::testing::AssertionFailure() << "Expected error code " + << Expected.ErrorCode << ", actual " + << ErrorCode; + + if (Expected.UnicodeScalars != Decoded) + return ::testing::AssertionFailure() + << "Expected lenient decoded result:\n" + << ::testing::PrintToString(Expected.UnicodeScalars) << "\n" + << "Actual result:\n" << ::testing::PrintToString(Decoded); + + return ::testing::AssertionSuccess(); +} TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { @@ -92,25 +164,27 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // // U+0041 LATIN CAPITAL LETTER A - EXPECT_EQ(R(conversionOK, 0x0041), - ConvertUTF8ToUnicodeScalarsLenient("\x41")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41")); // // 2-byte sequences // // U+0283 LATIN SMALL LETTER ESH - EXPECT_EQ(R(conversionOK, 0x0283), - ConvertUTF8ToUnicodeScalarsLenient("\xca\x83")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0283), + "\xca\x83")); // U+03BA GREEK SMALL LETTER KAPPA // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA // U+03C3 GREEK SMALL LETTER SIGMA // U+03BC GREEK SMALL LETTER MU // U+03B5 GREEK SMALL LETTER EPSILON - EXPECT_EQ(R(conversionOK, 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), - ConvertUTF8ToUnicodeScalarsLenient( - "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(conversionOK) + .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), + "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); // // 3-byte sequences @@ -118,13 +192,15 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B // U+6587 CJK UNIFIED IDEOGRAPH-6587 - EXPECT_EQ(R(conversionOK, 0x4f8b, 0x6587), - ConvertUTF8ToUnicodeScalarsLenient("\xe4\xbe\x8b\xe6\x96\x87")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587), + "\xe4\xbe\x8b\xe6\x96\x87")); // U+D55C HANGUL SYLLABLE HAN // U+AE00 HANGUL SYLLABLE GEUL - EXPECT_EQ(R(conversionOK, 0xd55c, 0xae00), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x95\x9c\xea\xb8\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00), + "\xed\x95\x9c\xea\xb8\x80")); // U+1112 HANGUL CHOSEONG HIEUH // U+1161 HANGUL JUNGSEONG A @@ -132,98 +208,122 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // U+1100 HANGUL CHOSEONG KIYEOK // U+1173 HANGUL JUNGSEONG EU // U+11AF HANGUL JONGSEONG RIEUL - EXPECT_EQ(R(conversionOK, 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" - "\xe1\x86\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK) + .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), + "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" + "\xe1\x86\xaf")); // // 4-byte sequences // // U+E0100 VARIATION SELECTOR-17 - EXPECT_EQ(R(conversionOK, 0x000E0100), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xa0\x84\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100), + "\xf3\xa0\x84\x80")); // // First possible sequence of a certain length // // U+0000 NULL - EXPECT_EQ(R(conversionOK, 0x0000), - ConvertUTF8ToUnicodeScalarsLenient(StringRef("\x00", 1))); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0000), + StringRef("\x00", 1))); // U+0080 PADDING CHARACTER - EXPECT_EQ(R(conversionOK, 0x0080), - ConvertUTF8ToUnicodeScalarsLenient("\xc2\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0080), + "\xc2\x80")); // U+0800 SAMARITAN LETTER ALAF - EXPECT_EQ(R(conversionOK, 0x0800), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0800), + "\xe0\xa0\x80")); // U+10000 LINEAR B SYLLABLE B008 A - EXPECT_EQ(R(conversionOK, 0x10000), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10000), + "\xf0\x90\x80\x80")); // U+200000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80\x80\x80")); // U+4000000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 
0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80\x80\x80")); // // Last possible sequence of a certain length // // U+007F DELETE - EXPECT_EQ(R(conversionOK, 0x007f), - ConvertUTF8ToUnicodeScalarsLenient("\x7f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f")); // U+07FF (unassigned) - EXPECT_EQ(R(conversionOK, 0x07ff), - ConvertUTF8ToUnicodeScalarsLenient("\xdf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x07ff), + "\xdf\xbf")); // U+FFFF (noncharacter) - EXPECT_EQ(R(conversionOK, 0xffff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffff), + "\xef\xbf\xbf")); // U+1FFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf7\xbf\xbf\xbf")); // U+3FFFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf\xbf\xbf")); // U+7FFFFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf\xbf\xbf")); // // Other boundary conditions // // U+D7FF (unassigned) - EXPECT_EQ(R(conversionOK, 0xd7ff), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x9f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff), + "\xed\x9f\xbf")); // U+E000 (private use) - EXPECT_EQ(R(conversionOK, 0xe000), - ConvertUTF8ToUnicodeScalarsLenient("\xee\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xe000), + "\xee\x80\x80")); // U+FFFD REPLACEMENT CHARACTER - EXPECT_EQ(R(conversionOK, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffd), + "\xef\xbf\xbd")); // U+10FFFF (noncharacter) - EXPECT_EQ(R(conversionOK, 0x10ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), + "\xf4\x8f\xbf\xbf")); // U+110000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf4\x90\x80\x80")); // // Unexpected continuation bytes @@ -232,407 +332,570 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // A sequence of unexpected continuation bytes that don't follow a first // byte, every byte is a maximal subpart. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xbf\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x80\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x82\xbf\xaa")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xaa\xb0\xbb\xbf\xaa\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\x80\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xbf\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x80\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x82\xbf\xaa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xaa\xb0\xbb\xbf\xaa\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); // All continuation bytes (0x80--0xbf). 
- EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); // // Lonely start bytes // // Start bytes of 2-byte sequences (0xc0--0xdf). 
- EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" - "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" - "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" - "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" + "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" + "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" + "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); // Start bytes of 3-byte sequences (0xe0--0xef). 
- EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" - "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" + "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); // Start bytes of 4-byte sequences (0xf0--0xf7). - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); // Start bytes of 5-byte sequences (0xf8--0xfb). - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xf9\xfa\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\xf9\xfa\xfb")); - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); // Start bytes of 6-byte sequences (0xfc--0xfd). 
- EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xfd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\xfd")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x20\xfd\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020), + "\xfc\x20\xfd\x20")); // // Other bytes (0xc0--0xc1, 0xfe--0xff). // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xc1\xfe\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe\xfe\xff\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe\x80\x80\x80\x80\x80")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xff\x80\x80\x80\x80\x80")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\x20\xc1\x20\xfe\x20\xff\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0\xc1\xfe\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfe\xfe\xff\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfe\x80\x80\x80\x80\x80")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xff\x80\x80\x80\x80\x80")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xc0\x20\xc1\x20\xfe\x20\xff\x20")); // // Sequences with one continuation byte missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc2")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xdf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe1\x80")); - EXPECT_EQ(R(sourceIllegal, 
0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xec\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x9f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xee\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe0\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xec\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xed\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xed\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xee\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xef\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\x90\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf1\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf3\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x8f\xbf")); // Overlong sequences with one trailing byte missing. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x9f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xc0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xc1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xe0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xe0\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80")); // Sequences that represent surrogates with one trailing byte missing. // High surrogates - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xac")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xaf")); // Low surrogates - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb4")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xb0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xb4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xbf")); // Ill-formed 4-byte sequences. 
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+1100xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf4\x90\x80")); // U+13FBxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf4\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf5\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf6\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf7\x80\x80")); // U+1FFBxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf7\xbf\xbf")); // Ill-formed 5-byte sequences. // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+2000xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf9\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfa\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\x80\x80\x80")); // U+3FFFFxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf\xbf")); // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx // U+40000xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80\x80\x80")); // U+7FFFFFxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf\xbf")); // // Sequences with two continuation bytes missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf3\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x8f")); // Overlong sequences with two trailing byte missing. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf0\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80")); // Sequences that represent surrogates with two trailing bytes missing. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed")); // Ill-formed 4-byte sequences. // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+110yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf4\x90")); // U+13Fyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf4\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf5\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf6\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf7\x80")); // U+1FFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf7\xbf")); // Ill-formed 5-byte sequences. 
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+200yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf9\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfa\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfb\x80\x80")); // U+3FFFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf")); // Ill-formed 6-byte sequences. // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+4000yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80\x80")); // U+7FFFFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf")); // // Sequences with three continuation bytes missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf2")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4")); // Broken overlong sequences. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80")); // Ill-formed 4-byte sequences. // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+14yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6")); // U+1Cyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7")); // Ill-formed 5-byte sequences. // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+20yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\x88")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf9\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfa\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfb\x80")); // U+3FCyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfb\xbf")); // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+400yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80")); // U+7FFCyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf")); // // Sequences with four continuation bytes missing @@ -641,36 +904,41 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // Ill-formed 5-byte sequences. // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); // U+3zyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); // Broken overlong sequences. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\x80")); // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\x84")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfd\x80")); // U+7Fzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfd\xbf")); // // Sequences with five continuation bytes missing @@ -679,89 +947,124 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // Ill-formed 6-byte sequences. // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc")); // U+uuzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd")); // // Consecutive sequences with trailing bytes missing // - EXPECT_EQ(R(sourceIllegal, - 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, /**/ - 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, /**/ - 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0" "\xe0\x80" "\xf0\x80\x80" - "\xf8\x80\x80\x80" - "\xfc\x80\x80\x80\x80" - "\xdf" "\xef\xbf" "\xf7\xbf\xbf" - "\xfb\xbf\xbf\xbf" - "\xfd\xbf\xbf\xbf\xbf")); - + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0" "\xe0\x80" "\xf0\x80\x80" + "\xf8\x80\x80\x80" + "\xfc\x80\x80\x80\x80" + "\xdf" "\xef\xbf" "\xf7\xbf\xbf" + "\xfb\xbf\xbf\xbf" + "\xfd\xbf\xbf\xbf\xbf")); // // Overlong UTF-8 sequences // // U+002F SOLIDUS - EXPECT_EQ(R(conversionOK, 0x002f), - ConvertUTF8ToUnicodeScalarsLenient("\x2f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f")); // Overlong sequences of the above. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80\xaf")); // U+0000 NULL - EXPECT_EQ(R(conversionOK, 0x0000), - ConvertUTF8ToUnicodeScalarsLenient(StringRef("\x00", 1))); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0000), + StringRef("\x00", 1))); // Overlong sequences of the above. - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80\x80")); // Other overlong sequences. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x9f\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x87\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x83\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc1\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x9f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x87\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x83\xbf\xbf\xbf\xbf")); // // Isolated surrogates @@ -780,68 +1083,98 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // High surrogates // U+D800 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80")); // U+DB40 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0")); // U+DBFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + 
.withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf")); // Low surrogates // U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xb0\x80")); // U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xb4\x80")); // U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xbf\xbf")); // Surrogate pairs // U+D800 U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xb0\x80")); // U+D800 U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xb4\x80")); // U+D800 U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xbf\xbf")); // U+DB40 U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xb0\x80")); // U+DB40 U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xb4\x80")); // U+DB40 U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xbf\xbf")); // U+DBFF U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xb0\x80")); // U+DBFF U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xb4\x80")); + 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xb4\x80")); // U+DBFF U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xbf\xbf")); // // Noncharacters @@ -855,397 +1188,477 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // and the values U+FDD0..U+FDEF. // U+FFFE - EXPECT_EQ(R(conversionOK, 0xfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffe), + "\xef\xbf\xbe")); // U+FFFF - EXPECT_EQ(R(conversionOK, 0xffff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffff), + "\xef\xbf\xbf")); // U+1FFFE - EXPECT_EQ(R(conversionOK, 0x1fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe), + "\xf0\x9f\xbf\xbe")); // U+1FFFF - EXPECT_EQ(R(conversionOK, 0x1ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff), + "\xf0\x9f\xbf\xbf")); // U+2FFFE - EXPECT_EQ(R(conversionOK, 0x2fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe), + "\xf0\xaf\xbf\xbe")); // U+2FFFF - EXPECT_EQ(R(conversionOK, 0x2ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff), + "\xf0\xaf\xbf\xbf")); // U+3FFFE - EXPECT_EQ(R(conversionOK, 0x3fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe), + "\xf0\xbf\xbf\xbe")); // U+3FFFF - EXPECT_EQ(R(conversionOK, 0x3ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff), + "\xf0\xbf\xbf\xbf")); // U+4FFFE - EXPECT_EQ(R(conversionOK, 0x4fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe), + "\xf1\x8f\xbf\xbe")); // U+4FFFF - EXPECT_EQ(R(conversionOK, 0x4ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff), + "\xf1\x8f\xbf\xbf")); // U+5FFFE - EXPECT_EQ(R(conversionOK, 0x5fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe), + "\xf1\x9f\xbf\xbe")); // U+5FFFF - EXPECT_EQ(R(conversionOK, 0x5ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff), + 
"\xf1\x9f\xbf\xbf")); // U+6FFFE - EXPECT_EQ(R(conversionOK, 0x6fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe), + "\xf1\xaf\xbf\xbe")); // U+6FFFF - EXPECT_EQ(R(conversionOK, 0x6ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff), + "\xf1\xaf\xbf\xbf")); // U+7FFFE - EXPECT_EQ(R(conversionOK, 0x7fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe), + "\xf1\xbf\xbf\xbe")); // U+7FFFF - EXPECT_EQ(R(conversionOK, 0x7ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff), + "\xf1\xbf\xbf\xbf")); // U+8FFFE - EXPECT_EQ(R(conversionOK, 0x8fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe), + "\xf2\x8f\xbf\xbe")); // U+8FFFF - EXPECT_EQ(R(conversionOK, 0x8ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff), + "\xf2\x8f\xbf\xbf")); // U+9FFFE - EXPECT_EQ(R(conversionOK, 0x9fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe), + "\xf2\x9f\xbf\xbe")); // U+9FFFF - EXPECT_EQ(R(conversionOK, 0x9ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff), + "\xf2\x9f\xbf\xbf")); // U+AFFFE - EXPECT_EQ(R(conversionOK, 0xafffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xafffe), + "\xf2\xaf\xbf\xbe")); // U+AFFFF - EXPECT_EQ(R(conversionOK, 0xaffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xaffff), + "\xf2\xaf\xbf\xbf")); // U+BFFFE - EXPECT_EQ(R(conversionOK, 0xbfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe), + "\xf2\xbf\xbf\xbe")); // U+BFFFF - EXPECT_EQ(R(conversionOK, 0xbffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xbffff), + "\xf2\xbf\xbf\xbf")); // U+CFFFE - EXPECT_EQ(R(conversionOK, 0xcfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe), + "\xf3\x8f\xbf\xbe")); // U+CFFFF - EXPECT_EQ(R(conversionOK, 0xcfffF), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF), + "\xf3\x8f\xbf\xbf")); // U+DFFFE - EXPECT_EQ(R(conversionOK, 0xdfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x9f\xbf\xbe")); + 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe), + "\xf3\x9f\xbf\xbe")); // U+DFFFF - EXPECT_EQ(R(conversionOK, 0xdffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xdffff), + "\xf3\x9f\xbf\xbf")); // U+EFFFE - EXPECT_EQ(R(conversionOK, 0xefffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xefffe), + "\xf3\xaf\xbf\xbe")); // U+EFFFF - EXPECT_EQ(R(conversionOK, 0xeffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xeffff), + "\xf3\xaf\xbf\xbf")); // U+FFFFE - EXPECT_EQ(R(conversionOK, 0xffffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffffe), + "\xf3\xbf\xbf\xbe")); // U+FFFFF - EXPECT_EQ(R(conversionOK, 0xfffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffff), + "\xf3\xbf\xbf\xbf")); // U+10FFFE - EXPECT_EQ(R(conversionOK, 0x10fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe), + "\xf4\x8f\xbf\xbe")); // U+10FFFF - EXPECT_EQ(R(conversionOK, 0x10ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), + "\xf4\x8f\xbf\xbf")); // U+FDD0 - EXPECT_EQ(R(conversionOK, 0xfdd0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0), + "\xef\xb7\x90")); // U+FDD1 - EXPECT_EQ(R(conversionOK, 0xfdd1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x91")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1), + "\xef\xb7\x91")); // U+FDD2 - EXPECT_EQ(R(conversionOK, 0xfdd2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x92")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2), + "\xef\xb7\x92")); // U+FDD3 - EXPECT_EQ(R(conversionOK, 0xfdd3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x93")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3), + "\xef\xb7\x93")); // U+FDD4 - EXPECT_EQ(R(conversionOK, 0xfdd4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x94")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4), + "\xef\xb7\x94")); // U+FDD5 - EXPECT_EQ(R(conversionOK, 0xfdd5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x95")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5), + "\xef\xb7\x95")); // U+FDD6 - EXPECT_EQ(R(conversionOK, 0xfdd6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x96")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6), + "\xef\xb7\x96")); // U+FDD7 - EXPECT_EQ(R(conversionOK, 0xfdd7), - 
ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x97")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7), + "\xef\xb7\x97")); // U+FDD8 - EXPECT_EQ(R(conversionOK, 0xfdd8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x98")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8), + "\xef\xb7\x98")); // U+FDD9 - EXPECT_EQ(R(conversionOK, 0xfdd9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x99")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9), + "\xef\xb7\x99")); // U+FDDA - EXPECT_EQ(R(conversionOK, 0xfdda), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9a")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdda), + "\xef\xb7\x9a")); // U+FDDB - EXPECT_EQ(R(conversionOK, 0xfddb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9b")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddb), + "\xef\xb7\x9b")); // U+FDDC - EXPECT_EQ(R(conversionOK, 0xfddc), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9c")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddc), + "\xef\xb7\x9c")); // U+FDDD - EXPECT_EQ(R(conversionOK, 0xfddd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9d")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddd), + "\xef\xb7\x9d")); // U+FDDE - EXPECT_EQ(R(conversionOK, 0xfdde), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9e")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdde), + "\xef\xb7\x9e")); // U+FDDF - EXPECT_EQ(R(conversionOK, 0xfddf), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddf), + "\xef\xb7\x9f")); // U+FDE0 - EXPECT_EQ(R(conversionOK, 0xfde0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde0), + "\xef\xb7\xa0")); // U+FDE1 - EXPECT_EQ(R(conversionOK, 0xfde1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde1), + "\xef\xb7\xa1")); // U+FDE2 - EXPECT_EQ(R(conversionOK, 0xfde2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde2), + "\xef\xb7\xa2")); // U+FDE3 - EXPECT_EQ(R(conversionOK, 0xfde3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde3), + "\xef\xb7\xa3")); // U+FDE4 - EXPECT_EQ(R(conversionOK, 0xfde4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde4), + "\xef\xb7\xa4")); // U+FDE5 - EXPECT_EQ(R(conversionOK, 0xfde5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde5), + "\xef\xb7\xa5")); // U+FDE6 - EXPECT_EQ(R(conversionOK, 0xfde6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa6")); + 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde6), + "\xef\xb7\xa6")); // U+FDE7 - EXPECT_EQ(R(conversionOK, 0xfde7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde7), + "\xef\xb7\xa7")); // U+FDE8 - EXPECT_EQ(R(conversionOK, 0xfde8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde8), + "\xef\xb7\xa8")); // U+FDE9 - EXPECT_EQ(R(conversionOK, 0xfde9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde9), + "\xef\xb7\xa9")); // U+FDEA - EXPECT_EQ(R(conversionOK, 0xfdea), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xaa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdea), + "\xef\xb7\xaa")); // U+FDEB - EXPECT_EQ(R(conversionOK, 0xfdeb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xab")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb), + "\xef\xb7\xab")); // U+FDEC - EXPECT_EQ(R(conversionOK, 0xfdec), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xac")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdec), + "\xef\xb7\xac")); // U+FDED - EXPECT_EQ(R(conversionOK, 0xfded), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xad")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfded), + "\xef\xb7\xad")); // U+FDEE - EXPECT_EQ(R(conversionOK, 0xfdee), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xae")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdee), + "\xef\xb7\xae")); // U+FDEF - EXPECT_EQ(R(conversionOK, 0xfdef), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdef), + "\xef\xb7\xaf")); // U+FDF0 - EXPECT_EQ(R(conversionOK, 0xfdf0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0), + "\xef\xb7\xb0")); // U+FDF1 - EXPECT_EQ(R(conversionOK, 0xfdf1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1), + "\xef\xb7\xb1")); // U+FDF2 - EXPECT_EQ(R(conversionOK, 0xfdf2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2), + "\xef\xb7\xb2")); // U+FDF3 - EXPECT_EQ(R(conversionOK, 0xfdf3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3), + "\xef\xb7\xb3")); // U+FDF4 - EXPECT_EQ(R(conversionOK, 0xfdf4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4), + "\xef\xb7\xb4")); // U+FDF5 - EXPECT_EQ(R(conversionOK, 0xfdf5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5), + "\xef\xb7\xb5")); // U+FDF6 - EXPECT_EQ(R(conversionOK, 0xfdf6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6), + "\xef\xb7\xb6")); // U+FDF7 - EXPECT_EQ(R(conversionOK, 0xfdf7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7), + "\xef\xb7\xb7")); // U+FDF8 - EXPECT_EQ(R(conversionOK, 0xfdf8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8), + "\xef\xb7\xb8")); // U+FDF9 - EXPECT_EQ(R(conversionOK, 0xfdf9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9), + "\xef\xb7\xb9")); // U+FDFA - EXPECT_EQ(R(conversionOK, 0xfdfa), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xba")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa), + "\xef\xb7\xba")); // U+FDFB - EXPECT_EQ(R(conversionOK, 0xfdfb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb), + "\xef\xb7\xbb")); // U+FDFC - EXPECT_EQ(R(conversionOK, 0xfdfc), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbc")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc), + "\xef\xb7\xbc")); // U+FDFD - EXPECT_EQ(R(conversionOK, 0xfdfd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd), + "\xef\xb7\xbd")); // U+FDFE - EXPECT_EQ(R(conversionOK, 0xfdfe), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe), + "\xef\xb7\xbe")); // U+FDFF - EXPECT_EQ(R(conversionOK, 0xfdff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbf")); -} - -std::pair> -ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { - const UTF8 *SourceStart = reinterpret_cast(S.data()); - - const UTF8 *SourceNext = SourceStart; - std::vector Decoded(S.size(), 0); - UTF32 *TargetStart = Decoded.data(); - - auto Result = ConvertUTF8toUTF32Partial( - &SourceNext, SourceStart + S.size(), &TargetStart, - Decoded.data() + Decoded.size(), lenientConversion); - - Decoded.resize(TargetStart - Decoded.data()); - - return std::make_pair(Result, Decoded); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdff), + "\xef\xb7\xbf")); } TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) { // U+0041 LATIN CAPITAL LETTER A - EXPECT_EQ(R(conversionOK, 0x0041), - ConvertUTF8ToUnicodeScalarsPartialLenient("\x41")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0041), + "\x41", true)); // // Sequences with one continuation byte missing // - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xc2")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xdf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe0\xa0")); - 
EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe0\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe1\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xec\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xed\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xed\x9f")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xee\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xef\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf0\x90\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf0\xbf\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf1\x80\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf3\xbf\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf4\x80\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf4\x8f\xbf")); - - EXPECT_EQ(R(sourceExhausted, 0x0041), - ConvertUTF8ToUnicodeScalarsPartialLenient("\x41\xc2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xc2", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xdf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe0\xa0", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe0\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe1\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xec\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xed\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xed\x9f", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xee\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xef\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf0\x90\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf0\xbf\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf1\x80\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf3\xbf\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf4\x80\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf4\x8f\xbf", true)); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041), + "\x41\xc2", true)); } -#undef R0 -#undef R - From 3a001aab4d743b3557854d205e12bd4eb02d6f2b Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 17 Jun 2014 11:26:00 +0000 Subject: [PATCH 005/157] [msan] Fix a comment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211094 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 38a3fbb21f8c..e71e540aa82d 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2018,10 +2018,10 @@ struct MemorySanitizerVisitor : public InstVisitor { } } - // \brief Instrument vector shift instrinsic. + // \brief Instrument vector pack instrinsic. // // This function instruments intrinsics like x86_mmx_packsswb, that - // packs elements of 2 input vectors into half as much bits with saturation. + // packs elements of 2 input vectors into half as many bits with saturation. // Shadow is propagated with the signed variant of the same intrinsic applied // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer). // EltSizeInBits is used only for x86mmx arguments. From c22960dba6583c3044d37c3d8b411f393cba46e0 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 17 Jun 2014 11:31:42 +0000 Subject: [PATCH 006/157] AArch64: estimate inline asm length during branch relaxation To make sure branches are in range, we need to do a better job of estimating the length of an inline assembly block than "it's probably 1 instruction, who'd write asm with more than that?". Fortunately there's already a (highly suspect, see how many ways you can think of to break it!) callback for this purpose, which is used by the other targets. rdar://problem/17277590 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211095 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 8 +++++- test/CodeGen/AArch64/branch-relax-asm.ll | 35 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AArch64/branch-relax-asm.ll diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 8cec4a148070..cabf9ea87265 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -35,8 +35,14 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MCInstrDesc &Desc = MI->getDesc(); + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + if (MI->getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); + const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.getOpcode()) { default: // Anything not explicitly designated otherwise is a nomal 4-byte insn. 
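The estimate this change relies on is intentionally coarse: for a fixed-width ISA such as AArch64, counting the statements in the inline asm string and multiplying by the maximum instruction size (4 bytes) gives a workable upper bound for branch relaxation. A rough standalone sketch of that idea follows (illustrative only, with made-up helper names; it is not the getInlineAsmLength implementation and it ignores comments and data directives):

#include <cassert>
#include <cctype>
#include <string>

// Count asm statements (separated by newlines or ';') and assume each one
// expands to at most MaxInstLength bytes. Overestimating is acceptable here;
// branch relaxation only needs an upper bound on the block's size.
static unsigned estimateInlineAsmLength(const std::string &Asm,
                                        unsigned MaxInstLength = 4) {
  unsigned NumStmts = 0;
  bool InStmt = false;
  for (char C : Asm) {
    if (C == '\n' || C == ';') {
      InStmt = false;
    } else if (!std::isspace(static_cast<unsigned char>(C)) && !InStmt) {
      InStmt = true;   // first non-blank character of a new statement
      ++NumStmts;
    }
  }
  return NumStmts * MaxInstLength;
}

int main() {
  // Six nops, as in the test below: 6 * 4 = 24 bytes, enough to push the
  // conditional branch out of the artificially narrowed tbz range.
  assert(estimateInlineAsmLength("nop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop") == 24);
  return 0;
}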
diff --git a/test/CodeGen/AArch64/branch-relax-asm.ll b/test/CodeGen/AArch64/branch-relax-asm.ll new file mode 100644 index 000000000000..7409c84e6180 --- /dev/null +++ b/test/CodeGen/AArch64/branch-relax-asm.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-apple-ios7.0 -disable-block-placement -aarch64-tbz-offset-bits=4 -o - %s | FileCheck %s +define i32 @test_asm_length(i32 %in) { +; CHECK-LABEL: test_asm_length: + + ; It would be more natural to use just one "tbnz %false" here, but if the + ; number of instructions in the asm is counted reasonably, that block is out + ; of the limited range we gave tbz. So branch relaxation has to invert the + ; condition. +; CHECK: tbz w0, #0, [[TRUE:LBB[0-9]+_[0-9]+]] +; CHECK: b [[FALSE:LBB[0-9]+_[0-9]+]] + +; CHECK: [[TRUE]]: +; CHECK: orr w0, wzr, #0x4 +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: ret + +; CHECK: [[FALSE]]: +; CHECK: ret + + %val = and i32 %in, 1 + %tst = icmp eq i32 %val, 0 + br i1 %tst, label %true, label %false + +true: + call void asm sideeffect "nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop", ""() + ret i32 4 + +false: + ret i32 0 +} From fc6e0300471314735331e8c9afcecc04f7d72e96 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Tue, 17 Jun 2014 12:31:41 +0000 Subject: [PATCH 007/157] Fix memory leak of RegScavenger accidentally added in r211037. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211097 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a91bb972fb12..a03bcdbddd79 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1735,8 +1735,10 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { isThumb1 = AFI->isThumbFunction() && !isThumb2; // FIXME: Temporarily disabling for Thumb-1 due to miscompiles - if (isThumb1) + if (isThumb1) { + delete RS; return false; + } bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; From 3b2a2563322161a609fd6672af2d879c6bd65e8d Mon Sep 17 00:00:00 2001 From: James Molloy Date: Tue, 17 Jun 2014 13:10:38 +0000 Subject: [PATCH 008/157] Move SetTheory from utils/TableGen into lib/TableGen so Clang can use it. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211100 91177308-0d34-0410-b5e6-96231b3b80d8 --- {utils => include/llvm}/TableGen/SetTheory.h | 0 lib/TableGen/CMakeLists.txt | 1 + {utils => lib}/TableGen/SetTheory.cpp | 2 +- utils/TableGen/CMakeLists.txt | 1 - utils/TableGen/CodeGenRegisters.h | 2 +- utils/TableGen/CodeGenSchedule.h | 2 +- utils/TableGen/TableGen.cpp | 2 +- 7 files changed, 5 insertions(+), 5 deletions(-) rename {utils => include/llvm}/TableGen/SetTheory.h (100%) rename {utils => lib}/TableGen/SetTheory.cpp (99%) diff --git a/utils/TableGen/SetTheory.h b/include/llvm/TableGen/SetTheory.h similarity index 100% rename from utils/TableGen/SetTheory.h rename to include/llvm/TableGen/SetTheory.h diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 935d674a3603..fb702187d13a 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMTableGen Error.cpp Main.cpp Record.cpp + SetTheory.cpp StringMatcher.cpp TableGenBackend.cpp TGLexer.cpp diff --git a/utils/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp similarity index 99% rename from utils/TableGen/SetTheory.cpp rename to lib/TableGen/SetTheory.cpp index 5ead7ed16fda..c99c2bab45ab 100644 --- a/utils/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "SetTheory.h" #include "llvm/Support/Format.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" using namespace llvm; diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index f277608d2c80..feaa7c757962 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -26,7 +26,6 @@ add_tablegen(llvm-tblgen LLVM OptParserEmitter.cpp PseudoLoweringEmitter.cpp RegisterInfoEmitter.cpp - SetTheory.cpp SubtargetEmitter.cpp TableGen.cpp X86DisassemblerTables.cpp diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index 30732c8e04d8..278315ba47b3 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -15,7 +15,6 @@ #ifndef CODEGEN_REGISTERS_H #define CODEGEN_REGISTERS_H -#include "SetTheory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -23,6 +22,7 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" #include #include #include diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h index 65ac60207472..3fef8adf91e5 100644 --- a/utils/TableGen/CodeGenSchedule.h +++ b/utils/TableGen/CodeGenSchedule.h @@ -15,11 +15,11 @@ #ifndef CODEGEN_SCHEDULE_H #define CODEGEN_SCHEDULE_H -#include "SetTheory.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" namespace llvm { diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 00c3a6fd91f6..bbd61f5fb112 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "TableGenBackends.h" // Declares all backends. 
-#include "SetTheory.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" using namespace llvm; From 199da600f3afd6804af2077203a2d4086d7242a9 Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Tue, 17 Jun 2014 14:34:19 +0000 Subject: [PATCH 009/157] Fixed jump threading going to infinite loop. This patch add code to remove unreachable blocks from function as they may cause jump threading to stuck in infinite loop. Differential Revision: http://reviews.llvm.org/D3991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211103 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/JumpThreading.cpp | 3 +++ test/Transforms/JumpThreading/pr15851_hang.ll | 22 +++++++++++++++++++ test/Transforms/JumpThreading/select.ll | 2 +- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/JumpThreading/pr15851_hang.ll diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 230a381593e0..e501ff29d038 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -158,6 +158,9 @@ bool JumpThreading::runOnFunction(Function &F) { TLI = &getAnalysis(); LVI = &getAnalysis(); + // Remove unreachable blocks from function as they may result in infinite loop. + removeUnreachableBlocks(F); + FindLoopHeaders(F); bool Changed, EverChanged = false; diff --git a/test/Transforms/JumpThreading/pr15851_hang.ll b/test/Transforms/JumpThreading/pr15851_hang.ll new file mode 100644 index 000000000000..0484bc9f9dc9 --- /dev/null +++ b/test/Transforms/JumpThreading/pr15851_hang.ll @@ -0,0 +1,22 @@ +; RUN: opt -S -jump-threading < %s | FileCheck %s + +; CHECK-LABEL: @f( +; CHECK-LABEL: entry +; CHECK: ret void +; CHECK-NOT: for.cond1 +; CHECK-NOT: for.body + +define void @f() { +entry: + ret void + +for.cond1: + %i.025 = phi i32 [ %inc, %for.body ], [ %inc, %for.body ], [ 1, %for.cond1 ] + %cmp = icmp slt i32 %i.025, 2 + br i1 %cmp, label %for.body, label %for.cond1 + +for.body: + %inc = add nsw i32 %i.025, 0 + %a = icmp ugt i32 %inc, 2 + br i1 %a, label %for.cond1, label %for.cond1 +} diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll index 201e604e0c5e..545e86c082f4 100644 --- a/test/Transforms/JumpThreading/select.ll +++ b/test/Transforms/JumpThreading/select.ll @@ -127,7 +127,7 @@ L4: ; CHECK: test_switch_default ; CHECK: entry: ; CHECK: load -; CHECK: switch +; CHECK: icmp ; CHECK: [[THREADED:[A-Za-z.0-9]+]]: ; CHECK: store ; CHECK: br From 1d5ff6bb7a4e4ed38351188ba4b2b7d6e0ae3c71 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Tue, 17 Jun 2014 14:47:45 +0000 Subject: [PATCH 010/157] [FastISel][X86] Fix previous refactoring commit (r211077) Overlooked that fcmp_une uses an "or" instead of an "and" for combining the flags. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211104 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 8 ++++---- test/CodeGen/X86/fast-isel-cmp.ll | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 80cc99bb80cc..6a4a467f6a9a 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1049,9 +1049,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { return false; // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. - static unsigned SETFOpcTable[2][2] = { - { X86::SETEr, X86::SETNPr }, - { X86::SETNEr, X86::SETPr } + static unsigned SETFOpcTable[2][3] = { + { X86::SETEr, X86::SETNPr, X86::AND8rr }, + { X86::SETNEr, X86::SETPr, X86::OR8rr } }; unsigned *SETFOpc = nullptr; switch (CI->getPredicate()) { @@ -1071,7 +1071,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { FlagReg1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), FlagReg2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::AND8rr), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]), ResultReg).addReg(FlagReg1).addReg(FlagReg2); UpdateValueMap(I, ResultReg); return true; diff --git a/test/CodeGen/X86/fast-isel-cmp.ll b/test/CodeGen/X86/fast-isel-cmp.ll index edf1263a3eac..61cc67a244e4 100644 --- a/test/CodeGen/X86/fast-isel-cmp.ll +++ b/test/CodeGen/X86/fast-isel-cmp.ll @@ -155,7 +155,7 @@ define zeroext i1 @fcmp_une(float %x, float %y) { ; FAST: ucomiss %xmm1, %xmm0 ; FAST-NEXT: setne %al ; FAST-NEXT: setp %cl -; FAST-NEXT: andb %al, %cl +; FAST-NEXT: orb %al, %cl %1 = fcmp une float %x, %y ret i1 %1 } From 41bfd7846bbcb84b184da0fc3d21a7103df4e2af Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 17 Jun 2014 16:53:04 +0000 Subject: [PATCH 011/157] R600/SI: Re-initialize the m0 register after using it for indirect addressing We need to store a value greater than or equal to the number of LDS bytes allocated by the shader in the m0 register in order for LDS instructions to work correctly. We always initialize m0 at the beginning of a shader, but this register is also used for indirect addressing offsets, so we need to re-initialize it any time we use indirect addressing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211107 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SILowerControlFlow.cpp | 87 +++++++++++++++----------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 6601f2a98065..9f5ff29ad93a 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -86,6 +86,7 @@ class SILowerControlFlowPass : public MachineFunctionPass { void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void InitM0ForLDS(MachineBasicBlock::iterator MI); void LoadM0(MachineInstr &MI, MachineInstr *MovRel); void IndirectSrc(MachineInstr &MI); void IndirectDst(MachineInstr &MI); @@ -320,6 +321,14 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +/// The m0 register stores the maximum allowable address for LDS reads and +/// writes. Its value must be at least the size in bytes of LDS allocated by +/// the shader. For simplicity, we set it to the maximum possible value. 
+void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), + AMDGPU::M0).addImm(0xffffffff); +} + void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MachineBasicBlock &MBB = *MI.getParent(); @@ -333,52 +342,57 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx); MBB.insert(I, MovRel); - MI.eraseFromParent(); - return; - } + } else { - assert(AMDGPU::SReg_64RegClass.contains(Save)); - assert(AMDGPU::VReg_32RegClass.contains(Idx)); + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); - // Save the EXEC mask - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) - .addReg(AMDGPU::EXEC); + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); - // Read the next variant into VCC (lower 32 bits) <- also loop target - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - AMDGPU::VCC_LO) - .addReg(Idx); + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + AMDGPU::VCC_LO) + .addReg(Idx); - // Move index from VCC into M0 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .addReg(AMDGPU::VCC_LO); + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC_LO); - // Compare the just read M0 value to all possible Idx values - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) - .addReg(AMDGPU::M0) - .addReg(Idx); + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); - // Update EXEC, save the original EXEC value to VCC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) - .addReg(AMDGPU::VCC); + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); - // Do the actual move - MBB.insert(I, MovRel); + // Do the actual move + MBB.insert(I, MovRel); - // Update EXEC, switch all done bits to 0 and all todo bits to 1 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) - .addReg(AMDGPU::VCC); + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); - // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(-7) - .addReg(AMDGPU::EXEC); + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); - // Restore EXEC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) - .addReg(Save); + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + } + // FIXME: Are there any values other than the LDS address clamp that need to + // be stored in the m0 register and may be live for more than a few + // instructions? If so, we should save the m0 register at the beginning + // of this function and restore it here. + // FIXME: Add support for LDS direct loads. 
+ InitM0ForLDS(&MI); MI.eraseFromParent(); } @@ -523,8 +537,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock &MBB = MF.front(); // Initialize M0 to a value that won't cause LDS access to be discarded // due to offset clamping - BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0).addImm(0xffffffff); + InitM0ForLDS(MBB.getFirstNonPHI()); } if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) { From bae98b1b454a86000d17d4df1abf072a8520b98f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 17 Jun 2014 16:53:07 +0000 Subject: [PATCH 012/157] SelectionDAG: Expand i64 = FP_TO_SINT i32 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211108 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 59 ++++++++++++++++++++++++ lib/Target/R600/AMDGPUISelLowering.cpp | 1 + test/CodeGen/R600/fp_to_sint_i64.ll | 12 +++++ 3 files changed, 72 insertions(+) create mode 100644 test/CodeGen/R600/fp_to_sint_i64.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 48eb86c49c39..a4245a6016c2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3152,6 +3152,65 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::FP_TO_SINT: { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + + // FIXME: Only f32 to i64 conversions are supported. + if (VT != MVT::f32 || NVT != MVT::i64) + break; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, TLI.getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, TLI.getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Results.push_back(DAG.getSelectCC(dl, Exponent, 
DAG.getConstant(0, IntVT), + DAG.getConstant(0, NVT), Ret, ISD::SETLT)); + break; + } case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 15ac2a2379d3..1627ec087728 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -253,6 +253,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i64, Expand); setOperationAction(ISD::MULHS, MVT::i64, Expand); diff --git a/test/CodeGen/R600/fp_to_sint_i64.ll b/test/CodeGen/R600/fp_to_sint_i64.ll new file mode 100644 index 000000000000..ec3e19804c57 --- /dev/null +++ b/test/CodeGen/R600/fp_to_sint_i64.ll @@ -0,0 +1,12 @@ +; FIXME: Merge into fp_to_sint.ll when EG/NI supports 64-bit types +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s + +; SI-LABEL: @fp_to_sint_i64 +; Check that the compiler doesn't crash with a "cannot select" error +; SI: S_ENDPGM +define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { +entry: + %0 = fptosi float %in to i64 + store i64 %0, i64 addrspace(1)* %out + ret void +} From ff8dc48da387719d4b4c4712715be0e2d2672d87 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 17 Jun 2014 16:53:09 +0000 Subject: [PATCH 013/157] R600/SI: Add a pattern for llvm.AMDGPU.barrier.global git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211109 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUIntrinsics.td | 1 + lib/Target/R600/EvergreenInstructions.td | 7 ++++++- lib/Target/R600/SIInstructions.td | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 95fab11a260f..4ef23abfe3ed 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -65,6 +65,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; + def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } // Legacy names for compatability. 
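For reference, the ISD::FP_TO_SINT expansion added to LegalizeDAG.cpp above follows compiler-rt's fixsfdi algorithm. A minimal scalar sketch of the same computation, assuming an IEEE-754 binary32 layout and ignoring NaN and out-of-range inputs (fptosi is undefined for those anyway); the function and variable names below are illustrative only:

    #include <cstdint>
    #include <cstring>

    int64_t FloatToInt64Sketch(float In) {
      uint32_t Bits;
      std::memcpy(&Bits, &In, sizeof(Bits));                        // ISD::BITCAST

      int32_t Exponent = int32_t((Bits & 0x7F800000u) >> 23) - 127; // unbiased exponent
      int64_t Sign = (Bits & 0x80000000u) ? -1 : 0;                 // SRA of the sign bit, sign-extended
      int64_t R = (Bits & 0x007FFFFFu) | 0x00800000u;               // mantissa plus implicit leading 1

      if (Exponent < 0)            // |In| < 1.0, the final SETLT select
        return 0;
      if (Exponent > 23)           // the SETGT select between the SHL and SRL paths
        R <<= (Exponent - 23);
      else
        R >>= (23 - Exponent);
      return (R ^ Sign) - Sign;    // reapply the sign
    }

The 0x7F800000, 23 and 127 constants are the binary32-specific values that currently limit the expansion to the f32 -> i64 case noted in the FIXME.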
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 28725ff280da..dcb7e982c7fc 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -348,7 +348,7 @@ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; def GROUP_BARRIER : InstR600 < - (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>, + (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>, R600ALU_Word0, R600ALU_Word1_OP2 <0x54> { @@ -377,6 +377,11 @@ def GROUP_BARRIER : InstR600 < let ALUInst = 1; } +def : Pat < + (int_AMDGPU_barrier_global), + (GROUP_BARRIER) +>; + //===----------------------------------------------------------------------===// // LDS Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index be7d8a6d4ab9..2f04bea269ac 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1731,6 +1731,15 @@ def : Pat < (S_XOR_B64 $src0, $src1) >; +//===----------------------------------------------------------------------===// +// SOPP Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (int_AMDGPU_barrier_global), + (S_BARRIER) +>; + //===----------------------------------------------------------------------===// // VOP2 Patterns //===----------------------------------------------------------------------===// From f56e7678d1ced97d5513e0a75658dc48396e4a58 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 17 Jun 2014 16:53:14 +0000 Subject: [PATCH 014/157] R600: Use LDS and vectors for private memory git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211110 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 2 + lib/Target/R600/AMDGPU.td | 24 +- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 8 +- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + lib/Target/R600/AMDGPUISelLowering.h | 9 + lib/Target/R600/AMDGPUPromoteAlloca.cpp | 365 ++++++++++++++++++ lib/Target/R600/AMDGPUSubtarget.cpp | 5 + lib/Target/R600/AMDGPUSubtarget.h | 2 + lib/Target/R600/AMDGPUTargetMachine.cpp | 8 + lib/Target/R600/CMakeLists.txt | 1 + lib/Target/R600/R600ISelLowering.cpp | 62 +++ lib/Target/R600/R600ISelLowering.h | 3 + lib/Target/R600/R600InstrInfo.cpp | 69 +++- lib/Target/R600/R600InstrInfo.h | 14 + lib/Target/R600/R600Instructions.td | 54 +++ lib/Target/R600/R600RegisterInfo.td | 48 ++- lib/Target/R600/SIInstructions.td | 4 +- test/CodeGen/R600/array-ptr-calc-i32.ll | 7 +- test/CodeGen/R600/indirect-private-64.ll | 36 +- test/CodeGen/R600/large-alloca.ll | 9 +- test/CodeGen/R600/parallelandifcollapse.ll | 6 + test/CodeGen/R600/parallelorifcollapse.ll | 5 + test/CodeGen/R600/private-memory.ll | 38 +- .../R600/simplify-demanded-bits-build-pair.ll | 7 +- test/CodeGen/R600/vector-alloca.ll | 74 ++++ 25 files changed, 788 insertions(+), 73 deletions(-) create mode 100644 lib/Target/R600/AMDGPUPromoteAlloca.cpp create mode 100644 test/CodeGen/R600/vector-alloca.ll diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 3fd8e2f9248d..f92bde853770 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -17,6 +17,7 @@ namespace llvm { class AMDGPUInstrPrinter; +class AMDGPUSubtarget; class AMDGPUTargetMachine; class FunctionPass; class MCAsmInfo; @@ -47,6 +48,7 @@ void 
initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; // Passes common to R600 and SI +FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST); Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 2cfc46393795..d3dff531a7f6 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -86,28 +86,40 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; +class SubtargetFeatureLocalMemorySize : SubtargetFeature< + "localmemorysize"#Value, + "LocalMemorySize", + !cast(Value), + "The size of local memory in bytes">; + class SubtargetFeatureGeneration Implies> : SubtargetFeature ; +def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; +def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; +def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; + def FeatureR600 : SubtargetFeatureGeneration<"R600", - [FeatureR600ALUInst, FeatureFetchLimit8]>; + [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>; def FeatureR700 : SubtargetFeatureGeneration<"R700", - [FeatureFetchLimit16]>; + [FeatureFetchLimit16, FeatureLocalMemorySize0]>; def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", - [FeatureFetchLimit16]>; + [FeatureFetchLimit16, FeatureLocalMemorySize32768]>; def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", - [FeatureFetchLimit16, FeatureWavefrontSize64]>; + [FeatureFetchLimit16, FeatureWavefrontSize64, + FeatureLocalMemorySize32768] +>; def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", - [Feature64BitPtr, FeatureFP64]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768]>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", - [Feature64BitPtr, FeatureFP64]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536]>; //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 8385baa1011b..b4e86ce3a1a3 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -258,6 +258,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } case ISD::SCALAR_TO_VECTOR: + case AMDGPUISD::BUILD_VERTICAL_VECTOR: case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPURegisterInfo *TRI = @@ -308,7 +309,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { // can't be bundled by our scheduler. 
switch(NumVectorElts) { case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; - case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; + case 4: + if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) + RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; + else + RegClassID = AMDGPU::R600_Reg128RegClassID; + break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1627ec087728..58959b8d4bda 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1911,6 +1911,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CVT_F32_UBYTE1) NODE_NAME_CASE(CVT_F32_UBYTE2) NODE_NAME_CASE(CVT_F32_UBYTE3) + NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 57f09bebd11d..a4baaf12588f 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -203,6 +203,15 @@ enum { CVT_F32_UBYTE1, CVT_F32_UBYTE2, CVT_F32_UBYTE3, + /// This node is for VLIW targets and it is used to represent a vector + /// that is stored in consecutive registers with the same channel. + /// For example: + /// |X |Y|Z|W| + /// T0|v.x| | | | + /// T1|v.y| | | | + /// T2|v.z| | | | + /// T3|v.w| | | | + BUILD_VERTICAL_VECTOR, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp new file mode 100644 index 000000000000..2d3a7fdd4feb --- /dev/null +++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp @@ -0,0 +1,365 @@ +//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates allocas by either converting them into vectors or +// by migrating them to local address space. 
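+//
+// Rough sketch of the two rewrites (illustrative IR only, not from a real test):
+//
+//   %a = alloca [4 x i32]                          ; <= 4 elements, scalar element
+//   %p = getelementptr [4 x i32]* %a, i32 0, i32 %i ; type: kept as a <4 x i32>,
+//   load/store through %p                          ; rewritten to extractelement /
+//                                                  ; insertelement on that vector
+//
+//   %b = alloca [64 x i32]                         ; too big for a vector: replaced
+//                                                  ; by one [256 x [64 x i32]] LDS
+//                                                  ; array shared by the work group,
+//                                                  ; indexed by the flattened
+//                                                  ; work-item id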
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "amdgpu-promote-alloca" + +using namespace llvm; + +namespace { + +class AMDGPUPromoteAlloca : public FunctionPass, + public InstVisitor { + + static char ID; + Module *Mod; + const AMDGPUSubtarget &ST; + int LocalMemAvailable; + +public: + AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st), + LocalMemAvailable(0) { } + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + virtual const char *getPassName() const { + return "AMDGPU Promote Alloca"; + } + void visitAlloca(AllocaInst &I); +}; + +} // End anonymous namespace + +char AMDGPUPromoteAlloca::ID = 0; + +bool AMDGPUPromoteAlloca::doInitialization(Module &M) { + Mod = &M; + return false; +} + +bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { + + const FunctionType *FTy = F.getFunctionType(); + + LocalMemAvailable = ST.getLocalMemorySize(); + + + // If the function has any arguments in the local address space, then it's + // possible these arguments require the entire local memory space, so + // we cannot use local memory in the pass. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { + const Type *ParamTy = FTy->getParamType(i); + if (ParamTy->isPointerTy() && + ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + LocalMemAvailable = 0; + DEBUG(dbgs() << "Function has local memory argument. Promoting to " + "local memory disabled.\n"); + break; + } + } + + if (LocalMemAvailable > 0) { + // Check how much local memory is being used by global objects + for (Module::global_iterator I = Mod->global_begin(), + E = Mod->global_end(); I != E; ++I) { + GlobalVariable *GV = I; + PointerType *GVTy = GV->getType(); + if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + continue; + for (Value::use_iterator U = GV->use_begin(), + UE = GV->use_end(); U != UE; ++U) { + Instruction *Use = dyn_cast(*U); + if (!Use) + continue; + if (Use->getParent()->getParent() == &F) + LocalMemAvailable -= + Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType()); + } + } + } + + LocalMemAvailable = std::max(0, LocalMemAvailable); + DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n"); + + visit(F); + + return false; +} + +static VectorType *arrayTypeToVecType(const Type *ArrayTy) { + return VectorType::get(ArrayTy->getArrayElementType(), + ArrayTy->getArrayNumElements()); +} + +static Value* calculateVectorIndex(Value *Ptr, + std::map GEPIdx) { + if (isa(Ptr)) + return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext())); + + GetElementPtrInst *GEP = cast(Ptr); + + return GEPIdx[GEP]; +} + +static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { + // FIXME we only support simple cases + if (GEP->getNumOperands() != 3) + return NULL; + + ConstantInt *I0 = dyn_cast(GEP->getOperand(1)); + if (!I0 || !I0->isZero()) + return NULL; + + return GEP->getOperand(2); +} + +static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { + Type *AllocaTy = Alloca->getAllocatedType(); + + DEBUG(dbgs() << "Alloca Candidate for vectorization \n"); + + // FIXME: There is no reason why we can't support larger arrays, we + // are just being conservative for now. 
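+  // Only [N x Ty] allocas with N <= 4 and a non-vector element type are
+  // vectorized here; anything else falls through to the LDS path in
+  // visitAlloca below.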
+ if (!AllocaTy->isArrayTy() || + AllocaTy->getArrayElementType()->isVectorTy() || + AllocaTy->getArrayNumElements() > 4) { + + DEBUG(dbgs() << " Cannot convert type to vector"); + return false; + } + + std::map GEPVectorIdx; + std::vector WorkList; + for (User *AllocaUser : Alloca->users()) { + GetElementPtrInst *GEP = dyn_cast(AllocaUser); + if (!GEP) { + WorkList.push_back(AllocaUser); + continue; + } + + Value *Index = GEPToVectorIndex(GEP); + + // If we can't compute a vector index from this GEP, then we can't + // promote this alloca to vector. + if (!Index) { + DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << "\n"); + return false; + } + + GEPVectorIdx[GEP] = Index; + for (User *GEPUser : AllocaUser->users()) { + WorkList.push_back(GEPUser); + } + } + + VectorType *VectorTy = arrayTypeToVecType(AllocaTy); + + DEBUG(dbgs() << " Converting alloca to vector "; AllocaTy->dump(); + dbgs() << " -> "; VectorTy->dump(); dbgs() << "\n"); + + for (std::vector::iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { + Instruction *Inst = cast(*I); + IRBuilder<> Builder(Inst); + switch (Inst->getOpcode()) { + case Instruction::Load: { + Value *Ptr = Inst->getOperand(0); + Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0)); + Value *VecValue = Builder.CreateLoad(BitCast); + Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); + Inst->replaceAllUsesWith(ExtractElement); + Inst->eraseFromParent(); + break; + } + case Instruction::Store: { + Value *Ptr = Inst->getOperand(1); + Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0)); + Value *VecValue = Builder.CreateLoad(BitCast); + Value *NewVecValue = Builder.CreateInsertElement(VecValue, + Inst->getOperand(0), + Index); + Builder.CreateStore(NewVecValue, BitCast); + Inst->eraseFromParent(); + break; + } + case Instruction::BitCast: + break; + + default: + Inst->dump(); + llvm_unreachable("Do not know how to replace this instruction " + "with vector op"); + } + } + return true; +} + +static void collectUsesWithPtrTypes(Value *Val, std::vector &WorkList) { + for (User *User : Val->users()) { + if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end()) + continue; + if (isa(User)) { + WorkList.push_back(User); + continue; + } + if (!User->getType()->isPointerTy()) + continue; + WorkList.push_back(User); + collectUsesWithPtrTypes(User, WorkList); + } +} + +void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { + IRBuilder<> Builder(&I); + + // First try to replace the alloca with a vector + Type *AllocaTy = I.getAllocatedType(); + + DEBUG(dbgs() << "Trying to promote " << I); + + if (tryPromoteAllocaToVector(&I)) + return; + + DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); + + // FIXME: This is the maximum work group size. We should try to get + // value from the reqd_work_group_size function attribute if it is + // available. 
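+  // Layout used below: one [WorkGroupSize x AllocaTy] array is created in LDS
+  // and each work-item addresses its own slot through the flattened id
+  //   TID = tidig.x * local_size.y * local_size.z
+  //       + tidig.y * local_size.z
+  //       + tidig.z
+  // so AllocaSize is the footprint of this alloca for the whole work group.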
+ unsigned WorkGroupSize = 256; + int AllocaSize = WorkGroupSize * + Mod->getDataLayout()->getTypeAllocSize(AllocaTy); + + if (AllocaSize > LocalMemAvailable) { + DEBUG(dbgs() << " Not enough local memory to promote alloca.\n"); + return; + } + + DEBUG(dbgs() << "Promoting alloca to local memory\n"); + LocalMemAvailable -= AllocaSize; + + GlobalVariable *GV = new GlobalVariable( + *Mod, ArrayType::get(I.getAllocatedType(), 256), false, + GlobalValue::ExternalLinkage, 0, I.getName(), 0, + GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS); + + FunctionType *FTy = FunctionType::get( + Type::getInt32Ty(Mod->getContext()), false); + AttributeSet AttrSet; + AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone); + + Value *ReadLocalSizeY = Mod->getOrInsertFunction( + "llvm.r600.read.local.size.y", FTy, AttrSet); + Value *ReadLocalSizeZ = Mod->getOrInsertFunction( + "llvm.r600.read.local.size.z", FTy, AttrSet); + Value *ReadTIDIGX = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.x", FTy, AttrSet); + Value *ReadTIDIGY = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.y", FTy, AttrSet); + Value *ReadTIDIGZ = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.z", FTy, AttrSet); + + + Value *TCntY = Builder.CreateCall(ReadLocalSizeY); + Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ); + Value *TIdX = Builder.CreateCall(ReadTIDIGX); + Value *TIdY = Builder.CreateCall(ReadTIDIGY); + Value *TIdZ = Builder.CreateCall(ReadTIDIGZ); + + Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ); + Tmp0 = Builder.CreateMul(Tmp0, TIdX); + Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ); + Value *TID = Builder.CreateAdd(Tmp0, Tmp1); + TID = Builder.CreateAdd(TID, TIdZ); + + std::vector Indices; + Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext()))); + Indices.push_back(TID); + + Value *Offset = Builder.CreateGEP(GV, Indices); + I.mutateType(Offset->getType()); + I.replaceAllUsesWith(Offset); + I.eraseFromParent(); + + std::vector WorkList; + + collectUsesWithPtrTypes(Offset, WorkList); + + for (std::vector::iterator i = WorkList.begin(), + e = WorkList.end(); i != e; ++i) { + Value *V = *i; + CallInst *Call = dyn_cast(V); + if (!Call) { + Type *EltTy = V->getType()->getPointerElementType(); + PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + V->mutateType(NewTy); + continue; + } + + IntrinsicInst *Intr = dyn_cast(Call); + if (!Intr) { + std::vector ArgTypes; + for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands(); + ArgIdx != ArgEnd; ++ArgIdx) { + ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType()); + } + Function *F = Call->getCalledFunction(); + FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes, + F->isVarArg()); + Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType, + F->getAttributes()); + Function *NewF = cast(C); + Call->setCalledFunction(NewF); + continue; + } + + Builder.SetInsertPoint(Intr); + switch (Intr->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // These intrinsics are for address space 0 only + Intr->eraseFromParent(); + continue; + case Intrinsic::memcpy: { + MemCpyInst *MemCpy = cast(Intr); + Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(), + MemCpy->getLength(), MemCpy->getAlignment(), + MemCpy->isVolatile()); + Intr->eraseFromParent(); + continue; + } + case Intrinsic::memset: { + MemSetInst *MemSet = cast(Intr); + Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), + MemSet->getLength(), 
MemSet->getAlignment(), + MemSet->isVolatile()); + Intr->eraseFromParent(); + continue; + } + default: + Intr->dump(); + llvm_unreachable("Don't know how to promote alloca intrinsic use."); + } + } +} + +FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) { + return new AMDGPUPromoteAlloca(ST); +} diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 348d50f93f42..4fd43905b210 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -41,6 +41,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : EnableIfCvt = true; WavefrontSize = 0; CFALUBug = false; + LocalMemorySize = 0; ParseSubtargetFeatures(GPU, FS); DevName = GPU; @@ -109,6 +110,10 @@ AMDGPUSubtarget::hasCFAluBug() const { assert(getGeneration() <= NORTHERN_ISLANDS); return CFALUBug; } +int +AMDGPUSubtarget::getLocalMemorySize() const { + return LocalMemorySize; +} bool AMDGPUSubtarget::isTargetELF() const { return false; diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 411aaf9711e7..9c78f35df3e2 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -56,6 +56,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { bool EnableIfCvt; unsigned WavefrontSize; bool CFALUBug; + int LocalMemorySize; InstrItineraryData InstrItins; @@ -109,6 +110,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { unsigned getWavefrontSize() const; unsigned getStackEntrySize() const; bool hasCFAluBug() const; + int getLocalMemorySize() const; bool enableMachineScheduler() const override { return getGeneration() <= NORTHERN_ISLANDS; diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 3c896af46a6c..be1eceaaa00d 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -109,6 +109,7 @@ class AMDGPUPassConfig : public TargetPassConfig { return nullptr; } + virtual void addCodeGenPrepare(); bool addPreISel() override; bool addInstSelector() override; bool addPreRegAlloc() override; @@ -134,6 +135,13 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createAMDGPUTargetTransformInfoPass(this)); } +void AMDGPUPassConfig::addCodeGenPrepare() { + const AMDGPUSubtarget &ST = TM->getSubtarget(); + addPass(createAMDGPUPromoteAlloca(ST)); + addPass(createSROAPass()); + TargetPassConfig::addCodeGenPrepare(); +} + bool AMDGPUPassConfig::addPreISel() { const AMDGPUSubtarget &ST = TM->getSubtarget(); diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 128dd683fb25..be2ca06ef34d 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_target(R600CodeGen AMDGPUTargetTransformInfo.cpp AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp + AMDGPUPromoteAlloca.cpp AMDGPURegisterInfo.cpp R600ClauseMergePass.cpp R600ControlFlowFinalizer.cpp diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 87238d6156b9..beea54e14e9a 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -136,6 +136,16 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); + 
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); @@ -540,6 +550,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const R600MachineFunctionInfo *MFI = MF.getInfo(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -812,6 +824,56 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, } } +SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, + SDValue Vector) const { + + SDLoc DL(Vector); + EVT VecVT = Vector.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + SmallVector Args; + + for (unsigned i = 0, e = VecVT.getVectorNumElements(); + i != e; ++i) { + Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, + Vector, DAG.getConstant(i, getVectorIdxTy()))); + } + + return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); +} + +SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Index = Op.getOperand(1); + + if (isa(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Index); +} + +SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + + if (isa(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Value, Index); + return vectorToVerticalVector(DAG, Insert); +} + SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { // On hw >= R700, COS/SIN input must be between -1. and 1. 
// Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 301509fc8cf0..ed12dab48b4a 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -51,7 +51,10 @@ class R600TargetLowering : public AMDGPUTargetLowering { void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned dword_offset) const; SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const; + SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 080ff6eea75d..3972e2f03730 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -51,11 +51,15 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { unsigned VectorComponents = 0; - if (AMDGPU::R600_Reg128RegClass.contains(DestReg) && - AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { + if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || + AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && + (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || + AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) { VectorComponents = 4; - } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) && - AMDGPU::R600_Reg64RegClass.contains(SrcReg)) { + } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) || + AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && + (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || + AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) { VectorComponents = 2; } @@ -1053,6 +1057,29 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return 2; } +bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + + switch(MI->getOpcode()) { + default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); + case AMDGPU::R600_EXTRACT_ELT_V2: + case AMDGPU::R600_EXTRACT_ELT_V4: + buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(), + RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address + MI->getOperand(2).getReg(), + RI.getHWRegChan(MI->getOperand(1).getReg())); + break; + case AMDGPU::R600_INSERT_ELT_V2: + case AMDGPU::R600_INSERT_ELT_V4: + buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value + RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address + MI->getOperand(3).getReg(), // Offset + RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel + break; + } + MI->eraseFromParent(); + return true; +} + void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const { const AMDGPUFrameLowering *TFL = @@ -1090,7 +1117,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); + return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0); +} + +MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + 
unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const { + unsigned AddrReg; + switch (AddrChan) { + default: llvm_unreachable("Invalid Channel"); + case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); setImmOperand(MOVA, AMDGPU::OpName::write, 0); @@ -1107,7 +1149,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); + return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0); +} + +MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const { + unsigned AddrReg; + switch (AddrChan) { + default: llvm_unreachable("Invalid Channel"); + case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 60df21571b35..45a57d367b8b 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -36,6 +36,18 @@ namespace llvm { std::vector > ExtractSrcs(MachineInstr *MI, const DenseMap &PV, unsigned &ConstCount) const; + + MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const; + + MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const; public: enum BankSwizzle { ALU_VEC_012_SCL_210 = 0, @@ -195,6 +207,8 @@ namespace llvm { int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const override { return 1;} + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + /// \brief Reserve the registers that may be accesed using indirect addressing. 
void reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index ec039c50dad7..58c704d8ec85 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1581,6 +1581,60 @@ let isTerminator=1 in { defm CONTINUEC : BranchInstr2<"CONTINUEC">; } +//===----------------------------------------------------------------------===// +// Indirect addressing pseudo instructions +//===----------------------------------------------------------------------===// + +let isPseudo = 1 in { + +class ExtractVertical : InstR600 < + (outs R600_Reg32:$dst), + (ins vec_rc:$vec, R600_Reg32:$index), "", + [], + AnyALU +>; + +let Constraints = "$dst = $vec" in { + +class InsertVertical : InstR600 < + (outs vec_rc:$dst), + (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "", + [], + AnyALU +>; + +} // End Constraints = "$dst = $vec" + +} // End isPseudo = 1 + +def R600_EXTRACT_ELT_V2 : ExtractVertical ; +def R600_EXTRACT_ELT_V4 : ExtractVertical ; + +def R600_INSERT_ELT_V2 : InsertVertical ; +def R600_INSERT_ELT_V4 : InsertVertical ; + +class ExtractVerticalPat : Pat < + (scalar_ty (extractelt vec_ty:$vec, i32:$index)), + (inst $vec, $index) +>; + +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; + +class InsertVerticalPat : Pat < + (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)), + (inst $vec, $value, $index) +>; + +def : InsertVerticalPat ; +def : InsertVerticalPat ; +def : InsertVerticalPat ; +def : InsertVerticalPat ; + //===----------------------------------------------------------------------===// // ISel Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 68bcd207b42c..cc667d985a82 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -18,18 +18,28 @@ class R600RegWithChan sel, string chan> : class R600Reg_128 subregs, bits<16> encoding> : RegisterWithSubRegs { + field bits<2> chan_encoding = 0; let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1, sub2, sub3]; - let HWEncoding = encoding; + let HWEncoding{8-0} = encoding{8-0}; + let HWEncoding{10-9} = chan_encoding; } class R600Reg_64 subregs, bits<16> encoding> : RegisterWithSubRegs { + field bits<2> chan_encoding = 0; let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; let HWEncoding = encoding; + let HWEncoding{8-0} = encoding{8-0}; + let HWEncoding{10-9} = chan_encoding; } +class R600Reg_64Vertical : R600Reg_64 < + "V"#lo#hi#"_"#chan, + [!cast("T"#lo#"_"#chan), !cast("T"#hi#"_"#chan)], + lo +>; foreach Index = 0-127 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { @@ -54,6 +64,24 @@ foreach Index = 0-127 in { Index>; } +foreach Chan = [ "X", "Y", "Z", "W"] in { + + let chan_encoding = !if(!eq(Chan, "X"), 0, + !if(!eq(Chan, "Y"), 1, + !if(!eq(Chan, "Z"), 2, + !if(!eq(Chan, "W"), 3, 0)))) in { + def V0123_#Chan : R600Reg_128 <"V0123_"#Chan, + [!cast("T0_"#Chan), + !cast("T1_"#Chan), + !cast("T2_"#Chan), + !cast("T3_"#Chan)], + 0>; + def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>; + def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>; + } +} + + // KCACHE_BANK0 foreach Index = 159-128 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { @@ -130,8 +158,14 @@ def ALU_PARAM : R600Reg<"Param", 0>; let isAllocatable = 0 in { -// XXX: Only use the X channel, until we 
support wider stack widths -def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>; +def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>; + +// We only use Addr_[YZW] for vertical vectors. +// FIXME if we add more vertical vector registers we will need to ad more +// registers to these classes. +def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>; +def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>; +def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>; def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32, (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>; @@ -206,5 +240,13 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, let CopyCost = -1; } +def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, + (add V0123_W, V0123_Z, V0123_Y, V0123_X) +>; + def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, (add (sequence "T%u_XY", 0, 63))>; + +def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, + (add V01_X, V01_Y, V01_Z, V01_W, + V23_X, V23_Y, V23_Z, V23_W)>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 2f04bea269ac..48b6b7d7308c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2560,13 +2560,13 @@ multiclass SI_INDIRECT_Pattern ; // 2. Extract without offset def : Pat< (vector_extract vt:$vec, i32:$idx), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) + (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) >; // 3. Insert with offset diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll index c2362da15cde..3230353c36c7 100644 --- a/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/test/CodeGen/R600/array-ptr-calc-i32.ll @@ -10,7 +10,12 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; SI-LABEL: @test_private_array_ptr_calc: ; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]] -; SI: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]] +; +; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this +; alloca to a vector. 
It currently fails because it does not know how +; to interpret: +; getelementptr [4 x i32]* %alloca, i32 1, i32 %b +; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]] define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { %alloca = alloca [4 x i32], i32 4, align 16 %tid = call i32 @llvm.SI.tid() readnone diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll index 4d1f7347ecc9..b127b7ede2e8 100644 --- a/test/CodeGen/R600/indirect-private-64.ll +++ b/test/CodeGen/R600/indirect-private-64.ll @@ -3,10 +3,8 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind ; SI-LABEL: @private_access_f64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind { %val = load double addrspace(1)* %in, align 8 %array = alloca double, i32 16, align 8 @@ -19,14 +17,10 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double } ; SI-LABEL: @private_access_v2f64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 +; SI: DS_READ_B64 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 @@ -39,10 +33,8 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out } ; SI-LABEL: @private_access_i64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind { %val = load i64 addrspace(1)* %in, align 8 %array = alloca i64, i32 16, align 8 @@ -55,14 +47,10 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs } ; SI-LABEL: @private_access_v2i64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 +; SI: DS_READ_B64 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll index dd9b6775c082..d8be6d40f310 100644 --- a/test/CodeGen/R600/large-alloca.ll +++ b/test/CodeGen/R600/large-alloca.ll @@ -2,10 +2,13 @@ ; REQUIRES: asserts ; RUN: llc -march=r600 -mcpu=SI < %s -define void @large_alloca(i32 addrspace(1)* %out, i32 %x) nounwind { - %large = alloca [256 x i32], align 4 - %gep = getelementptr [256 x i32]* %large, i32 0, i32 255 +define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32]* %large, i32 0, i32 8191 store i32 %x, i32* %gep + %gep1 = 
getelementptr [8192 x i32]* %large, i32 0, i32 %y + %0 = load i32* %gep1 + store i32 %0, i32 addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll index 4afaf684bfce..8a269e0cb43a 100644 --- a/test/CodeGen/R600/parallelandifcollapse.ll +++ b/test/CodeGen/R600/parallelandifcollapse.ll @@ -7,6 +7,12 @@ ; CHECK: AND_INT ; CHECK-NEXT: AND_INT ; CHECK-NEXT: OR_INT + +; FIXME: For some reason having the allocas here allowed the flatten cfg pass +; to do its transfomation, however now that we are using local memory for +; allocas, the transformation isn't happening. +; XFAIL: * + define void @_Z9chk1D_512v() #0 { entry: %a0 = alloca i32, align 4 diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll index b0db7cdd0671..feca688c30aa 100644 --- a/test/CodeGen/R600/parallelorifcollapse.ll +++ b/test/CodeGen/R600/parallelorifcollapse.ll @@ -3,6 +3,11 @@ ; ; CFG flattening should use parallel-or to generate branch conditions and ; then merge if-regions with the same bodies. + +; FIXME: For some reason having the allocas here allowed the flatten cfg pass +; to do its transfomation, however now that we are using local memory for +; allocas, the transformation isn't happening. +; XFAIL: * ; ; CHECK: OR_INT ; CHECK-NEXT: OR_INT diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index d3453f26aee6..c60c05975600 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,24 +1,17 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC -; This test checks that uses and defs of the AR register happen in the same -; instruction clause. 
- ; FUNC-LABEL: @mova_same_clause -; R600-CHECK: MOVA_INT -; R600-CHECK-NOT: ALU clause -; R600-CHECK: 0 + AR.x -; R600-CHECK: MOVA_INT -; R600-CHECK-NOT: ALU clause -; R600-CHECK: 0 + AR.x - -; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo -; SI-CHECK: V_MOVRELD -; SI-CHECK: S_CBRANCH -; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo -; SI-CHECK: V_MOVRELD -; SI-CHECK: S_CBRANCH +; R600-CHECK: LDS_WRITE +; R600-CHECK: LDS_WRITE +; R600-CHECK: LDS_READ +; R600-CHECK: LDS_READ + +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_READ_B32 +; SI-CHECK: DS_READ_B32 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -114,12 +107,8 @@ for.end: ; FUNC-LABEL: @short_array -; R600-CHECK: MOV {{\** *}}T{{[0-9]\.[XYZW]}}, literal -; R600-CHECK: 65536 -; R600-CHECK: * ; R600-CHECK: MOVA_INT -; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 0x10000 ; SI-CHECK: V_MOVRELS_B32_e32 define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: @@ -137,10 +126,7 @@ entry: ; FUNC-LABEL: @char_array -; R600-CHECK: OR_INT {{\** *}}T{{[0-9]\.[XYZW]}}, {{[PVT0-9]+\.[XYZW]}}, literal -; R600-CHECK: 256 -; R600-CHECK: * -; R600-CHECK-NEXT: MOVA_INT +; R600-CHECK: MOVA_INT ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100 ; SI-CHECK: V_MOVRELS_B32_e32 @@ -185,7 +171,9 @@ entry: ; Test that two stack objects are not stored in the same register ; The second stack object should be in T3.X ; FUNC-LABEL: @no_overlap -; R600-CHECK: MOV {{\** *}}T3.X +; R600_CHECK: MOV +; R600_CHECK: [[CHAN:[XYZW]]]+ +; R600-CHECK-NOT: [[CHAN]]+ ; SI-CHECK: V_MOV_B32_e32 v3 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) { entry: diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll index d9f60ea1a4dc..dee432664e89 100644 --- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll +++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; XFAIL: * + ; 64-bit select was originally lowered with a build_pair, and this ; could be simplified to 1 cndmask instead of 2, but that broken when ; it started being implemented with a v2i32 build_vector and @@ -12,9 +14,10 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) { ret void } +; FIXME: Fix truncating store for local memory ; SI-LABEL: @trunc_load_alloca_i64: -; SI: V_MOVRELS_B32 -; SI-NOT: V_MOVRELS_B32 +; SI: DS_READ_B32 +; SI-NOT: DS_READ_B64 ; SI: S_ENDPGM define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) { %idx = add i32 %a, %b diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll new file mode 100644 index 000000000000..6543f6d05933 --- /dev/null +++ b/test/CodeGen/R600/vector-alloca.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; FUNC-LABEL: @vector_read +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOVA_INT +define void @vector_read(i32 addrspace(1)* %out, i32 %index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 1, i32* %y + 
store i32 2, i32* %z + store i32 3, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 %index + %2 = load i32* %1 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @vector_write +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOVA_INT +; EG: MOVA_INT +define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 0, i32* %y + store i32 0, i32* %z + store i32 0, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 %w_index + store i32 1, i32* %1 + %2 = getelementptr [4 x i32]* %0, i32 0, i32 %r_index + %3 = load i32* %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; This test should be optimize to: +; store i32 0, i32 addrspace(1)* %out +; FUNC-LABEL: @bitcast_gep +; CHECK: STORE_RAW +define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 0, i32* %y + store i32 0, i32* %z + store i32 0, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 1 + %2 = bitcast i32* %1 to [4 x i32]* + %3 = getelementptr [4 x i32]* %2, i32 0, i32 0 + %4 = load i32* %3 + store i32 %4, i32 addrspace(1)* %out + ret void +} From b5a910240533d0b73dde5b23396502e71bd5b130 Mon Sep 17 00:00:00 2001 From: Will Schmidt Date: Tue, 17 Jun 2014 17:04:42 +0000 Subject: [PATCH 015/157] mark the old jit tests as unsupported for powerpc64 (for cmake) mark the old JIT tests as unsupported for powerpc64 - CMake style. This follows the style used for hexagon/arm64/aarch64. 
The equivalent tests still run under the supported MCJIT/* git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211111 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/ExecutionEngine/lit.local.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg index 9ec54a583f86..f6673df3c358 100644 --- a/test/ExecutionEngine/lit.local.cfg +++ b/test/ExecutionEngine/lit.local.cfg @@ -2,6 +2,9 @@ if config.root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']: config.unsupported = True # CMake and autoconf diverge in naming or host_arch +if 'powerpc64' in config.root.target_triple: + config.unsupported = True + if 'aarch64' in config.root.target_triple \ or 'arm64' in config.root.target_triple: config.unsupported = True From 62e378b057fc4c1d75092cfb5fac7feee93cdb74 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 17 Jun 2014 17:36:24 +0000 Subject: [PATCH 016/157] R600/SI: Match ctlz_zero_undef git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211115 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 + lib/Target/R600/SIInstrInfo.cpp | 1 + lib/Target/R600/SIInstructions.td | 8 ++-- test/CodeGen/R600/ctlz_zero_undef.ll | 57 ++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/R600/ctlz_zero_undef.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 58959b8d4bda..4d95723a71a1 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -298,7 +298,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); } static const MVT::SimpleValueType FloatVectorTypes[] = { diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index bb13c3eb86a0..b169ac3c4d71 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -679,6 +679,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_LOAD_DWORDX4_IMM: case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; + case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; } } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 48b6b7d7308c..d809e5790ea2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -112,11 +112,13 @@ def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32", >; def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>; -////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; +////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>; ////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; -////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; +////def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32", []>; ////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; -//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>; +def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", + [(set i32:$dst, (ctlz_zero_undef i32:$src0))] +>; //def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, 
"S_FLBIT_I32_B64", []>; def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; //def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll new file mode 100644 index 000000000000..15b5188efd60 --- /dev/null +++ b/test/CodeGen/R600/ctlz_zero_undef.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone + +; FUNC-LABEL: @s_ctlz_zero_undef_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_v2i32: +; SI: BUFFER_LOAD_DWORDX2 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <2 x i32> addrspace(1)* %valptr, align 8 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone + store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_v4i32: +; SI: BUFFER_LOAD_DWORDX4 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: BUFFER_STORE_DWORDX4 +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <4 x i32> addrspace(1)* %valptr, align 16 + %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone + store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16 + ret void +} From 3f1f259c22579278844d9b23cddd04272062021c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 17 Jun 2014 17:36:27 +0000 Subject: [PATCH 017/157] R600/SI: Match cttz_zero_undef git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211116 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.cpp | 1 + lib/Target/R600/SIInstructions.td | 6 ++- test/CodeGen/R600/cttz_zero_undef.ll | 57 ++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/R600/cttz_zero_undef.ll diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index b169ac3c4d71..e06dd1de3ec4 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -679,6 +679,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_LOAD_DWORDX4_IMM: case 
AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; + case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; } } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index d809e5790ea2..f7f02ee347dd 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -114,11 +114,15 @@ def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>; ////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>; ////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; -////def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32", []>; +def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32", + [(set i32:$dst, (cttz_zero_undef i32:$src0))] +>; ////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; + def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", [(set i32:$dst, (ctlz_zero_undef i32:$src0))] >; + //def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; //def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll new file mode 100644 index 000000000000..cf44f8e60d01 --- /dev/null +++ b/test/CodeGen/R600/cttz_zero_undef.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone + +; FUNC-LABEL: @s_cttz_zero_undef_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + store i32 %cttz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + store i32 %cttz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_v2i32: +; SI: BUFFER_LOAD_DWORDX2 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <2 x i32> addrspace(1)* %valptr, align 8 + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone + store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_v4i32: +; SI: BUFFER_LOAD_DWORDX4 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: BUFFER_STORE_DWORDX4 +; SI: S_ENDPGM +define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind 
{ + %val = load <4 x i32> addrspace(1)* %valptr, align 16 + %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone + store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16 + ret void +} From acc5e810aaa875698ff032a3b8353194a6d3e247 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 17 Jun 2014 17:54:13 +0000 Subject: [PATCH 018/157] =?UTF-8?q?Add=20"-format=20darwin"=20to=20llvm-si?= =?UTF-8?q?ze=20to=20be=20like=20darwin's=20size(1)=20-m=20output,=20and?= =?UTF-8?q?=20and=20the=20-l=20option=20for=20the=20long=20format.=20=20Al?= =?UTF-8?q?so=20when=20the=20object=20is=20a=20Mach-O=20file=20and=20the?= =?UTF-8?q?=20format=20is=20berkeley=20produce=20output=20like=20darwin?= =?UTF-8?q?=E2=80=99s=20default=20size(1)=20summary=20berkeley=20derived?= =?UTF-8?q?=20output.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like System V format, there are also some small changes in how and where the file names and archive member names are printed for darwin and Mach-O. Like the changes to llvm-nm these are the first steps in seeing if it is possible to make llvm-size produce the same output as darwin's size(1). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211117 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/Inputs/hello-world.macho-x86_64 | Bin 0 -> 8496 bytes test/Object/Inputs/macho-archive-x86_64.a | Bin 0 -> 1304 bytes test/Object/size-trivial-macho.test | 56 ++++- tools/llvm-size/llvm-size.cpp | 247 ++++++++++++++++++-- 4 files changed, 276 insertions(+), 27 deletions(-) create mode 100755 test/Object/Inputs/hello-world.macho-x86_64 create mode 100644 test/Object/Inputs/macho-archive-x86_64.a diff --git a/test/Object/Inputs/hello-world.macho-x86_64 b/test/Object/Inputs/hello-world.macho-x86_64 new file mode 100755 index 0000000000000000000000000000000000000000..d004bedf6ad44f0965a1c9d49438f521f2206c75 GIT binary patch literal 8496 zcmeHM&r4KM6uwV0O)W=|sH}KNh83s_7lAFDaOg>rzafISIr?lS-i$Ky1~m`_ix|jo zAGB^02rZ&*1-9!is8z)T1uaFQ>HFT?r+3C5`{*7x-20t#?z!iFb055U=i}DTKea+w z)j}*+2_YiX8X+DD8#5tJLLI1-!_n)p+p*ChTFb>MhkGLC>A(h3j>m3|mxl!Fxv?$^ zqc(wZ#9`%=(svj9oxsX`-Ay-%;f!)}<^r98%`e^U3tI zOr~dM16=-lJX87jn^TAn5-Ht;oXJe4+)`9!eFc^8gi1n;ySA_Jo)n-`_D9E~S{Uv_ z**yvGi*`?#H$1PDX-{SsQ}J0(&iR>r^Mz@6@%6g+dj|Wd5~TDd56Tz8ddAnOcw#*t z@3D?|o~yxun>S+p1K~4aSDX`=(yfYN9=)zMJh$BpuuFT_VGOwr%)lr`bYo;3`Jw*> zFrSM}j8`x=K!>5aFg8@(uFng0#q*_{$1uVr8lhdeY^KXg#=E8$y=h=dE0lG(Z$CvB z*L!;&w?28*fB$v-JZ!929H|fVQiy4Tbl|*P$LCrX?!$A?$C@qcC5~t9?2Y}$7PG#a z>b%y8I+Ae&K`ubK-(9=J-$jKJz?92S`ic868ZitQ1`Gp+0mFb{z%XDK_`evKaPnWB zl^?dVS~G-`Z^VA{{Q0F#C!hP}XFs1BL;^fMLKeU>GnA7zPXj zh5^HXVc=h6pt&tF!n&LL*qjTR;WCc?Fq+pR8L=iZk>)-*hX%Ai0|c=o`temVm5}a& zJC*ZY^tvY*el9hcOpA7Yov0S;XBi6#CtIHz*~Y&L4L>imz{U`?^a2}Ou3X)qwdw#% zZBsg$te`U(1RoD@=|cz;@&gIRFt=bIQOBjSU_={_a~mDu-Di-&8N&1KVg&2{0(!F2 AbpQYW literal 0 HcmV?d00001 diff --git a/test/Object/Inputs/macho-archive-x86_64.a b/test/Object/Inputs/macho-archive-x86_64.a new file mode 100644 index 0000000000000000000000000000000000000000..9979ba9dd1e3862410a4b4d1cd9bc442cb8c626e GIT binary patch literal 1304 zcmbVML2DCH5T32I4XY+Ocu?^0L=QdHecL27Ad+ArJt&qof*xeDrYZ5zri3jmo_Y|W zxBdiwhR1>zZ~h9Q7yp0<^_zWfcDGVR{mAUhH*YdCZ!(+P_mkOZ@*uop@5PZRU>i}~ zZrXOMVT_IOFxJYcjiOcqw3P=BL+0hp7f*WqM`rWs#-QILpbohV>0A;$*aDT`8Vn7$Vj3)a9Yi-@bldBnqTl01^FO3#Gq}Z-l^ai}zvgBf(FOZ! 
zS7O_2=kLw={cYsiJ`C{|{BeKRr61$U%l@7@^TMa@*q* OutputFormat("format", cl::desc("Specify output format"), cl::values(clEnumVal(sysv, "System V format"), clEnumVal(berkeley, "Berkeley format"), - clEnumValEnd), + clEnumVal(darwin, "Darwin -m format"), clEnumValEnd), cl::init(berkeley)); static cl::opt @@ -47,6 +48,13 @@ static cl::opt clEnumValEnd), cl::init(berkeley)); +static bool berkeleyHeaderPrinted = false; +static bool moreThanOneFile = false; + +cl::opt DarwinLongFormat("l", + cl::desc("When format is darwin, use long format " + "to include addresses and offsets.")); + enum RadixTy {octal = 8, decimal = 10, hexadecimal = 16}; static cl::opt Radix("-radix", @@ -85,6 +93,182 @@ static size_t getNumLengthAsString(uint64_t num) { return result.size(); } +/// @brief Return the the printing format for the Radix. +static const char * getRadixFmt(void) { + switch (Radix) { + case octal: + return PRIo64; + case decimal: + return PRIu64; + case hexadecimal: + return PRIx64; + } + return nullptr; +} + +/// @brief Print the size of each Mach-O segment and section in @p MachO. +/// +/// This is when used when @c OutputFormat is darwin and produces the same +/// output as darwin's size(1) -m output. +static void PrintDarwinSectionSizes(MachOObjectFile *MachO) { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + const char *radix_fmt = getRadixFmt(); + if (Radix == hexadecimal) + fmt << "0x"; + fmt << "%" << radix_fmt; + + uint32_t LoadCommandCount = MachO->getHeader().ncmds; + uint32_t Filetype = MachO->getHeader().filetype; + MachOObjectFile::LoadCommandInfo Load = MachO->getFirstLoadCommandInfo(); + + uint64_t total = 0; + for (unsigned I = 0; ; ++I) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load); + outs() << "Segment " << Seg.segname << ": " + << format(fmt.str().c_str(), Seg.vmsize); + if (DarwinLongFormat) + outs() << " (vmaddr 0x" << format("%" PRIx64, Seg.vmaddr) + << " fileoff " << Seg.fileoff << ")"; + outs() << "\n"; + total += Seg.vmsize; + uint64_t sec_total = 0; + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = MachO->getSection64(Load, J); + if (Filetype == MachO::MH_OBJECT) + outs() << "\tSection (" << format("%.16s", &Sec.segname) << ", " + << format("%.16s", &Sec.sectname) << "): "; + else + outs() << "\tSection " << format("%.16s", &Sec.sectname) << ": "; + outs() << format(fmt.str().c_str(), Sec.size); + if (DarwinLongFormat) + outs() << " (addr 0x" << format("%" PRIx64, Sec.addr) + << " offset " << Sec.offset << ")"; + outs() << "\n"; + sec_total += Sec.size; + } + if (Seg.nsects != 0) + outs() << "\ttotal " << format(fmt.str().c_str(), sec_total) << "\n"; + } + else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = MachO->getSegmentLoadCommand(Load); + outs() << "Segment " << Seg.segname << ": " + << format(fmt.str().c_str(), Seg.vmsize); + if (DarwinLongFormat) + outs() << " (vmaddr 0x" << format("%" PRIx64, Seg.vmaddr) + << " fileoff " << Seg.fileoff << ")"; + outs() << "\n"; + total += Seg.vmsize; + uint64_t sec_total = 0; + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = MachO->getSection(Load, J); + if (Filetype == MachO::MH_OBJECT) + outs() << "\tSection (" << format("%.16s", &Sec.segname) << ", " + << format("%.16s", &Sec.sectname) << "): "; + else + outs() << "\tSection " << format("%.16s", &Sec.sectname) << ": "; + outs() << format(fmt.str().c_str(), Sec.size); + if (DarwinLongFormat) + outs() << " (addr 0x" << format("%" PRIx64, 
Sec.addr) + << " offset " << Sec.offset << ")"; + outs() << "\n"; + sec_total += Sec.size; + } + if (Seg.nsects != 0) + outs() << "\ttotal " << format(fmt.str().c_str(), sec_total) << "\n"; + } + if (I == LoadCommandCount - 1) + break; + else + Load = MachO->getNextLoadCommandInfo(Load); + } + outs() << "total " << format(fmt.str().c_str(), total) << "\n"; +} + +/// @brief Print the summary sizes of the standard Mach-O segments in @p MachO. +/// +/// This is when used when @c OutputFormat is berkeley with a Mach-O file and +/// produces the same output as darwin's size(1) default output. +static void PrintDarwinSegmentSizes(MachOObjectFile *MachO) { + uint32_t LoadCommandCount = MachO->getHeader().ncmds; + MachOObjectFile::LoadCommandInfo Load = MachO->getFirstLoadCommandInfo(); + + uint64_t total_text = 0; + uint64_t total_data = 0; + uint64_t total_objc = 0; + uint64_t total_others = 0; + for (unsigned I = 0; ; ++I) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load); + if (MachO->getHeader().filetype == MachO::MH_OBJECT) { + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = MachO->getSection64(Load, J); + StringRef SegmentName = StringRef(Sec.segname); + if (SegmentName == "__TEXT") + total_text += Sec.size; + else if (SegmentName == "__DATA") + total_data += Sec.size; + else if (SegmentName == "__OBJC") + total_objc += Sec.size; + else + total_others += Sec.size; + } + } else { + StringRef SegmentName = StringRef(Seg.segname); + if (SegmentName == "__TEXT") + total_text += Seg.vmsize; + else if (SegmentName == "__DATA") + total_data += Seg.vmsize; + else if (SegmentName == "__OBJC") + total_objc += Seg.vmsize; + else + total_others += Seg.vmsize; + } + } + else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = MachO->getSegmentLoadCommand(Load); + if (MachO->getHeader().filetype == MachO::MH_OBJECT) { + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = MachO->getSection(Load, J); + StringRef SegmentName = StringRef(Sec.segname); + if (SegmentName == "__TEXT") + total_text += Sec.size; + else if (SegmentName == "__DATA") + total_data += Sec.size; + else if (SegmentName == "__OBJC") + total_objc += Sec.size; + else + total_others += Sec.size; + } + } else { + StringRef SegmentName = StringRef(Seg.segname); + if (SegmentName == "__TEXT") + total_text += Seg.vmsize; + else if (SegmentName == "__DATA") + total_data += Seg.vmsize; + else if (SegmentName == "__OBJC") + total_objc += Seg.vmsize; + else + total_others += Seg.vmsize; + } + } + if (I == LoadCommandCount - 1) + break; + else + Load = MachO->getNextLoadCommandInfo(Load); + } + uint64_t total = total_text + total_data + total_objc + total_others; + + if (!berkeleyHeaderPrinted) { + outs() << "__TEXT\t__DATA\t__OBJC\tothers\tdec\thex\n"; + berkeleyHeaderPrinted = true; + } + outs() << total_text << "\t" << total_data << "\t" << total_objc << "\t" + << total_others << "\t" << total << "\t" << format("%" PRIx64, total) + << "\t"; +} + /// @brief Print the size of each section in @p Obj. /// /// The format used is determined by @c OutputFormat and @c Radix. 
@@ -92,20 +276,19 @@ static void PrintObjectSectionSizes(ObjectFile *Obj) { uint64_t total = 0; std::string fmtbuf; raw_string_ostream fmt(fmtbuf); - - const char *radix_fmt = nullptr; - switch (Radix) { - case octal: - radix_fmt = PRIo64; - break; - case decimal: - radix_fmt = PRIu64; - break; - case hexadecimal: - radix_fmt = PRIx64; - break; - } - if (OutputFormat == sysv) { + const char *radix_fmt = getRadixFmt(); + + // If OutputFormat is darwin and we have a MachOObjectFile print as darwin's + // size(1) -m output, else if OutputFormat is darwin and not a Mach-O object + // let it fall through to OutputFormat berkeley. + MachOObjectFile *MachO = dyn_cast(Obj); + if (OutputFormat == darwin && MachO) + PrintDarwinSectionSizes(MachO); + // If we have a MachOObjectFile and the OutputFormat is berkeley print as + // darwin's default berkeley format for Mach-O files. + else if (MachO && OutputFormat == berkeley) + PrintDarwinSegmentSizes(MachO); + else if (OutputFormat == sysv) { // Run two passes over all sections. The first gets the lengths needed for // formatting the output. The second actually does the output. std::size_t max_name_len = strlen("section"); @@ -204,6 +387,13 @@ static void PrintObjectSectionSizes(ObjectFile *Obj) { total = total_text + total_data + total_bss; + if (!berkeleyHeaderPrinted) { + outs() << " text data bss " + << (Radix == octal ? "oct" : "dec") + << " hex filename\n"; + berkeleyHeaderPrinted = true; + } + // Print result. fmt << "%#7" << radix_fmt << " " << "%#7" << radix_fmt << " " @@ -251,20 +441,31 @@ static void PrintFileSectionSizes(StringRef file) { continue; } if (ObjectFile *o = dyn_cast(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast(o); if (OutputFormat == sysv) outs() << o->getFileName() << " (ex " << a->getFileName() << "):\n"; + else if(MachO && OutputFormat == darwin) + outs() << a->getFileName() << "(" << o->getFileName() << "):\n"; PrintObjectSectionSizes(o); - if (OutputFormat == berkeley) - outs() << o->getFileName() << " (ex " << a->getFileName() << ")\n"; + if (OutputFormat == berkeley) { + if (MachO) + outs() << a->getFileName() << "(" << o->getFileName() << ")\n"; + else + outs() << o->getFileName() << " (ex " << a->getFileName() << ")\n"; + } } } } else if (ObjectFile *o = dyn_cast(binary.get())) { if (OutputFormat == sysv) outs() << o->getFileName() << " :\n"; PrintObjectSectionSizes(o); - if (OutputFormat == berkeley) - outs() << o->getFileName() << "\n"; + if (OutputFormat == berkeley) { + MachOObjectFile *MachO = dyn_cast(o); + if (!MachO || moreThanOneFile) + outs() << o->getFileName(); + outs() << "\n"; + } } else { errs() << ToolName << ": " << file << ": " << "Unrecognized file type.\n"; } @@ -290,11 +491,7 @@ int main(int argc, char **argv) { if (InputFilenames.size() == 0) InputFilenames.push_back("a.out"); - if (OutputFormat == berkeley) - outs() << " text data bss " - << (Radix == octal ? "oct" : "dec") - << " hex filename\n"; - + moreThanOneFile = InputFilenames.size() > 1; std::for_each(InputFilenames.begin(), InputFilenames.end(), PrintFileSectionSizes); From 476fd7c3e9ebf96da81b1d607744679875afc5f6 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 17 Jun 2014 18:06:45 +0000 Subject: [PATCH 019/157] Merge lib/Support/WindowsError.cpp into ib/Support/ErrorHandling.cpp. The OSX ranlib warns on files with no symbols, and lib/Support/WindowsError.cpp was empty when building on non-windows. 
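For illustration only (the wrapper function below is hypothetical; only
mapWindowsError itself and the ERROR_FILE_NOT_FOUND mapping are taken from
the code in this patch), this is roughly how the merged helper is meant to
be used on Windows hosts:

    // Windows-only sketch: translate the last Win32 error into a
    // std::error_code that can be compared against llvm::errc values.
    #include "llvm/Support/Errc.h"
    #include "llvm/Support/WindowsError.h"
    #include <system_error>
    #include <windows.h>

    static std::error_code getLastWin32ErrorAsErrorCode() {
      // mapWindowsError takes the raw Win32 error number (unsigned).
      return llvm::mapWindowsError(::GetLastError());
    }

    // After a failed Win32 call, e.g. CreateFileA on a missing path:
    //   std::error_code EC = getLastWin32ErrorAsErrorCode();
    //   if (EC == llvm::errc::no_such_file_or_directory)
    //     ... report a portable "file not found" error ...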
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211118 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/CMakeLists.txt | 1 - lib/Support/ErrorHandling.cpp | 69 ++++++++++++++++++++++++++ lib/Support/WindowsError.cpp | 91 ----------------------------------- 3 files changed, 69 insertions(+), 92 deletions(-) delete mode 100644 lib/Support/WindowsError.cpp diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 9febb84f2b1d..2438d729d8de 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -89,7 +89,6 @@ add_llvm_library(LLVMSupport TimeValue.cpp Valgrind.cpp Watchdog.cpp - WindowsError.cpp ADDITIONAL_HEADERS Unix/Host.inc diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index ae1d64db4b82..c36007f55999 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -18,10 +18,12 @@ #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -134,3 +136,70 @@ void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) { void LLVMResetFatalErrorHandler() { remove_fatal_error_handler(); } + +#ifdef LLVM_ON_WIN32 + +#include + +// I'd rather not double the line count of the following. +#define MAP_ERR_TO_COND(x, y) \ + case x: \ + return make_error_code(errc::y) + +std::error_code llvm::mapWindowsError(unsigned EV) { + switch (EV) { + MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); + MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); + MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); + MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); + MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); + MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); + MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); + MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); + MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); + MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); + MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); + MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); + MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); + MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); + MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); + MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); + MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); + MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); + MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); + MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); + 
MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_SEEK, io_error); + MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); + MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); + MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); + MAP_ERR_TO_COND(WSAEACCES, permission_denied); + MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); + MAP_ERR_TO_COND(WSAEFAULT, bad_address); + MAP_ERR_TO_COND(WSAEINTR, interrupted); + MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); + MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); + MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); + default: + return std::error_code(EV, std::system_category()); + } +} + +#endif diff --git a/lib/Support/WindowsError.cpp b/lib/Support/WindowsError.cpp deleted file mode 100644 index 18fdbbda9f1c..000000000000 --- a/lib/Support/WindowsError.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===-- WindowsError.cpp - Support for mapping windows errors to posix-----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a mapping from windows errors to posix ones. -// The standard doesn't define what the equivalence is from system -// errors to generic ones. The one implemented in msvc is too conservative -// for llvm, so we do an extra mapping when constructing an error_code -// from an windows error. This allows the rest of llvm to simple checks -// like "EC == errc::file_exists" and have it work on both posix and -// windows. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Errc.h" -#include "llvm/Support/WindowsError.h" - -#include "llvm/Config/llvm-config.h" - -#ifdef LLVM_ON_WIN32 - -#include - - -// I'd rather not double the line count of the following. 
-#define MAP_ERR_TO_COND(x, y) \ - case x: \ - return make_error_code(errc::y) - -std::error_code llvm::mapWindowsError(unsigned EV) { - switch (EV) { - MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); - MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); - MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); - MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); - MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); - MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); - MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); - MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); - MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); - MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); - MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); - MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); - MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); - MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); - MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); - MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); - MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); - MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); - MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); - MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); - MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); - MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); - MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); - MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); - MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); - MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); - MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); - MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); - MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); - MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); - MAP_ERR_TO_COND(ERROR_SEEK, io_error); - MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); - MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); - MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); - MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); - MAP_ERR_TO_COND(WSAEACCES, permission_denied); - MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); - MAP_ERR_TO_COND(WSAEFAULT, bad_address); - MAP_ERR_TO_COND(WSAEINTR, interrupted); - MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); - MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); - MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); - default: - return std::error_code(EV, std::system_category()); - } -} - -#endif From 4a5916aca998a104f8bc0403f4dc566af6483023 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 17 Jun 2014 18:17:46 +0000 Subject: [PATCH 020/157] lit: simplify population of the actual_inputs array Add all inputs to the array, except those starting with @, which are treated as response files and expanded. 
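As a usage illustration (the response-file name is made up, the test paths
are just examples taken from elsewhere in this series, and the exact lit
entry point depends on how lit is invoked, e.g. utils/lit/lit.py or a
build-tree llvm-lit wrapper), a response file simply lists one input per
line:

    $ cat tests.txt
    test/CodeGen/X86/fast-isel-cmp.ll
    test/Object/size-trivial-macho.test

    $ llvm-lit @tests.txt

Each non-empty line read from tests.txt ends up in actual_inputs exactly
as if it had been passed directly on the command line.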
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211119 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/lit/lit/discovery.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/lit/lit/discovery.py b/utils/lit/lit/discovery.py index c3c0f283b558..876d4f31e950 100644 --- a/utils/lit/lit/discovery.py +++ b/utils/lit/lit/discovery.py @@ -200,9 +200,7 @@ def find_tests_for_inputs(lit_config, inputs): # Expand '@...' form in inputs. actual_inputs = [] for input in inputs: - if os.path.exists(input) or not input.startswith('@'): - actual_inputs.append(input) - else: + if input.startswith('@'): f = open(input[1:]) try: for ln in f: @@ -211,6 +209,8 @@ def find_tests_for_inputs(lit_config, inputs): actual_inputs.append(ln) finally: f.close() + else: + actual_inputs.append(input) # Load the tests from the inputs. tests = [] From 540fe7f20ee5a718abd2d5f1fff5092485a1dbc6 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 17 Jun 2014 19:34:46 +0000 Subject: [PATCH 021/157] R600/SI: Make sure target flags are set on pseudo VOP3 instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211120 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrFormats.td | 26 +++++++++++++------------- lib/Target/R600/SIInstrInfo.td | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 168eff25bb22..7cae9fc0d0eb 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -51,6 +51,16 @@ class Enc64 pattern> : let Size = 8; } +class VOP3Common pattern> : + Enc64 { + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + let VOP3 = 1; +} + //===----------------------------------------------------------------------===// // Scalar operations //===----------------------------------------------------------------------===// @@ -207,7 +217,7 @@ class VOP2 op, dag outs, dag ins, string asm, list pattern> : } class VOP3 op, dag outs, dag ins, string asm, list pattern> : - Enc64 { + VOP3Common { bits<8> dst; bits<2> src0_modifiers; @@ -233,16 +243,11 @@ class VOP3 op, dag outs, dag ins, string asm, list pattern> : let Inst{61} = src0_modifiers{0}; let Inst{62} = src1_modifiers{0}; let Inst{63} = src2_modifiers{0}; - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let VOP3 = 1; + } class VOP3b op, dag outs, dag ins, string asm, list pattern> : - Enc64 { + VOP3Common { bits<8> dst; bits<2> src0_modifiers; @@ -266,11 +271,6 @@ class VOP3b op, dag outs, dag ins, string asm, list pattern> : let Inst{62} = src1_modifiers{0}; let Inst{63} = src2_modifiers{0}; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let VOP3 = 1; } class VOPC op, dag ins, string asm, list pattern> : diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index bfd514766acf..d2a13e2dc39c 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -266,7 +266,7 @@ class SIMCInstr { multiclass VOP3_m op, dag outs, dag ins, string asm, list pattern, string opName> { - def "" : InstSI , VOP , + def "" : VOP3Common , VOP , SIMCInstr { let isPseudo = 1; } From 97a66e604970685e7c7c954d3fb9ab6dab2c6f8e Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Tue, 17 Jun 2014 21:54:18 +0000 Subject: [PATCH 022/157] Remove more occurrences of the unused-mutex-parameter pattern. 
This pattern loses some of its usefulness when the mutex type is statically polymorphic as opposed to runtime polymorphic, as swapping out the mutex type requires changing a significant number of function parameters, and templatizing the function parameter requires the methods to be defined in the headers. Furthermore, if LLVM is compiled with threads disabled then there may even be no mutex to acquire anyway, so it should not be up to individual APIs to know whether or not acquiring a mutex is required to use those APIs to begin with. It should be up to the user of the API. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211125 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/JIT/JITEmitter.cpp | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index b6f90e2de05a..50b8c10b638b 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -121,21 +121,16 @@ namespace { #endif } - FunctionToLazyStubMapTy& getFunctionToLazyStubMap( - const MutexGuard& locked) { - assert(locked.holds(TheJIT->lock)); + FunctionToLazyStubMapTy& getFunctionToLazyStubMap() { return FunctionToLazyStubMap; } - GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) { - assert(lck.holds(TheJIT->lock)); + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() { return GlobalToIndirectSymMap; } std::pair LookupFunctionFromCallSite( - const MutexGuard &locked, void *CallSite) const { - assert(locked.holds(TheJIT->lock)); - + void *CallSite) const { // The address given to us for the stub may not be exactly right, it // might be a little bit after the stub. As such, use upper_bound to // find it. @@ -147,9 +142,7 @@ namespace { return *I; } - void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) { - assert(locked.holds(TheJIT->lock)); - + void AddCallSite(void *CallSite, Function *F) { bool Inserted = CallSiteToFunctionMap.insert( std::make_pair(CallSite, F)).second; (void)Inserted; @@ -504,7 +497,7 @@ void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { MutexGuard locked(TheJIT->lock); // If we already have a stub for this function, recycle it. - return state.getFunctionToLazyStubMap(locked).lookup(F); + return state.getFunctionToLazyStubMap().lookup(F); } /// getFunctionStub - This returns a pointer to a function stub, creating @@ -513,7 +506,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { MutexGuard locked(TheJIT->lock); // If we already have a lazy stub for this function, recycle it. - void *&Stub = state.getFunctionToLazyStubMap(locked)[F]; + void *&Stub = state.getFunctionToLazyStubMap()[F]; if (Stub) return Stub; // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we @@ -555,7 +548,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { // Finally, keep track of the stub-to-Function mapping so that the // JITCompilerFn knows which function to compile! - state.AddCallSite(locked, Stub, F); + state.AddCallSite(Stub, F); } else if (!Actual) { // If we are JIT'ing non-lazily but need to call a function that does not // exist yet, add it to the JIT's work list so that we can fill in the @@ -574,7 +567,7 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { MutexGuard locked(TheJIT->lock); // If we already have a stub for this global variable, recycle it. 
- void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV]; + void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; if (IndirectSym) return IndirectSym; // Otherwise, codegen a new indirect symbol. @@ -634,7 +627,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. std::pair I = - JR->state.LookupFunctionFromCallSite(locked, Stub); + JR->state.LookupFunctionFromCallSite(Stub); F = I.second; ActualPtr = I.first; } From e8cb2ee1cd8ad7568a81f294ba09e0af2aff9e77 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Tue, 17 Jun 2014 21:55:43 +0000 Subject: [PATCH 023/157] [FastISel][X86] Optimize predicates and fold CMP instructions. This optimizes predicates for certain compares, such as fcmp oeq %x, %x to fcmp ord %x, %x. The latter one is more efficient to generate. The same optimization is applied to conditional branches. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211126 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 122 +++++- test/CodeGen/X86/fast-isel-cmp-branch3.ll | 470 ++++++++++++++++++++++ test/CodeGen/X86/fast-isel-cmp.ll | 417 +++++++++++++++++++ 3 files changed, 996 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/X86/fast-isel-cmp-branch3.ll diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 6a4a467f6a9a..37174a14b29d 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -155,6 +155,46 @@ class X86FastISel final : public FastISel { } // end anonymous namespace. +static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) { + // If both operands are the same, then try to optimize or fold the cmp. 
+ CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; + } + + return Predicate; +} + static std::pair getX86ConditonCode(CmpInst::Predicate Predicate) { X86::CondCode CC = X86::COND_INVALID; @@ -1048,21 +1088,61 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + unsigned ResultReg = 0; + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: { + ResultReg = createResultReg(&X86::GR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0), + ResultReg); + ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, + X86::sub_8bit); + if (!ResultReg) + return false; + break; + } + case CmpInst::FCMP_TRUE: { + ResultReg = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), + ResultReg).addImm(1); + break; + } + } + + if (ResultReg) { + UpdateValueMap(I, ResultReg); + return true; + } + + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. + // We don't have to materialize a zero constant for this case and can just use + // %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *RHSC = dyn_cast(RHS); + if (RHSC && RHSC->isNullValue()) + RHS = LHS; + } + // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
static unsigned SETFOpcTable[2][3] = { { X86::SETEr, X86::SETNPr, X86::AND8rr }, { X86::SETNEr, X86::SETPr, X86::OR8rr } }; unsigned *SETFOpc = nullptr; - switch (CI->getPredicate()) { + switch (Predicate) { default: break; case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; } - unsigned ResultReg = createResultReg(&X86::GR8RegClass); + ResultReg = createResultReg(&X86::GR8RegClass); if (SETFOpc) { - if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) + if (!X86FastEmitCompare(LHS, RHS, VT)) return false; unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); @@ -1078,17 +1158,15 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { } X86::CondCode CC; - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - std::tie(CC, SwapArgs) = getX86ConditonCode(CI->getPredicate()); + bool SwapArgs; + std::tie(CC, SwapArgs) = getX86ConditonCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected conditon code."); unsigned Opc = X86::getSETFromCond(CC); - const Value *LHS = CI->getOperand(0); - const Value *RHS = CI->getOperand(1); if (SwapArgs) std::swap(LHS, RHS); - // Emit a compare of Op0/Op1. + // Emit a compare of LHS/RHS. if (!X86FastEmitCompare(LHS, RHS, VT)) return false; @@ -1162,8 +1240,28 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true; + case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true; + } + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, + // 0.0. + // We don't have to materialize a zero constant for this case and can just + // use %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *CmpRHSC = dyn_cast(CmpRHS); + if (CmpRHSC && CmpRHSC->isNullValue()) + CmpRHS = CmpLHS; + } + // Try to take advantage of fallthrough opportunities. - CmpInst::Predicate Predicate = CI->getPredicate(); if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); @@ -1186,14 +1284,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } X86::CondCode CC; - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - unsigned BranchOpc; // Opcode to jump on, e.g. 
"X86::JA" + bool SwapArgs; + unsigned BranchOpc; std::tie(CC, SwapArgs) = getX86ConditonCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected conditon code."); BranchOpc = X86::GetCondBranchFromCond(CC); - const Value *CmpLHS = CI->getOperand(0); - const Value *CmpRHS = CI->getOperand(1); if (SwapArgs) std::swap(CmpLHS, CmpRHS); diff --git a/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/test/CodeGen/X86/fast-isel-cmp-branch3.ll new file mode 100644 index 000000000000..a3f6851ca240 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-cmp-branch3.ll @@ -0,0 +1,470 @@ +; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s + +define i32 @fcmp_oeq1(float %x) { +; CHECK-LABEL: fcmp_oeq1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp oeq float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oeq2(float %x) { +; CHECK-LABEL: fcmp_oeq2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_2}} + %1 = fcmp oeq float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ogt1(float %x) { +; CHECK-LABEL: fcmp_ogt1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp ogt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ogt2(float %x) { +; CHECK-LABEL: fcmp_ogt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jbe {{LBB.+_1}} + %1 = fcmp ogt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oge1(float %x) { +; CHECK-LABEL: fcmp_oge1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp oge float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oge2(float %x) { +; CHECK-LABEL: fcmp_oge2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jb {{LBB.+_1}} + %1 = fcmp oge float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_olt1(float %x) { +; CHECK-LABEL: fcmp_olt1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp olt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_olt2(float %x) { +; CHECK-LABEL: fcmp_olt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jbe {{LBB.+_1}} + %1 = fcmp olt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ole1(float %x) { +; CHECK-LABEL: fcmp_ole1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ole float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ole2(float %x) { +; CHECK-LABEL: fcmp_ole2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jb {{LBB.+_1}} + %1 = fcmp ole float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_one1(float %x) { +; CHECK-LABEL: fcmp_one1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp one float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_one2(float %x) { +; CHECK-LABEL: fcmp_one2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je {{LBB.+_1}} + %1 = fcmp one float 
%x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ord1(float %x) { +; CHECK-LABEL: fcmp_ord1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ord float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ord2(float %x) { +; CHECK-LABEL: fcmp_ord2 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ord float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uno1(float %x) { +; CHECK-LABEL: fcmp_uno1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_2}} + %1 = fcmp uno float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uno2(float %x) { +; CHECK-LABEL: fcmp_uno2 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_2}} + %1 = fcmp uno float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ueq1(float %x) { +; CHECK-LABEL: fcmp_ueq1 +; CHECK-NOT: ucomiss + %1 = fcmp ueq float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ueq2(float %x) { +; CHECK-LABEL: fcmp_ueq2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je {{LBB.+_2}} + %1 = fcmp ueq float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ugt1(float %x) { +; CHECK-LABEL: fcmp_ugt1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp ugt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ugt2(float %x) { +; CHECK-LABEL: fcmp_ugt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jae {{LBB.+_1}} + %1 = fcmp ugt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uge1(float %x) { +; CHECK-LABEL: fcmp_uge1 +; CHECK-NOT: ucomiss + %1 = fcmp uge float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uge2(float %x) { +; CHECK-LABEL: fcmp_uge2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: ja {{LBB.+_1}} + %1 = fcmp uge float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ult1(float %x) { +; CHECK-LABEL: fcmp_ult1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp ult float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ult2(float %x) { +; CHECK-LABEL: fcmp_ult2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jae {{LBB.+_1}} + %1 = fcmp ult float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ule1(float %x) { +; CHECK-LABEL: fcmp_ule1 +; CHECK-NOT: ucomiss + %1 = fcmp ule float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ule2(float %x) { +; CHECK-LABEL: fcmp_ule2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: ja {{LBB.+_1}} + %1 = fcmp ule float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_une1(float %x) { +; CHECK-LABEL: fcmp_une1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp une float %x, %x + br 
i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_une2(float %x) { +; CHECK-LABEL: fcmp_une2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_2}} +; CHECK-NEXT: jmp {{LBB.+_1}} + %1 = fcmp une float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq(i32 %x) { +; CHECK-LABEL: icmp_eq +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp eq i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ne(i32 %x) { +; CHECK-LABEL: icmp_ne +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ne i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ugt(i32 %x) { +; CHECK-LABEL: icmp_ugt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ugt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_uge(i32 %x) { +; CHECK-LABEL: icmp_uge +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp uge i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ult(i32 %x) { +; CHECK-LABEL: icmp_ult +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ult i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ule(i32 %x) { +; CHECK-LABEL: icmp_ule +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp ule i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sgt(i32 %x) { +; CHECK-LABEL: icmp_sgt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp sgt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sge(i32 %x) { +; CHECK-LABEL: icmp_sge +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp sge i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_slt(i32 %x) { +; CHECK-LABEL: icmp_slt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp slt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sle(i32 %x) { +; CHECK-LABEL: icmp_sle +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp sle i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + diff --git a/test/CodeGen/X86/fast-isel-cmp.ll b/test/CodeGen/X86/fast-isel-cmp.ll index 61cc67a244e4..1b72cfcde657 100644 --- a/test/CodeGen/X86/fast-isel-cmp.ll +++ b/test/CodeGen/X86/fast-isel-cmp.ll @@ -270,3 +270,420 @@ define zeroext i1 @icmp_sle(i32 %x, i32 %y) { ret i1 %1 } +; Test cmp folding and condition optimization. 
+define zeroext i1 @fcmp_oeq2(float %x) { +; SDAG-LABEL: fcmp_oeq2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_oeq2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp oeq float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_oeq3(float %x) { +; SDAG-LABEL: fcmp_oeq3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: cmpeqss %xmm1, %xmm0 +; SDAG-NEXT: movd %xmm0, %eax +; SDAG-NEXT: andl $1, %eax +; FAST-LABEL: fcmp_oeq3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: sete %al +; FAST-NEXT: setnp %cl +; FAST-NEXT: andb %al, %cl + %1 = fcmp oeq float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ogt2(float %x) { +; SDAG-LABEL: fcmp_ogt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_ogt2 +; FAST: xorl %eax, %eax + %1 = fcmp ogt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ogt3(float %x) { +; SDAG-LABEL: fcmp_ogt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: seta %al +; FAST-LABEL: fcmp_ogt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: seta %al + %1 = fcmp ogt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_oge2(float %x) { +; SDAG-LABEL: fcmp_oge2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_oge2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp oge float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_oge3(float %x) { +; SDAG-LABEL: fcmp_oge3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setae %al +; FAST-LABEL: fcmp_oge3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setae %al + %1 = fcmp oge float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_olt2(float %x) { +; SDAG-LABEL: fcmp_olt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_olt2 +; FAST: xorl %eax, %eax + %1 = fcmp olt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_olt3(float %x) { +; SDAG-LABEL: fcmp_olt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: seta %al +; FAST-LABEL: fcmp_olt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: seta %al + %1 = fcmp olt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ole2(float %x) { +; SDAG-LABEL: fcmp_ole2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ole2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ole float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ole3(float %x) { +; SDAG-LABEL: fcmp_ole3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setae %al +; FAST-LABEL: fcmp_ole3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setae %al + %1 = fcmp ole float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_one2(float %x) { +; SDAG-LABEL: fcmp_one2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_one2 +; FAST: xorl %eax, %eax + %1 = fcmp one float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_one3(float %x) { +; SDAG-LABEL: fcmp_one3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setne %al +; FAST-LABEL: fcmp_one3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setne %al + %1 = fcmp one float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ord2(float %x) { +; SDAG-LABEL: fcmp_ord2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ord2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ord float %x, %x + ret i1 
%1 +} + +define zeroext i1 @fcmp_ord3(float %x) { +; SDAG-LABEL: fcmp_ord3 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ord3 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ord float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_uno2(float %x) { +; SDAG-LABEL: fcmp_uno2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_uno2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp uno float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_uno3(float %x) { +; SDAG-LABEL: fcmp_uno3 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_uno3 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp uno float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ueq2(float %x) { +; SDAG-LABEL: fcmp_ueq2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_ueq2 +; FAST: movb $1, %al + %1 = fcmp ueq float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ueq3(float %x) { +; SDAG-LABEL: fcmp_ueq3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: sete %al +; FAST-LABEL: fcmp_ueq3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: sete %al + %1 = fcmp ueq float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ugt2(float %x) { +; SDAG-LABEL: fcmp_ugt2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_ugt2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp ugt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ugt3(float %x) { +; SDAG-LABEL: fcmp_ugt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setb %al +; FAST-LABEL: fcmp_ugt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setb %al + %1 = fcmp ugt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_uge2(float %x) { +; SDAG-LABEL: fcmp_uge2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_uge2 +; FAST: movb $1, %al + %1 = fcmp uge float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_uge3(float %x) { +; SDAG-LABEL: fcmp_uge3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setbe %al +; FAST-LABEL: fcmp_uge3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setbe %al + %1 = fcmp uge float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ult2(float %x) { +; SDAG-LABEL: fcmp_ult2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_ult2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp ult float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ult3(float %x) { +; SDAG-LABEL: fcmp_ult3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setb %al +; FAST-LABEL: fcmp_ult3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setb %al + %1 = fcmp ult float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ule2(float %x) { +; SDAG-LABEL: fcmp_ule2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_ule2 +; FAST: movb $1, %al + %1 = fcmp ule float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ule3(float %x) { +; SDAG-LABEL: fcmp_ule3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setbe %al +; FAST-LABEL: fcmp_ule3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setbe %al + %1 = fcmp ule float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_une2(float %x) { +; SDAG-LABEL: fcmp_une2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_une2 +; FAST: 
ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp une float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_une3(float %x) { +; SDAG-LABEL: fcmp_une3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: cmpneqss %xmm1, %xmm0 +; SDAG-NEXT: movd %xmm0, %eax +; SDAG-NEXT: andl $1, %eax +; FAST-LABEL: fcmp_une3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setne %al +; FAST-NEXT: setp %cl +; FAST-NEXT: orb %al, %cl + %1 = fcmp une float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @icmp_eq2(i32 %x) { +; SDAG-LABEL: icmp_eq2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_eq2 +; FAST: movb $1, %al + %1 = icmp eq i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ne2(i32 %x) { +; SDAG-LABEL: icmp_ne2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ne2 +; FAST: xorl %eax, %eax + %1 = icmp ne i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ugt2(i32 %x) { +; SDAG-LABEL: icmp_ugt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ugt2 +; FAST: xorl %eax, %eax + %1 = icmp ugt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_uge2(i32 %x) { +; SDAG-LABEL: icmp_uge2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_uge2 +; FAST: movb $1, %al + %1 = icmp uge i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ult2(i32 %x) { +; SDAG-LABEL: icmp_ult2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ult2 +; FAST: xorl %eax, %eax + %1 = icmp ult i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ule2(i32 %x) { +; SDAG-LABEL: icmp_ule2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_ule2 +; FAST: movb $1, %al + %1 = icmp ule i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sgt2(i32 %x) { +; SDAG-LABEL: icmp_sgt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_sgt2 +; FAST: xorl %eax, %eax + %1 = icmp sgt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sge2(i32 %x) { +; SDAG-LABEL: icmp_sge2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_sge2 +; FAST: movb $1, %al + %1 = icmp sge i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_slt2(i32 %x) { +; SDAG-LABEL: icmp_slt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_slt2 +; FAST: xorl %eax, %eax + %1 = icmp slt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sle2(i32 %x) { +; SDAG-LABEL: icmp_sle2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_sle2 +; FAST: movb $1, %al + %1 = icmp sle i32 %x, %x + ret i1 %1 +} + From 41b33299cfb43b4bd637140e175f8ddd0050e690 Mon Sep 17 00:00:00 2001 From: Louis Gerbarg Date: Tue, 17 Jun 2014 23:22:41 +0000 Subject: [PATCH 024/157] Allow X86FastIsel to cope with 64 bit absolute relocations This patch is a follow up to r211040 & r211052. Rather than bailing out of fast isel this patch will generate an alternate instruction (movabsq) instead of the leaq. While this will always have enough room to handle the 64 bit displacment it is generally over kill for internal symbols (most displacements will be within 32 bits) but since we have no way of communicating the code model to the the assmebler in order to avoid flagging an absolute leal/leaq as illegal when using a symbolic displacement. 
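For illustration only (this sketch and the symbol name are not part of the
original commit), a case of the kind this change affects, written in the
style of the updated test below:

    @internal_sym = internal global i32 42

    define i32 @load_internal_sym() {
      %val = load i32* @internal_sym
      ret i32 %val
    }

    ; Address materialization before this change (fast isel bailed out and
    ; the 32 bit immediate form was emitted instead):
    ;   movl    $internal_sym, %eax
    ; With this change fast isel emits the 64 bit absolute form directly:
    ;   movabsq $internal_sym, %rax

The x86-64-static-relo-movl.ll test updated below checks the same switch
from movl to movabsq.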
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 22 +++++++++++---------- test/CodeGen/X86/x86-64-static-relo-movl.ll | 2 +- test/DebugInfo/X86/debug-loc-asan.ll | 8 ++++---- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 37174a14b29d..9557d96d5309 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2813,15 +2813,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { return 0; } - // Materialize addresses with LEA instructions. + // Materialize addresses with LEA/MOV instructions. if (isa(C)) { - // LEA can only handle 32 bit immediates. Currently this happens pretty - // rarely, so rather than deal with it just bail out of fast isel. If any - // architectures endis up needing to use this path a lot then fast isel - // could get the address with a MOV64ri and use that to load the value. - if (TM.getRelocationModel() == Reloc::Static && Subtarget->is64Bit()) - return false; - X86AddressMode AM; if (X86SelectAddress(C, AM)) { // If the expression is just a basereg, then we're done, otherwise we need @@ -2830,10 +2823,19 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr) return AM.Base.Reg; - Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r; unsigned ResultReg = createResultReg(RC); - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + if (TM.getRelocationModel() == Reloc::Static && + TLI.getPointerTy() == MVT::i64) { + // The displacement code be more than 32 bits away so we need to use + // an instruction with a 64 bit immediate + Opc = X86::MOV64ri; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), ResultReg).addGlobalAddress(cast(C)); + } else { + Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r; + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); + } return ResultReg; } return 0; diff --git a/test/CodeGen/X86/x86-64-static-relo-movl.ll b/test/CodeGen/X86/x86-64-static-relo-movl.ll index b184df461968..71e52bb99191 100644 --- a/test/CodeGen/X86/x86-64-static-relo-movl.ll +++ b/test/CodeGen/X86/x86-64-static-relo-movl.ll @@ -17,7 +17,7 @@ define void @setup() { done: ret void - ; CHECK: movl $_NO_MATCH, {{.*}} + ; CHECK: movabsq $_NO_MATCH, {{.*}} } ; Function Attrs: nounwind diff --git a/test/DebugInfo/X86/debug-loc-asan.ll b/test/DebugInfo/X86/debug-loc-asan.ll index 0e02c6787175..746b1dcaa3eb 100644 --- a/test/DebugInfo/X86/debug-loc-asan.ll +++ b/test/DebugInfo/X86/debug-loc-asan.ll @@ -9,11 +9,11 @@ ; } ; with "clang++ -S -emit-llvm -fsanitize=address -O0 -g test.cc" -; First, argument variable "y" resides in %rdx: -; CHECK: DEBUG_VALUE: bar:y <- RDX +; First, argument variable "y" resides in %rdi: +; CHECK: DEBUG_VALUE: bar:y <- RDI ; Then its address is stored in a location on a stack: -; CHECK: movq %rdx, [[OFFSET:[0-9]+]](%rsp) +; CHECK: movq %rdi, [[OFFSET:[0-9]+]](%rsp) ; CHECK-NEXT: [[START_LABEL:.Ltmp[0-9]+]] ; CHECK-NEXT: DEBUG_VALUE: bar:y <- [RSP+[[OFFSET]]] ; This location should be valid until the end of the function. 
@@ -26,7 +26,7 @@ ; CHECK-NEXT: .quad .Lset{{[0-9]+}} ; CHECK-NEXT: .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0 ; CHECK-NEXT: .quad .Lset{{[0-9]+}} -; CHECK: DW_OP_reg1 +; CHECK: DW_OP_reg5 ; Then it's addressed via %rsp: ; CHECK: .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0 From 868e579be6c533836731500b47fc6d6d2e8e5314 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 17 Jun 2014 23:27:32 +0000 Subject: [PATCH 025/157] Add the coverage cflags to the link step as well to make sure that we link in the support libraries. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211131 91177308-0d34-0410-b5e6-96231b3b80d8 --- Makefile.rules | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.rules b/Makefile.rules index 941797107c78..ff0a3e3f8191 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -377,6 +377,7 @@ ifeq ($(ENABLE_COVERAGE),1) BuildMode := $(BuildMode)+Coverage CXX.Flags += -ftest-coverage -fprofile-arcs C.Flags += -ftest-coverage -fprofile-arcs + LD.Flags += -ftest-coverage -fprofile-arcs endif # If DISABLE_ASSERTIONS=1 is specified (make command line or configured), From 10d664fee72917960c8bf38cc2ec8e3b84924044 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Jun 2014 05:05:13 +0000 Subject: [PATCH 026/157] Replace some assert(0)'s with llvm_unreachable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211141 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitstreamReader.cpp | 4 ++-- lib/Linker/LinkModules.cpp | 2 +- lib/Target/AArch64/AArch64AsmPrinter.cpp | 2 +- lib/Target/AArch64/AArch64BranchRelaxation.cpp | 6 +++--- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++-- lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +- lib/Target/AArch64/AArch64MCInstLower.cpp | 4 ++-- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 4 ++-- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 2 +- lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 2 +- lib/Target/ARM/ARMInstrInfo.td | 2 +- lib/Target/ARM/ARMMachineFunctionInfo.h | 2 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 6 +++--- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 2 +- lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 2 +- lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 2 +- lib/Target/Sparc/SparcJITInfo.cpp | 3 ++- utils/TableGen/IntrinsicEmitter.cpp | 2 +- 19 files changed, 28 insertions(+), 27 deletions(-) diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index f31e1fa9b173..72451ec9500c 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -97,7 +97,7 @@ void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: case BitCodeAbbrevOp::Blob: - assert(0 && "Should not reach here"); + llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: Vals.push_back(Read((unsigned)Op.getEncodingData())); break; @@ -117,7 +117,7 @@ void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: case BitCodeAbbrevOp::Blob: - assert(0 && "Should not reach here"); + llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: (void)Read((unsigned)Op.getEncodingData()); break; diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 8e205bb621e9..688b13f228d0 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1141,7 +1141,7 @@ bool 
ModuleLinker::linkModuleFlagsMetadata() { // Perform the merge for standard behavior types. switch (SrcBehaviorValue) { case Module::Require: - case Module::Override: assert(0 && "not possible"); break; + case Module::Override: llvm_unreachable("not possible"); case Module::Error: { // Emit an error if the values differ. if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index c3ee9bbb8179..cd94e244dc38 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -211,7 +211,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { default: - assert(0 && ""); + llvm_unreachable(""); case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp index 52094526727d..484e7e8ffce4 100644 --- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -291,7 +291,7 @@ static bool isConditionalBranch(unsigned Opc) { static MachineBasicBlock *getDestBlock(MachineInstr *MI) { switch (MI->getOpcode()) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBZW: case AArch64::TBNZW: case AArch64::TBZX: @@ -309,7 +309,7 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) { static unsigned getOppositeConditionOpcode(unsigned Opc) { switch (Opc) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: return AArch64::TBZW; case AArch64::TBNZX: return AArch64::TBZX; case AArch64::TBZW: return AArch64::TBNZW; @@ -325,7 +325,7 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) { static unsigned getBranchDisplacementBits(unsigned Opc) { switch (Opc) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: case AArch64::TBZW: case AArch64::TBNZX: diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7dbcb7bf5ee2..98609760a73a 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2108,7 +2108,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { .getVectorElementType() .getSizeInBits()) { default: - assert(0 && "Unexpected vector element type!"); + llvm_unreachable("Unexpected vector element type!"); case 64: SubReg = AArch64::dsub; break; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7a2c9c95b615..ec10968874e5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1273,7 +1273,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { bool ExtraOp = false; switch (Op.getOpcode()) { default: - assert(0 && "Invalid code"); + llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = AArch64ISD::ADDS; break; @@ -6674,7 +6674,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, else if (Vec.getValueType() == MVT::v2i64) VecResTy = MVT::v2f64; else - assert(0 && "unexpected vector type!"); + llvm_unreachable("unexpected vector type!"); SDValue Convert = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); diff --git 
a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index cabf9ea87265..ce85b2ceba95 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1841,7 +1841,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUnscaledOp = 0; switch (MI.getOpcode()) { default: - assert(0 && "unhandled opcode in rewriteAArch64FrameIndex"); + llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); // Vector spills/fills can't take an immediate offset. case AArch64::LD1Twov2d: case AArch64::LD1Threev2d: diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index ab6d37532a70..75a17b9dc999 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -51,7 +51,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; else - assert(0 && "Unexpected target flags with MO_GOT on GV operand"); + llvm_unreachable("Unexpected target flags with MO_GOT on GV operand"); } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) { if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_TLVPPAGE; @@ -154,7 +154,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { switch (MO.getType()) { default: - assert(0 && "unknown operand type"); + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index f484a5b1bdcc..8a21f0650cdc 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -918,7 +918,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, else O << getRegisterName(Reg); } else - assert(0 && "unknown operand kind in printPostIncOperand64"); + llvm_unreachable("unknown operand kind in printPostIncOperand64"); } void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, @@ -1109,7 +1109,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { while (Stride--) { switch (Reg) { default: - assert(0 && "Vector register expected!"); + llvm_unreachable("Vector register expected!"); case AArch64::Q0: Reg = AArch64::Q1; break; case AArch64::Q1: Reg = AArch64::Q2; break; case AArch64::Q2: Reg = AArch64::Q3; break; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index d8900d4fceb2..a917616f142d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -86,7 +86,7 @@ class AArch64AsmBackend : public MCAsmBackend { static unsigned getFixupKindNumBytes(unsigned Kind) { switch (Kind) { default: - assert(0 && "Unknown fixup kind!"); + llvm_unreachable("Unknown fixup kind!"); case AArch64::fixup_aarch64_tlsdesc_call: return 0; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 5c86189a6ef5..ba9536676e12 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -75,7 +75,7 @@ bool 
AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( Log2Size = llvm::Log2_32(4); switch (Sym->getKind()) { default: - assert(0 && "Unexpected symbol reference variant kind!"); + llvm_unreachable("Unexpected symbol reference variant kind!"); case MCSymbolRefExpr::VK_PAGEOFF: RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12); return true; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b44a19b01c5f..03eac2e0c84f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -493,7 +493,7 @@ def neon_vcvt_imm32 : Operand { // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. def rot_imm_XFORM: SDNodeXFormgetZExtValue()){ - default: assert(0); + default: llvm_unreachable(nullptr); case 0: return CurDAG->getTargetConstant(0, MVT::i32); case 8: return CurDAG->getTargetConstant(1, MVT::i32); case 16: return CurDAG->getTargetConstant(2, MVT::i32); diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index d7ec6eba941c..7934761af25b 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -220,7 +220,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) - assert(0 && "Duplicate entries!"); + llvm_unreachable("Duplicate entries!"); } unsigned getOriginalCPIdx(unsigned CloneIdx) const { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index e6cbf643596c..195b3c0fe9ed 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1438,7 +1438,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, O << "linear"; break; case 2: - assert(0 && "Anisotropic filtering is not supported"); + llvm_unreachable("Anisotropic filtering is not supported"); default: O << "nearest"; break; @@ -1562,7 +1562,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } break; default: - assert(0 && "type not supported yet"); + llvm_unreachable("type not supported yet"); } } @@ -1682,7 +1682,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, O << "]"; break; default: - assert(0 && "type not supported yet"); + llvm_unreachable("type not supported yet"); } return; } diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 79075b06bdcd..9df005401897 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -330,7 +330,7 @@ class SparcOperand : public MCParsedAsmOperand { unsigned Reg = Op.getReg(); unsigned regIdx = 0; switch (Op.Reg.Kind) { - default: assert(0 && "Unexpected register kind!"); + default: llvm_unreachable("Unexpected register kind!"); case rk_FloatReg: regIdx = Reg - Sparc::F0; if (regIdx % 4 || regIdx > 31) diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 7d517b699b22..0fbac218cb6b 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -201,7 +201,7 @@ namespace { } void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { // FIXME. 
- assert(0 && "relaxInstruction() unimplemented"); + llvm_unreachable("relaxInstruction() unimplemented"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index ae57fdc4eebd..3ccdd038fb33 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -124,7 +124,7 @@ SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name) Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) { switch (Kind) { - default: assert(0 && "Unhandled SparcMCExpr::VariantKind"); + default: llvm_unreachable("Unhandled SparcMCExpr::VariantKind"); case VK_Sparc_LO: return Sparc::fixup_sparc_lo10; case VK_Sparc_HI: return Sparc::fixup_sparc_hi22; case VK_Sparc_H44: return Sparc::fixup_sparc_h44; diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp index c775e9e83ee6..d0eec98b5e91 100644 --- a/lib/Target/Sparc/SparcJITInfo.cpp +++ b/lib/Target/Sparc/SparcJITInfo.cpp @@ -213,7 +213,8 @@ extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) { void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - assert(0 && "FIXME: Implement SparcJITInfo::replaceMachineCodeForFunction"); + llvm_unreachable("FIXME: Implement SparcJITInfo::" + "replaceMachineCodeForFunction"); } diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 1927ad923699..5dba9514283d 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -380,7 +380,7 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int, case 3: TypeSig.push_back(IIT_STRUCT3); break; case 4: TypeSig.push_back(IIT_STRUCT4); break; case 5: TypeSig.push_back(IIT_STRUCT5); break; - default: assert(0 && "Unhandled case in struct"); + default: llvm_unreachable("Unhandled case in struct"); } for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i) From 74287ec34c1443e462aba573c6e8f7d7f9defa35 Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Wed, 18 Jun 2014 05:54:42 +0000 Subject: [PATCH 027/157] [AArch64] Fix a pattern match failure caused by creating improper CONCAT_VECTOR. ReconstructShuffle() may wrongly creat a CONCAT_VECTOR trying to concat 2 of v2i32 into v4i16. This commit is to fix this issue and try to generate UZP1 instead of lots of MOV and INS. Patch is initalized by Kevin Qin, and refactored by Tim Northover. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211144 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 66 ++++++++++++--------- test/CodeGen/AArch64/arm64-convert-v4f64.ll | 33 +++++++++++ 2 files changed, 72 insertions(+), 27 deletions(-) create mode 100644 test/CodeGen/AArch64/arm64-convert-v4f64.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index ec10968874e5..e45ca4dbc0d9 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4110,6 +4110,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { // shuffle in combination with VEXTs. 
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4158,35 +4159,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; int VEXTOffsets[2] = { 0, 0 }; + int OffsetMultipliers[2] = { 1, 1 }; // This loop extracts the usage patterns of the source vectors // and prepares appropriate SDValues for a shuffle if possible. for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { + unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements(); + SDValue CurSource = SourceVecs[i]; + if (SourceVecs[i].getValueType().getVectorElementType() != + VT.getVectorElementType()) { + // It may hit this case if SourceVecs[i] is AssertSext/AssertZext. + // Then bitcast it to the vector which holds asserted element type, + // and record the multiplier of element width between SourceVecs and + // Build_vector which is needed to extract the correct lanes later. + EVT CastVT = + EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + SourceVecs[i].getValueSizeInBits() / + VT.getVectorElementType().getSizeInBits()); + + CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]); + OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts; + NumSrcElts *= OffsetMultipliers[i]; + MaxElts[i] *= OffsetMultipliers[i]; + MinElts[i] *= OffsetMultipliers[i]; + } + + if (CurSource.getValueType() == VT) { // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; + ShuffleSrcs[i] = CurSource; VEXTOffsets[i] = 0; continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { + } else if (NumSrcElts < NumElts) { // We can pad out the smaller vector for free, so if it's part of a // shuffle... - ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i], - DAG.getUNDEF(SourceVecs[i].getValueType())); + ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource, + DAG.getUNDEF(CurSource.getValueType())); continue; } - // Don't attempt to extract subvectors from BUILD_VECTOR sources - // that expand or trunc the original value. - // TODO: We can try to bitcast and ANY_EXTEND the result but - // we need to consider the cost of vector ANY_EXTEND, and the - // legality of all the types. - if (SourceVecs[i].getValueType().getVectorElementType() != - VT.getVectorElementType()) - return SDValue(); - // Since only 64-bit and 128-bit vectors are legal on ARM and // we've eliminated the other cases... 
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && + assert(NumSrcElts == 2 * NumElts && "unexpected vector sizes in ReconstructShuffle"); if (MaxElts[i] - MinElts[i] >= NumElts) { @@ -4197,22 +4210,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, if (MinElts[i] >= NumElts) { // The extraction can just take the second half VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); } else if (MaxElts[i] < NumElts) { // The extraction can just take the first half VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); } else { // An actual VEXT is needed VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); + SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); @@ -4232,9 +4243,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, int ExtractElt = cast(Op.getOperand(i).getOperand(1))->getSExtValue(); if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); + Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]); } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); + Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts - + VEXTOffsets[1]); } } diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll new file mode 100644 index 000000000000..6bfb0a54ea3a --- /dev/null +++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s + + +define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) { +; CHECK: fptosi_v4f64_to_v4i16 +; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d +; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d +; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d +; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d +; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h + %tmp1 = load <4 x double>* %ptr + %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) { +; CHECK: fptosi_v4f64_to_v4i8 +; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d +; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d +; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d +; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d +; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d +; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d +; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d +; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d +; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h +; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h +; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b + %tmp1 = load <8 x double>* %ptr + %tmp2 = fptosi <8 x 
double> %tmp1 to <8 x i8> + ret <8 x i8> %tmp2 +} + From 1d04a2324c4e86320b5452f5262782c569cd3315 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Wed, 18 Jun 2014 06:23:25 +0000 Subject: [PATCH 028/157] Random Number Generator (llvm) Summary: Provides an abstraction for a random number generator (RNG) that produces a stream of pseudo-random numbers. The current implementation uses C++11 facilities and is therefore not cryptographically secure. The RNG is salted with the text of the current command line invocation. In addition, a user may specify a seed (reproducible builds). In clang, the seed can be set via -frandom-seed=X In the back end, the seed can be set via -rng-seed=X This is the llvm part of the patch. clang part: D3391 Reviewers: ahomescu, rinon, nicholas, jfb Reviewed By: jfb Subscribers: jfb, perl Differential Revision: http://reviews.llvm.org/D3390 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211145 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Module.h | 8 +++ include/llvm/Support/RandomNumberGenerator.h | 53 ++++++++++++++++++++ lib/IR/Module.cpp | 15 +++++- lib/Support/CMakeLists.txt | 1 + lib/Support/RandomNumberGenerator.cpp | 52 +++++++++++++++++++ 5 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 include/llvm/Support/RandomNumberGenerator.h create mode 100644 lib/Support/RandomNumberGenerator.cpp diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index e2b86f62fb8d..381d9927c251 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -29,6 +29,7 @@ namespace llvm { class FunctionType; class GVMaterializer; class LLVMContext; +class RandomNumberGenerator; class StructType; template struct DenseMapInfo; template class DenseMap; @@ -201,6 +202,8 @@ class Module { std::string ModuleID; ///< Human readable identifier for the module std::string TargetTriple; ///< Platform target triple Module compiled on void *NamedMDSymTab; ///< NamedMDNode names. + // Allow lazy initialization in const method. + mutable RandomNumberGenerator *RNG; ///< The random number generator for this module. // We need to keep the string because the C API expects us to own the string // representation. @@ -249,6 +252,11 @@ class Module { /// @returns a string containing the module-scope inline assembly blocks. const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; } + /// Get the RandomNumberGenerator for this module. The RNG can be + /// seeded via -rng-seed= and is salted with the ModuleID. + /// The returned RNG should not be shared across threads. + RandomNumberGenerator &getRNG() const; + /// @} /// @name Module Level Mutators /// @{ diff --git a/include/llvm/Support/RandomNumberGenerator.h b/include/llvm/Support/RandomNumberGenerator.h new file mode 100644 index 000000000000..84fcdcbc76ad --- /dev/null +++ b/include/llvm/Support/RandomNumberGenerator.h @@ -0,0 +1,53 @@ +//==- llvm/Support/RandomNumberGenerator.h - RNG for diversity ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an abstraction for random number generation (RNG). +// Note that the current implementation is not cryptographically secure +// as it uses the C++11 facilities. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_RANDOMNUMBERGENERATOR_H_ +#define LLVM_SUPPORT_RANDOMNUMBERGENERATOR_H_ + +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { + +/// A random number generator. +/// Instances of this class should not be shared across threads. +class RandomNumberGenerator { +public: + /// Seeds and salts the underlying RNG engine. The salt of type StringRef + /// is passed into the constructor. The seed can be set on the command + /// line via -rng-seed=. + /// The reason for the salt is to ensure different random streams even if + /// the same seed is used for multiple invocations of the compiler. + /// A good salt value should add additional entropy and be constant across + /// different machines (i.e., no paths) to allow for reproducible builds. + /// An instance of this class can be retrieved from the current Module. + /// \see Module::getRNG + RandomNumberGenerator(StringRef Salt); + + /// Returns a random number in the range [0, Max). + uint64_t next(uint64_t Max); + +private: + // 64-bit Mersenne Twister by Matsumoto and Nishimura, 2000 + // http://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine + std::mt19937_64 Generator; + + // Noncopyable. + RandomNumberGenerator(const RandomNumberGenerator &other) = delete; + RandomNumberGenerator &operator=(const RandomNumberGenerator &other) = delete; +}; +} + +#endif diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index 6a5b386c199f..0f73d44ce455 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -24,6 +24,8 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LeakDetector.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/RandomNumberGenerator.h" #include #include #include @@ -44,7 +46,7 @@ template class llvm::SymbolTableListTraits; // Module::Module(StringRef MID, LLVMContext &C) - : Context(C), Materializer(), ModuleID(MID), DL("") { + : Context(C), Materializer(), ModuleID(MID), RNG(nullptr), DL("") { ValSymTab = new ValueSymbolTable(); NamedMDSymTab = new StringMap(); Context.addModule(this); @@ -59,6 +61,7 @@ Module::~Module() { NamedMDList.clear(); delete ValSymTab; delete static_cast *>(NamedMDSymTab); + delete RNG; } /// getNamedValue - Return the first global value in the module with @@ -355,6 +358,16 @@ const DataLayout *Module::getDataLayout() const { return &DL; } +// We want reproducible builds, but ModuleID may be a full path so we just use +// the filename to salt the RNG (although it is not guaranteed to be unique). +RandomNumberGenerator &Module::getRNG() const { + if (RNG == nullptr) { + StringRef Salt = sys::path::filename(ModuleID); + RNG = new RandomNumberGenerator(Salt); + } + return *RNG; +} + //===----------------------------------------------------------------------===// // Methods to control the materialization of GlobalValues in the Module. 
// diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 2438d729d8de..eac95c262b53 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -41,6 +41,7 @@ add_llvm_library(LLVMSupport MD5.cpp PluginLoader.cpp PrettyStackTrace.cpp + RandomNumberGenerator.cpp Regex.cpp SmallPtrSet.cpp SmallVector.cpp diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp new file mode 100644 index 000000000000..5513982e7b13 --- /dev/null +++ b/lib/Support/RandomNumberGenerator.cpp @@ -0,0 +1,52 @@ +//===-- RandomNumberGenerator.cpp - Implement RNG class -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements random number generation (RNG). +// The current implementation is NOT cryptographically secure as it uses +// the C++11 facilities. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "rng" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +// Tracking BUG: 19665 +// http://llvm.org/bugs/show_bug.cgi?id=19665 +// +// Do not change to cl::opt since this silently breaks argument parsing. +static cl::opt +Seed("rng-seed", cl::value_desc("seed"), + cl::desc("Seed for the random number generator"), cl::init(0)); + +RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) { + DEBUG( + if (Seed == 0) + errs() << "Warning! Using unseeded random number generator.\n" + ); + + // Combine seed and salt using std::seed_seq. + // Entropy: Seed-low, Seed-high, Salt... + size_t Size = Salt.size() + 2; + uint32_t Data[Size]; + Data[0] = Seed; + Data[1] = Seed >> 32; + std::copy_n(Salt.begin(), Salt.size(), Data + 2); + + std::seed_seq SeedSeq(Data, Data + Size); + Generator.seed(SeedSeq); +} + +uint64_t RandomNumberGenerator::next(uint64_t Max) { + std::uniform_int_distribution distribution(0, Max - 1); + return distribution(Generator); +} From 02b4e6e7ab1ad79336d9f23f3644c90aecc8457a Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Wed, 18 Jun 2014 06:33:23 +0000 Subject: [PATCH 029/157] Revert "Random Number Generator (llvm)" This reverts commit cccba093090d127e0b6d17473b14c264c14c5259. It causes build breakage. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211146 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Module.h | 8 --- include/llvm/Support/RandomNumberGenerator.h | 53 -------------------- lib/IR/Module.cpp | 15 +----- lib/Support/CMakeLists.txt | 1 - lib/Support/RandomNumberGenerator.cpp | 52 ------------------- 5 files changed, 1 insertion(+), 128 deletions(-) delete mode 100644 include/llvm/Support/RandomNumberGenerator.h delete mode 100644 lib/Support/RandomNumberGenerator.cpp diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 381d9927c251..e2b86f62fb8d 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -29,7 +29,6 @@ namespace llvm { class FunctionType; class GVMaterializer; class LLVMContext; -class RandomNumberGenerator; class StructType; template struct DenseMapInfo; template class DenseMap; @@ -202,8 +201,6 @@ class Module { std::string ModuleID; ///< Human readable identifier for the module std::string TargetTriple; ///< Platform target triple Module compiled on void *NamedMDSymTab; ///< NamedMDNode names. - // Allow lazy initialization in const method. - mutable RandomNumberGenerator *RNG; ///< The random number generator for this module. // We need to keep the string because the C API expects us to own the string // representation. @@ -252,11 +249,6 @@ class Module { /// @returns a string containing the module-scope inline assembly blocks. const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; } - /// Get the RandomNumberGenerator for this module. The RNG can be - /// seeded via -rng-seed= and is salted with the ModuleID. - /// The returned RNG should not be shared across threads. - RandomNumberGenerator &getRNG() const; - /// @} /// @name Module Level Mutators /// @{ diff --git a/include/llvm/Support/RandomNumberGenerator.h b/include/llvm/Support/RandomNumberGenerator.h deleted file mode 100644 index 84fcdcbc76ad..000000000000 --- a/include/llvm/Support/RandomNumberGenerator.h +++ /dev/null @@ -1,53 +0,0 @@ -//==- llvm/Support/RandomNumberGenerator.h - RNG for diversity ---*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an abstraction for random number generation (RNG). -// Note that the current implementation is not cryptographically secure -// as it uses the C++11 facilities. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_RANDOMNUMBERGENERATOR_H_ -#define LLVM_SUPPORT_RANDOMNUMBERGENERATOR_H_ - -#include "llvm/ADT/StringRef.h" -#include - -namespace llvm { - -/// A random number generator. -/// Instances of this class should not be shared across threads. -class RandomNumberGenerator { -public: - /// Seeds and salts the underlying RNG engine. The salt of type StringRef - /// is passed into the constructor. The seed can be set on the command - /// line via -rng-seed=. - /// The reason for the salt is to ensure different random streams even if - /// the same seed is used for multiple invocations of the compiler. - /// A good salt value should add additional entropy and be constant across - /// different machines (i.e., no paths) to allow for reproducible builds. - /// An instance of this class can be retrieved from the current Module. 
- /// \see Module::getRNG - RandomNumberGenerator(StringRef Salt); - - /// Returns a random number in the range [0, Max). - uint64_t next(uint64_t Max); - -private: - // 64-bit Mersenne Twister by Matsumoto and Nishimura, 2000 - // http://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine - std::mt19937_64 Generator; - - // Noncopyable. - RandomNumberGenerator(const RandomNumberGenerator &other) = delete; - RandomNumberGenerator &operator=(const RandomNumberGenerator &other) = delete; -}; -} - -#endif diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index 0f73d44ce455..6a5b386c199f 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -24,8 +24,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LeakDetector.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/RandomNumberGenerator.h" #include #include #include @@ -46,7 +44,7 @@ template class llvm::SymbolTableListTraits; // Module::Module(StringRef MID, LLVMContext &C) - : Context(C), Materializer(), ModuleID(MID), RNG(nullptr), DL("") { + : Context(C), Materializer(), ModuleID(MID), DL("") { ValSymTab = new ValueSymbolTable(); NamedMDSymTab = new StringMap(); Context.addModule(this); @@ -61,7 +59,6 @@ Module::~Module() { NamedMDList.clear(); delete ValSymTab; delete static_cast *>(NamedMDSymTab); - delete RNG; } /// getNamedValue - Return the first global value in the module with @@ -358,16 +355,6 @@ const DataLayout *Module::getDataLayout() const { return &DL; } -// We want reproducible builds, but ModuleID may be a full path so we just use -// the filename to salt the RNG (although it is not guaranteed to be unique). -RandomNumberGenerator &Module::getRNG() const { - if (RNG == nullptr) { - StringRef Salt = sys::path::filename(ModuleID); - RNG = new RandomNumberGenerator(Salt); - } - return *RNG; -} - //===----------------------------------------------------------------------===// // Methods to control the materialization of GlobalValues in the Module. // diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index eac95c262b53..2438d729d8de 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -41,7 +41,6 @@ add_llvm_library(LLVMSupport MD5.cpp PluginLoader.cpp PrettyStackTrace.cpp - RandomNumberGenerator.cpp Regex.cpp SmallPtrSet.cpp SmallVector.cpp diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp deleted file mode 100644 index 5513982e7b13..000000000000 --- a/lib/Support/RandomNumberGenerator.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===-- RandomNumberGenerator.cpp - Implement RNG class -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements random number generation (RNG). -// The current implementation is NOT cryptographically secure as it uses -// the C++11 facilities. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "rng" -#include "llvm/Support/RandomNumberGenerator.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -// Tracking BUG: 19665 -// http://llvm.org/bugs/show_bug.cgi?id=19665 -// -// Do not change to cl::opt since this silently breaks argument parsing. 
-static cl::opt -Seed("rng-seed", cl::value_desc("seed"), - cl::desc("Seed for the random number generator"), cl::init(0)); - -RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) { - DEBUG( - if (Seed == 0) - errs() << "Warning! Using unseeded random number generator.\n" - ); - - // Combine seed and salt using std::seed_seq. - // Entropy: Seed-low, Seed-high, Salt... - size_t Size = Salt.size() + 2; - uint32_t Data[Size]; - Data[0] = Seed; - Data[1] = Seed >> 32; - std::copy_n(Salt.begin(), Salt.size(), Data + 2); - - std::seed_seq SeedSeq(Data, Data + Size); - Generator.seed(SeedSeq); -} - -uint64_t RandomNumberGenerator::next(uint64_t Max) { - std::uniform_int_distribution distribution(0, Max - 1); - return distribution(Generator); -} From 42469f61f5ea6ce6dafcabfe0dffdd19dc4ab527 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 18 Jun 2014 08:47:09 +0000 Subject: [PATCH 030/157] [llvm-readobj][ELF] New `-mips-plt-got` command line option to output MIPS GOT section. Patch reviewed by Rafael Espindola. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211150 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Inputs/got-tls.so.elf-mips64el | Bin 0 -> 7398 bytes test/tools/llvm-readobj/mips-got.test | 306 ++++++++++++++++++ tools/llvm-readobj/ELFDumper.cpp | 217 +++++++++++++ tools/llvm-readobj/ObjDumper.h | 3 + tools/llvm-readobj/llvm-readobj.cpp | 20 ++ tools/llvm-readobj/llvm-readobj.h | 1 + 6 files changed, 547 insertions(+) create mode 100755 test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el create mode 100644 test/tools/llvm-readobj/mips-got.test diff --git a/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el b/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el new file mode 100755 index 0000000000000000000000000000000000000000..3afc567f85d588c8f34e29a305d40cd231f259e9 GIT binary patch literal 7398 zcmcH-TWlOx_3Vz5xN+L7lK_TRbQDG4i0;&JX-H5$Hr^ynBIiNuO6>IWvgQq@wUmP!#I#VtsUL`kdyRS*@j_(~-!Ys3fnL2{9*u2jfy&b{ZX zXYYe57kpfvRFMq~+nsY}Noa8+zIL(`ok(lG|GsS^X2SY99xL+Ny&{CfUGP9VvC zoF@8-UagIG-iWia`hpHrrU9I0xSe5& zA!~D|sBHA%9WHZ-z-;nA5CqTgmT$ zB}4vQrkj4>RP@cweU;6qb=VE>ho;|K>)81m(znIr;RU3BvN@A_{>aBk{I{uNv~MDA z8#F>ch5tKa=s!Ukt%0-hL9$FgLYwkJ$k5?oclhAI;9(bB zw>VyzaBF_9>br=RCQ3fIqX@t~HhjV@cvY`hs`*}Z_(Zl`nec{lBV|wX;yG@9I!6X_ z<t2DPzV)hobgrGoDLa?LGzzMCr)s!%SC;IV`{_bOW=IB|7jk5jY-9&y_LQ( z8~?YLO88k?@HgyJBkk7^$~U-8@sscm#clt&(z-r-ueIRYtV@jZ{?>cB72mxY{W|?$J>))ZRwx>?eF%m5a8xN%W-h;;MJ80+kXydrO!nJCdg>v-8Cs_*G~JKdkp+&K9?fTui|mVJEyE?t~0( z!3*JK&S4+I%gr1fTDF6W^?kt)>UJu zDuX%+9CxD!W9`Y}kutMP^MJB!4D(`)mcXR%izBgaAgY(G05jMJJm+>B$3edI%I>+R{(RE(=lR0dl-&aFO z(L7R~C?7Ul4|_0Q>D9|>ozU6}&^E3cT-8tUDsnJgO^zDCzduY4FyU7jA9j(>1+}XTlQ<$6d^om7#-Oy7Y`Dioq zOL?6P=8NVxayro8(NVNl;5n51M!yj{Yv~{F8LEjd6BoJxgXYbM_W{{2R{4G|@<-_) z$7b`nxXMsd@Bi04PyU-`N1#U{aXppIe%bCFs!Ub$UUkOKRtnyLH<2H+2TQ(Z_xJ7T zcl!3H_wV0rf6@7p(=X@vQUlG1uRiR27XKf`|ATZnDNam5f^It<32(UnH@Lx$kF~F8cDW z5%?kUqfN#ww4>O;rC1UDF}`FA>}5fTSL6gfjeL4z2rlmoff?4997+642HKG5Qx<)B zM+rR5g5qEFCI92dr!mnSQnOh=-~~-?{)!zT&>D>DOWmOfB`{VHzoI{5=*xa0&=x_4 z62Fk2$4=CL!1r5$gCfY#jGyk^q%UE<%lZOMc@zIJ>s!o_cZ}32%y>7{2G~2 zeK{AdvA)QO4HN$jGKePmlXu%9>x)0pH}OSekpDgrU?}<~on)yA{9~lA82q!!{_Q9c z`{w-r8EGGI4IwI;;tRbQulN;Ndh5&Bg+|b3eR}6co2gH@to^9mA{801_3*~<8SGOo zB>%F1(%uv2{}w4=d&n?u`qxtNzZPa$&Y`G;g%R6^UE*8#C+9}1d`GiB)k@z0vd_2D zH-hZnt@KUJI#4UU6J)<_rLR}-Xs;uXo&{ZP?{iIWuXkwrgCP49;nL#jDa5nzbtU_Y z$ZC2DWZ!6|cZ2K)t#qs;$afxLSXi%-?>9n5zdPFMIK;Q$6H&2d-@-~rT(eYlqm@JJ zse|e-Mx)O!5i?#|mI-`)o8#?O^!D*SiS#5^+2lM!RJG&yOZ0|(5KR9n)8&56WYFj2 
zS{~#(m_>mE=I18oQ_Rh=9l4*29Xc=K#`_A=H)8!r_7{Qc<>wbDPx)Cf-UYN@C-6pS zuB&$xy*)pFK|19j?)?xK=N%>AzRo{T_ETWKQ<6AapV0H(&Orze^qwY*0b3E`#N?K$@seGCZxyJJt1H9 z@$ob|>g2JymmNNJ#vMF1G)%Qc_fQLVJlkZgRT*7>+|5&+u?A9JjF4QnP;rap%1Ey4 zs>hd`o0 ELFO; @@ -159,6 +161,15 @@ getSectionNameIndex(const ELFO &Obj, typename ELFO::Elf_Sym_Iter Symbol, } } +template +static const typename ELFFile::Elf_Shdr * +findSectionByAddress(const ELFFile *Obj, uint64_t Addr) { + for (const auto &Shdr : Obj->sections()) + if (Shdr.sh_addr == Addr) + return &Shdr; + return nullptr; +} + static const EnumEntry ElfClass[] = { { "None", ELF::ELFCLASSNONE }, { "32-bit", ELF::ELFCLASS32 }, @@ -1021,3 +1032,209 @@ void ELFDumper >::printAttributes() { } } +namespace { +template class MipsGOTParser { +public: + typedef object::ELFFile ObjectFile; + typedef typename ObjectFile::Elf_Shdr Elf_Shdr; + + MipsGOTParser(const ObjectFile *Obj, StreamWriter &W) : Obj(Obj), W(W) {} + + void ParseGOT(const Elf_Shdr &GOTShdr); + +private: + typedef typename ObjectFile::Elf_Sym_Iter Elf_Sym_Iter; + typedef typename ObjectFile::Elf_Addr GOTEntry; + typedef typename ObjectFile::template ELFEntityIterator + GOTIter; + + const ObjectFile *Obj; + StreamWriter &W; + + std::size_t GetGOTTotal(ArrayRef GOT) const; + GOTIter MakeGOTIter(ArrayRef GOT, std::size_t EntryNum); + + bool getGOTTags(uint64_t &LocalGotNum, uint64_t &GotSym); + void printGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It); + void printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It, + Elf_Sym_Iter Sym); +}; +} + +template +void MipsGOTParser::ParseGOT(const Elf_Shdr &GOTShdr) { + // See "Global Offset Table" in Chapter 5 in the following document + // for detailed GOT description. + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + + ErrorOr> GOT = Obj->getSectionContents(&GOTShdr); + if (!GOT) { + W.startLine() << "The .got section is empty.\n"; + return; + } + + uint64_t DtLocalGotNum; + uint64_t DtGotSym; + if (!getGOTTags(DtLocalGotNum, DtGotSym)) + return; + + if (DtLocalGotNum > GetGOTTotal(*GOT)) { + W.startLine() << "MIPS_LOCAL_GOTNO exceeds a number of GOT entries.\n"; + return; + } + + Elf_Sym_Iter DynSymBegin = Obj->begin_dynamic_symbols(); + Elf_Sym_Iter DynSymEnd = Obj->end_dynamic_symbols(); + std::size_t DynSymTotal = std::size_t(std::distance(DynSymBegin, DynSymEnd)); + + if (DtGotSym + 1 > DynSymTotal) { + W.startLine() << "MIPS_GOTSYM exceeds a number of dynamic symbols.\n"; + return; + } + + std::size_t GlobalGotNum = DynSymTotal - DtGotSym; + + if (DtLocalGotNum + GlobalGotNum > GetGOTTotal(*GOT)) { + W.startLine() << "Number of global GOT entries exceeds the size of GOT.\n"; + return; + } + + GOTIter GotBegin = MakeGOTIter(*GOT, 0); + GOTIter GotLocalEnd = MakeGOTIter(*GOT, DtLocalGotNum); + GOTIter It = GotBegin; + + DictScope GS(W, "Primary GOT"); + + W.printHex("Canonical gp value", GOTShdr.sh_addr + 0x7ff0); + { + ListScope RS(W, "Reserved entries"); + + { + DictScope D(W, "Entry"); + printGotEntry(GOTShdr.sh_addr, GotBegin, It++); + W.printString("Purpose", StringRef("Lazy resolver")); + } + + if (It != GotLocalEnd && (*It >> (sizeof(GOTEntry) * 8 - 1)) != 0) { + DictScope D(W, "Entry"); + printGotEntry(GOTShdr.sh_addr, GotBegin, It++); + W.printString("Purpose", StringRef("Module pointer (GNU extension)")); + } + } + { + ListScope LS(W, "Local entries"); + for (; It != GotLocalEnd; ++It) { + DictScope D(W, "Entry"); + printGotEntry(GOTShdr.sh_addr, GotBegin, It); + } + } + { + ListScope GS(W, "Global entries"); + + GOTIter 
GotGlobalEnd = MakeGOTIter(*GOT, DtLocalGotNum + GlobalGotNum); + Elf_Sym_Iter GotDynSym = DynSymBegin + DtGotSym; + for (; It != GotGlobalEnd; ++It) { + DictScope D(W, "Entry"); + printGlobalGotEntry(GOTShdr.sh_addr, GotBegin, It, GotDynSym++); + } + } + + std::size_t SpecGotNum = GetGOTTotal(*GOT) - DtLocalGotNum - GlobalGotNum; + W.printNumber("Number of TLS and multi-GOT entries", SpecGotNum); +} + +template +std::size_t MipsGOTParser::GetGOTTotal(ArrayRef GOT) const { + return GOT.size() / sizeof(GOTEntry); +} + +template +typename MipsGOTParser::GOTIter +MipsGOTParser::MakeGOTIter(ArrayRef GOT, std::size_t EntryNum) { + const char *Data = reinterpret_cast(GOT.data()); + return GOTIter(sizeof(GOTEntry), Data + EntryNum * sizeof(GOTEntry)); +} + +template +bool MipsGOTParser::getGOTTags(uint64_t &LocalGotNum, uint64_t &GotSym) { + bool FoundLocalGotNum = false; + bool FoundGotSym = false; + for (const auto &Entry : Obj->dynamic_table()) { + switch (Entry.getTag()) { + case ELF::DT_MIPS_LOCAL_GOTNO: + LocalGotNum = Entry.getVal(); + FoundLocalGotNum = true; + break; + case ELF::DT_MIPS_GOTSYM: + GotSym = Entry.getVal(); + FoundGotSym = true; + break; + } + } + + if (!FoundLocalGotNum) { + W.startLine() << "Cannot find MIPS_LOCAL_GOTNO dynamic table tag.\n"; + return false; + } + + if (!FoundGotSym) { + W.startLine() << "Cannot find MIPS_GOTSYM dynamic table tag.\n"; + return false; + } + + return true; +} + +template +void MipsGOTParser::printGotEntry(uint64_t GotAddr, GOTIter BeginIt, + GOTIter It) { + int64_t Offset = std::distance(BeginIt, It) * sizeof(GOTEntry); + W.printHex("Address", GotAddr + Offset); + W.printNumber("Access", Offset - 0x7ff0); + W.printHex("Initial", *It); +} + +template +void MipsGOTParser::printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt, + GOTIter It, Elf_Sym_Iter Sym) { + printGotEntry(GotAddr, BeginIt, It); + + W.printHex("Value", Sym->st_value); + W.printEnum("Type", Sym->getType(), makeArrayRef(ElfSymbolTypes)); + + unsigned SectionIndex = 0; + StringRef SectionName; + getSectionNameIndex(*Obj, Sym, SectionName, SectionIndex); + W.printHex("Section", SectionName, SectionIndex); + + std::string FullSymbolName = getFullSymbolName(*Obj, Sym); + W.printNumber("Name", FullSymbolName, Sym->st_name); +} + +template void ELFDumper::printMipsPLTGOT() { + if (Obj->getHeader()->e_machine != EM_MIPS) { + W.startLine() << "MIPS PLT GOT is available for MIPS targets only.\n"; + return; + } + + llvm::Optional DtPltGot; + for (const auto &Entry : Obj->dynamic_table()) { + if (Entry.getTag() == ELF::DT_PLTGOT) { + DtPltGot = Entry.getVal(); + break; + } + } + + if (!DtPltGot) { + W.startLine() << "Cannot find PLTGOT dynamic table tag.\n"; + return; + } + + const Elf_Shdr *GotShdr = findSectionByAddress(Obj, *DtPltGot); + if (!GotShdr) { + W.startLine() << "There is no .got section in the file.\n"; + return; + } + + MipsGOTParser(Obj, W).ParseGOT(*GotShdr); +} diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h index 795193327bc5..f80a28b25cfc 100644 --- a/tools/llvm-readobj/ObjDumper.h +++ b/tools/llvm-readobj/ObjDumper.h @@ -40,6 +40,9 @@ class ObjDumper { // Only implemented for ARM ELF at this time. virtual void printAttributes() { } + // Only implemented for MIPS ELF at this time. 
+ virtual void printMipsPLTGOT() { } + protected: StreamWriter& W; }; diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 52db0ebbef3e..8d2a997a2312 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -135,6 +135,11 @@ namespace opts { cl::desc("Display the ARM attributes section")); cl::alias ARMAttributesShort("-a", cl::desc("Alias for --arm-attributes"), cl::aliasopt(ARMAttributes)); + + // -mips-plt-got + cl::opt + MipsPLTGOT("mips-plt-got", + cl::desc("Display the MIPS GOT and PLT GOT sections")); } // namespace opts static int ReturnValue = EXIT_SUCCESS; @@ -177,6 +182,18 @@ static void reportError(StringRef Input, StringRef Message) { ReturnValue = EXIT_FAILURE; } +static bool isMipsArch(unsigned Arch) { + switch (Arch) { + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; + default: + return false; + } +} + /// @brief Creates an format-specific object file dumper. static std::error_code createDumper(const ObjectFile *Obj, StreamWriter &Writer, std::unique_ptr &Result) { @@ -234,6 +251,9 @@ static void dumpObject(const ObjectFile *Obj) { if (Obj->getArch() == llvm::Triple::arm && Obj->isELF()) if (opts::ARMAttributes) Dumper->printAttributes(); + if (isMipsArch(Obj->getArch()) && Obj->isELF()) + if (opts::MipsPLTGOT) + Dumper->printMipsPLTGOT(); } diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h index 033195c1d49b..04139480a6d8 100644 --- a/tools/llvm-readobj/llvm-readobj.h +++ b/tools/llvm-readobj/llvm-readobj.h @@ -38,6 +38,7 @@ namespace opts { extern llvm::cl::opt ExpandRelocs; extern llvm::cl::opt CodeViewLineTables; extern llvm::cl::opt ARMAttributes; + extern llvm::cl::opt MipsPLTGOT; } // namespace opts #define LLVM_READOBJ_ENUM_ENT(ns, enum) \ From 4f05d1dac8dadd0e7ab4b7f31e656fb488f985df Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 18 Jun 2014 09:23:55 +0000 Subject: [PATCH 031/157] [llvm-readobj] Fix compile error. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211151 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/ELFDumper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index 03dfa379facf..7dc47be8a03e 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -1140,7 +1140,7 @@ void MipsGOTParser::ParseGOT(const Elf_Shdr &GOTShdr) { } std::size_t SpecGotNum = GetGOTTotal(*GOT) - DtLocalGotNum - GlobalGotNum; - W.printNumber("Number of TLS and multi-GOT entries", SpecGotNum); + W.printNumber("Number of TLS and multi-GOT entries", uint64_t(SpecGotNum)); } template From ef592db349f87eda79ac645602b5ec93929e20d8 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 18 Jun 2014 09:24:01 +0000 Subject: [PATCH 032/157] [llvm-readobj] Fix member functions name style. 
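The primary-GOT layout that MipsGOTParser::ParseGOT (added above) walks can be summed up with a little arithmetic: DT_MIPS_LOCAL_GOTNO covers the reserved and local entries, every dynamic symbol with index >= DT_MIPS_GOTSYM owns one global entry, and whatever remains is TLS/multi-GOT. The standalone sketch below just restates that bookkeeping; the numeric values are invented for illustration and do not come from any object file in these patches.

#include <cstdint>
#include <cstdio>

int main() {
  // Invented example values, as they would be read from the dynamic table,
  // the .dynsym section and the .got section header.
  uint64_t GotAddr = 0x10000;       // sh_addr of .got
  uint64_t TotalGotEntries = 24;    // .got size / sizeof(GOT entry)
  uint64_t DtLocalGotNum = 14;      // DT_MIPS_LOCAL_GOTNO
  uint64_t DtGotSym = 22;           // DT_MIPS_GOTSYM
  uint64_t DynSymTotal = 32;        // number of dynamic symbols

  // Entries [0, DtLocalGotNum) are reserved + local; dynamic symbols with
  // index >= DtGotSym each get one global entry; the rest is TLS/multi-GOT,
  // which is exactly how ParseGOT reports the section.
  uint64_t GlobalGotNum = DynSymTotal - DtGotSym;                        // 10
  uint64_t SpecGotNum = TotalGotEntries - DtLocalGotNum - GlobalGotNum;  // 0

  // gp is biased by 0x7ff0 so that signed 16-bit offsets reach a 64 KB window.
  uint64_t CanonicalGp = GotAddr + 0x7ff0;

  std::printf("local %llu global %llu other %llu gp 0x%llx\n",
              (unsigned long long)DtLocalGotNum,
              (unsigned long long)GlobalGotNum,
              (unsigned long long)SpecGotNum,
              (unsigned long long)CanonicalGp);
  return 0;
}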
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211152 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-readobj/ELFDumper.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index 7dc47be8a03e..502c7197ddb2 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -1040,7 +1040,7 @@ template class MipsGOTParser { MipsGOTParser(const ObjectFile *Obj, StreamWriter &W) : Obj(Obj), W(W) {} - void ParseGOT(const Elf_Shdr &GOTShdr); + void parseGOT(const Elf_Shdr &GOTShdr); private: typedef typename ObjectFile::Elf_Sym_Iter Elf_Sym_Iter; @@ -1051,8 +1051,8 @@ template class MipsGOTParser { const ObjectFile *Obj; StreamWriter &W; - std::size_t GetGOTTotal(ArrayRef GOT) const; - GOTIter MakeGOTIter(ArrayRef GOT, std::size_t EntryNum); + std::size_t getGOTTotal(ArrayRef GOT) const; + GOTIter makeGOTIter(ArrayRef GOT, std::size_t EntryNum); bool getGOTTags(uint64_t &LocalGotNum, uint64_t &GotSym); void printGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It); @@ -1062,7 +1062,7 @@ template class MipsGOTParser { } template -void MipsGOTParser::ParseGOT(const Elf_Shdr &GOTShdr) { +void MipsGOTParser::parseGOT(const Elf_Shdr &GOTShdr) { // See "Global Offset Table" in Chapter 5 in the following document // for detailed GOT description. // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf @@ -1078,7 +1078,7 @@ void MipsGOTParser::ParseGOT(const Elf_Shdr &GOTShdr) { if (!getGOTTags(DtLocalGotNum, DtGotSym)) return; - if (DtLocalGotNum > GetGOTTotal(*GOT)) { + if (DtLocalGotNum > getGOTTotal(*GOT)) { W.startLine() << "MIPS_LOCAL_GOTNO exceeds a number of GOT entries.\n"; return; } @@ -1094,13 +1094,13 @@ void MipsGOTParser::ParseGOT(const Elf_Shdr &GOTShdr) { std::size_t GlobalGotNum = DynSymTotal - DtGotSym; - if (DtLocalGotNum + GlobalGotNum > GetGOTTotal(*GOT)) { + if (DtLocalGotNum + GlobalGotNum > getGOTTotal(*GOT)) { W.startLine() << "Number of global GOT entries exceeds the size of GOT.\n"; return; } - GOTIter GotBegin = MakeGOTIter(*GOT, 0); - GOTIter GotLocalEnd = MakeGOTIter(*GOT, DtLocalGotNum); + GOTIter GotBegin = makeGOTIter(*GOT, 0); + GOTIter GotLocalEnd = makeGOTIter(*GOT, DtLocalGotNum); GOTIter It = GotBegin; DictScope GS(W, "Primary GOT"); @@ -1131,7 +1131,7 @@ void MipsGOTParser::ParseGOT(const Elf_Shdr &GOTShdr) { { ListScope GS(W, "Global entries"); - GOTIter GotGlobalEnd = MakeGOTIter(*GOT, DtLocalGotNum + GlobalGotNum); + GOTIter GotGlobalEnd = makeGOTIter(*GOT, DtLocalGotNum + GlobalGotNum); Elf_Sym_Iter GotDynSym = DynSymBegin + DtGotSym; for (; It != GotGlobalEnd; ++It) { DictScope D(W, "Entry"); @@ -1139,18 +1139,18 @@ void MipsGOTParser::ParseGOT(const Elf_Shdr &GOTShdr) { } } - std::size_t SpecGotNum = GetGOTTotal(*GOT) - DtLocalGotNum - GlobalGotNum; + std::size_t SpecGotNum = getGOTTotal(*GOT) - DtLocalGotNum - GlobalGotNum; W.printNumber("Number of TLS and multi-GOT entries", uint64_t(SpecGotNum)); } template -std::size_t MipsGOTParser::GetGOTTotal(ArrayRef GOT) const { +std::size_t MipsGOTParser::getGOTTotal(ArrayRef GOT) const { return GOT.size() / sizeof(GOTEntry); } template typename MipsGOTParser::GOTIter -MipsGOTParser::MakeGOTIter(ArrayRef GOT, std::size_t EntryNum) { +MipsGOTParser::makeGOTIter(ArrayRef GOT, std::size_t EntryNum) { const char *Data = reinterpret_cast(GOT.data()); return GOTIter(sizeof(GOTEntry), Data + EntryNum * sizeof(GOTEntry)); } @@ -1236,5 +1236,5 @@ template void 
ELFDumper::printMipsPLTGOT() { return; } - MipsGOTParser(Obj, W).ParseGOT(*GotShdr); + MipsGOTParser(Obj, W).parseGOT(*GotShdr); } From 5393254646553a919753f0bca76dfc2dd56fb26b Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 18 Jun 2014 11:52:44 +0000 Subject: [PATCH 033/157] DAG: move sret demotion into most basic LowerCallTo implementation. It looks like there are two versions of LowerCallTo here: the SelectionDAGBuilder one is designed to operate on LLVM IR, and the TargetLowering one in the case where everything is at DAG level. Previously, only the SelectionDAGBuilder variant could handle demoting an impossible return to sret semantics (before delegating to the TargetLowering version), but this functionality is also useful for certain libcalls (e.g. 128-bit operations on 32-bit x86). So this commit moves the sret handling down a level. rdar://problem/17242889 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211155 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 243 +++++++++--------- test/CodeGen/X86/libcall-sret.ll | 28 ++ 2 files changed, 152 insertions(+), 119 deletions(-) create mode 100644 test/CodeGen/X86/libcall-sret.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 136baf56e8a7..e6dc27219787 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5439,6 +5439,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { + const TargetLowering *TLI = TM.getTargetLowering(); PointerType *PT = cast(CS.getCalledValue()->getType()); FunctionType *FTy = cast(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5449,45 +5450,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - // Check whether the function can return without sret-demotion. 
- SmallVector Outs; - const TargetLowering *TLI = TM.getTargetLowering(); - GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - - bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); - - SDValue DemoteStackSlot; - int DemoteStackIdx = -100; - - if (!CanLowerReturn) { - assert(!CS.hasInAllocaArgument() && - "sret demotion is incompatible with inalloca"); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( - FTy->getReturnType()); - MachineFunction &MF = DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); - } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5540,46 +5502,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) { + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - } else if (!CanLowerReturn && Result.second.getNode()) { - // The instruction result is the result of loading from the - // hidden sret parameter. - SmallVector PVTs; - Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - - ComputeValueVTs(*TLI, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; - - SmallVector RetTys; - SmallVector Offsets; - RetTy = FTy->getReturnType(); - ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); - - unsigned NumValues = RetTys.size(); - SmallVector Values(NumValues); - SmallVector Chains(NumValues); - - for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, - DemoteStackSlot, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, false, 1); - Values[i] = L; - Chains[i] = L.getValue(1); - } - - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, Chains); - PendingLoads.push_back(Chain); - - setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(RetTys), Values)); - } if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -7121,6 +7045,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. 
+static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are @@ -7129,24 +7068,62 @@ std::pair TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); + SmallVector Offsets; + ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + + SmallVector Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? + // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); + unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.isReturned = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + } else { + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } } } @@ -7289,31 +7266,59 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { "LowerCall emitted a value with the wrong type!"); }); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. 
- ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (CLI.RetSExt) - AssertOp = ISD::AssertSext; - else if (CLI.RetZExt) - AssertOp = ISD::AssertZext; SmallVector ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - - ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, nullptr, - AssertOp)); - CurReg += NumRegs; - } - - // For a function returning void, there is no return value. We can't create - // such a node, so we just return a null return value in that case. In - // that case, nothing will actually look at the value. - if (ReturnValues.empty()) - return std::make_pair(SDValue(), CLI.Chain); + if (!CanLowerReturn) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector PVTs; + Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); + + ComputeValueVTs(*this, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + unsigned NumValues = RetTys.size(); + ReturnValues.resize(NumValues); + SmallVector Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, + CLI.DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = CLI.DAG.getLoad( + RetTys[i], CLI.DL, CLI.Chain, Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, + false, false, 1); + ReturnValues[i] = L; + Chains[i] = L.getValue(1); + } + + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); + } else { + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (CLI.RetSExt) + AssertOp = ISD::AssertSext; + else if (CLI.RetZExt) + AssertOp = ISD::AssertZext; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], + NumRegs, RegisterVT, VT, nullptr, + AssertOp)); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value. + if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, CLI.DAG.getVTList(RetTys), ReturnValues); diff --git a/test/CodeGen/X86/libcall-sret.ll b/test/CodeGen/X86/libcall-sret.ll new file mode 100644 index 000000000000..67b99ac239cd --- /dev/null +++ b/test/CodeGen/X86/libcall-sret.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=i686-linux-gnu -o - %s | FileCheck %s + +@var = global i128 0 + +; We were trying to convert the i128 operation into a libcall, but failing to +; perform sret demotion when we couldn't return the result in registers. Make +; sure we marshal the return properly: + +define void @test_sret_libcall(i128 %l, i128 %r) { +; CHECK-LABEL: test_sret_libcall: + + ; Stack for call: 4(sret ptr), 16(i128 %l), 16(128 %r). So next logical + ; (aligned) place for the actual sret data is %esp + 40. 
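+  ; That is, 4 + 16 + 16 = 36 bytes of outgoing arguments, so 40 is the next
+  ; suitably aligned offset past them.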
+; CHECK: leal 40(%esp), [[SRET_ADDR:%[a-z]+]] +; CHECK: movl [[SRET_ADDR]], (%esp) +; CHECK: calll __multi3 +; CHECK-DAG: movl 40(%esp), [[RES0:%[a-z]+]] +; CHECK-DAG: movl 44(%esp), [[RES1:%[a-z]+]] +; CHECK-DAG: movl 48(%esp), [[RES2:%[a-z]+]] +; CHECK-DAG: movl 52(%esp), [[RES3:%[a-z]+]] +; CHECK-DAG: movl [[RES0]], var +; CHECK-DAG: movl [[RES1]], var+4 +; CHECK-DAG: movl [[RES2]], var+8 +; CHECK-DAG: movl [[RES3]], var+12 + %prod = mul i128 %l, %r + store i128 %prod, i128* @var + ret void +} From fc72762a0f81b0c2d78ed084c853100a496cadff Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 18 Jun 2014 12:02:29 +0000 Subject: [PATCH 034/157] [msan] Handle X86 *.psad.* and *.pmadd.* intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211156 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/MemorySanitizer.cpp | 55 ++++++++++++++++ .../MemorySanitizer/vector_arith.ll | 65 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 test/Instrumentation/MemorySanitizer/vector_arith.ll diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e71e540aa82d..75c56c2d4300 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2060,6 +2060,40 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } + // \brief Instrument sum-of-absolute-differencies intrinsic. + void handleVectorSadIntrinsic(IntrinsicInst &I) { + const unsigned SignificantBitsPerResultElement = 16; + bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); + Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType(); + unsigned ZeroBitsPerResultElement = + ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement; + + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + S = IRB.CreateBitCast(S, ResTy); + S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), + ResTy); + S = IRB.CreateLShr(S, ZeroBitsPerResultElement); + S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + + // \brief Instrument multiply-add intrinsic. + void handleVectorPmaddIntrinsic(IntrinsicInst &I, + unsigned EltSizeInBits = 0) { + bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); + Type *ResTy = isX86_MMX ? 
getMMXVectorTy(EltSizeInBits * 2) : I.getType(); + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + S = IRB.CreateBitCast(S, ResTy); + S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), + ResTy); + S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: @@ -2196,6 +2230,27 @@ struct MemorySanitizerVisitor : public InstVisitor { handleVectorPackIntrinsic(I, 32); break; + case llvm::Intrinsic::x86_mmx_psad_bw: + case llvm::Intrinsic::x86_sse2_psad_bw: + case llvm::Intrinsic::x86_avx2_psad_bw: + handleVectorSadIntrinsic(I); + break; + + case llvm::Intrinsic::x86_sse2_pmadd_wd: + case llvm::Intrinsic::x86_avx2_pmadd_wd: + case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw_128: + case llvm::Intrinsic::x86_avx2_pmadd_ub_sw: + handleVectorPmaddIntrinsic(I); + break; + + case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw: + handleVectorPmaddIntrinsic(I, 8); + break; + + case llvm::Intrinsic::x86_mmx_pmadd_wd: + handleVectorPmaddIntrinsic(I, 16); + break; + default: if (!handleUnknownIntrinsic(I)) visitInstruction(I); diff --git a/test/Instrumentation/MemorySanitizer/vector_arith.ll b/test/Instrumentation/MemorySanitizer/vector_arith.ll new file mode 100644 index 000000000000..6541a1c3a394 --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone +declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone +declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone + +define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory { +entry: + %c = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b) nounwind + ret <4 x i32> %c +} + +; CHECK-LABEL: @Test_sse2_pmadd_wd( +; CHECK: or <8 x i16> +; CHECK: bitcast <8 x i16> {{.*}} to <4 x i32> +; CHECK: icmp ne <4 x i32> {{.*}}, zeroinitializer +; CHECK: sext <4 x i1> {{.*}} to <4 x i32> +; CHECK: ret <4 x i32> + + +define x86_mmx @Test_ssse3_pmadd_ub_sw(x86_mmx %a, x86_mmx %b) sanitize_memory { +entry: + %c = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind + ret x86_mmx %c +} + +; CHECK-LABEL: @Test_ssse3_pmadd_ub_sw( +; CHECK: or i64 +; CHECK: bitcast i64 {{.*}} to <4 x i16> +; CHECK: icmp ne <4 x i16> {{.*}}, zeroinitializer +; CHECK: sext <4 x i1> {{.*}} to <4 x i16> +; CHECK: bitcast <4 x i16> {{.*}} to i64 +; CHECK: ret x86_mmx + + +define <2 x i64> @Test_x86_sse2_psad_bw(<16 x i8> %a, <16 x i8> %b) sanitize_memory { + %c = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a, <16 x i8> %b) + ret <2 x i64> %c +} + +; CHECK-LABEL: @Test_x86_sse2_psad_bw( +; CHECK: or <16 x i8> {{.*}}, {{.*}} +; CHECK: bitcast <16 x i8> {{.*}} to <2 x i64> +; CHECK: icmp ne <2 x i64> {{.*}}, zeroinitializer +; CHECK: sext <2 x i1> {{.*}} to <2 x i64> +; CHECK: lshr <2 x i64> {{.*}}, +; CHECK: ret <2 x i64> + + +define x86_mmx @Test_x86_mmx_psad_bw(x86_mmx %a, x86_mmx %b) sanitize_memory { +entry: + %c = tail 
call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind + ret x86_mmx %c +} + +; CHECK-LABEL: @Test_x86_mmx_psad_bw( +; CHECK: or i64 +; CHECK: icmp ne i64 +; CHECK: sext i1 {{.*}} to i64 +; CHECK: lshr i64 {{.*}}, 48 +; CHECK: ret x86_mmx From a64058f3eb61fffc828aa4f31deb625adb06e79c Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Wed, 18 Jun 2014 12:27:13 +0000 Subject: [PATCH 035/157] R600: Implement 64bit SHL v2: Use c++ style comment Signed-off-by: Jan Vesely git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211157 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 41 ++++++++++ lib/Target/R600/R600ISelLowering.h | 1 + test/CodeGen/R600/shl.ll | 117 ++++++++++++++++++++++++++- 3 files changed, 157 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index beea54e14e9a..444190e17947 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -157,6 +157,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIV, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); + // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 + // to be Legal/Custom in order to avoid library calls. + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -552,6 +556,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -905,6 +910,42 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { DAG.getConstantFP(3.14159265359, MVT::f32)); } +SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shift = Op.getOperand(2); + SDValue Zero = DAG.getConstant(0, VT); + SDValue One = DAG.getConstant(1, VT); + + SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT); + SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); + SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); + SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); + + // The dance around Width1 is necessary for 0 special case. + // Without it the CompShift might be 32, producing incorrect results in + // Overflow. So we do the shift in two steps, the alternative is to + // add a conditional to filter the special case. 
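+ // For example, with 32-bit elements and Shift == 0 this gives CompShift == 31,
+ // so Overflow == (Lo >> 31) >> 1 == 0, whereas the single-step form
+ // Lo >> (32 - Shift) would be Lo >> 32, which is not a well-defined shift
+ // for a 32-bit value.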
+ + SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift); + Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One); + + SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift); + HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow); + SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift); + + SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift); + SDValue LoBig = Zero; + + Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); + Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::SETCC, diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index ed12dab48b4a..a7d83742c1c6 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -60,6 +60,7 @@ class R600TargetLowering : public AMDGPUTargetLowering { SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll index 4a6aab4a104a..43fab2a39dcc 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/R600/shl.ll @@ -39,5 +39,118 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -; XXX: Add SI test for i64 shl once i64 stores and i64 function arguments are -; supported. +;EG-CHECK: @shl_i64 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 + +;SI-CHECK: @shl_i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = shl i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK: @shl_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? 
*}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @shl_v2i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = shl <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +;EG-CHECK: @shl_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? 
*T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @shl_v4i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = shl <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} From 2d06e73d88d8acb201622be5838ff2bac1b0a4c7 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Wed, 18 Jun 2014 12:27:15 +0000 Subject: [PATCH 036/157] R600: Implement 64bit SRL v2: use C++ style comment Signed-off-by: Jan Vesely git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211158 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 40 +++++++++ lib/Target/R600/R600ISelLowering.h | 1 + test/CodeGen/R600/srl.ll | 126 +++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 444190e17947..ad5a82fcfea1 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -160,6 +160,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 // to be Legal/Custom in order to avoid library calls. setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -557,6 +558,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); + case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -946,6 +948,44 @@ SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); } +SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shift = Op.getOperand(2); + SDValue Zero = DAG.getConstant(0, VT); + SDValue One = DAG.getConstant(1, VT); + + SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT); + SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); + SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); + SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); + + // The dance around Width1 is necessary for 0 special case. + // Without it the CompShift might be 32, producing incorrect results in + // Overflow. So we do the shift in two steps, the alternative is to + // add a conditional to filter the special case. 
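+ // For example, with 32-bit elements and Shift == 0 this gives CompShift == 31,
+ // so Overflow == (Hi << 31) << 1 == 0, whereas the single-step form
+ // Hi << (32 - Shift) would be Hi << 32, which is not a well-defined shift
+ // for a 32-bit value.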
+ + SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); + Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); + + // TODO: SRA support here + SDValue HiSmall = DAG.getNode(ISD::SRL, DL, VT, Hi, Shift); + SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); + LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); + + // TODO: SRA support here + SDValue LoBig = DAG.getNode(ISD::SRL, DL, VT, Hi, BigShift); + SDValue HiBig = Zero; + + Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); + Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::SETCC, diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index a7d83742c1c6..381642aa600c 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -61,6 +61,7 @@ class R600TargetLowering : public AMDGPUTargetLowering { SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll index 76373552fb16..44ad73f073ed 100644 --- a/test/CodeGen/R600/srl.ll +++ b/test/CodeGen/R600/srl.ll @@ -39,3 +39,129 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +;EG-CHECK: @lshr_i64 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 + +;SI-CHECK: @lshr_i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = lshr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK: @lshr_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? 
*}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @lshr_v2i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = lshr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + + +;EG-CHECK: @lshr_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? 
*T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @lshr_v4i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = lshr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} From c32d52df24505b29bc4f9385d701c5bcec594fad Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Wed, 18 Jun 2014 12:27:17 +0000 Subject: [PATCH 037/157] R600: Implement 64bit SRA v2: Use capitalized variable name Signed-off-by: Jan Vesely git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211159 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 12 +-- test/CodeGen/R600/sra.ll | 130 +++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 5 deletions(-) diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index ad5a82fcfea1..f0e13e56d8ff 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -161,6 +161,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : // to be Legal/Custom in order to avoid library calls. setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -558,6 +559,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); + case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); @@ -958,6 +960,8 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { SDValue Zero = DAG.getConstant(0, VT); SDValue One = DAG.getConstant(1, VT); + const bool SRA = Op.getOpcode() == ISD::SRA_PARTS; + SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT); SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); @@ -971,14 +975,12 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); - // TODO: SRA support here - SDValue HiSmall = DAG.getNode(ISD::SRL, DL, VT, Hi, Shift); + SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift); SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); - // TODO: SRA support here - SDValue LoBig = DAG.getNode(ISD::SRL, DL, VT, Hi, BigShift); - SDValue HiBig = Zero; + SDValue LoBig = DAG.getNode(SRA ? 
ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift); + SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero; Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index fe9df104ae11..9eb3dc544074 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -52,3 +52,133 @@ entry: ret void } +;EG-CHECK-LABEL: @ashr_i64_2 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-CHECK-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} + +;SI-CHECK-LABEL: @ashr_i64_2 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { +entry: + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = ashr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK-LABEL: @ashr_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? 
*T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK-LABEL: @ashr_v2i64 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = ashr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +;EG-CHECK-LABEL: @ashr_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? 
*T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK-LABEL: @ashr_v4i64 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = ashr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} + From dd9e510da7c6d4e9fe3866bae88b5e50e436db9b Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Wed, 18 Jun 2014 13:08:59 +0000 Subject: [PATCH 038/157] [mips] Remove the last usage of parseRegister from MipsAsmParser. Summary: Added negative test case so that we can be sure we handle erroneous situations while parsing the .cpsetup directive. Reviewers: dsanders Reviewed By: dsanders Differential Revision: http://reviews.llvm.org/D3681 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211160 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 62 +++++++++++---------- test/MC/Mips/cpsetup-bad.s | 14 +++++ 2 files changed, 47 insertions(+), 29 deletions(-) create mode 100644 test/MC/Mips/cpsetup-bad.s diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 3133a51b3f9e..1c1a8265dee2 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -183,8 +183,6 @@ class MipsAsmParser : public MCTargetAsmParser { return STI.getFeatureBits() & Mips::FeatureMips64r6; } - bool parseRegister(unsigned &RegNum); - bool eatComma(StringRef ErrorStr); int matchCPURegisterName(StringRef Symbol); @@ -2342,25 +2340,6 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) { return false; } -bool MipsAsmParser::parseRegister(unsigned &RegNum) { - if (!getLexer().is(AsmToken::Dollar)) - return false; - - Parser.Lex(); - - const AsmToken &Reg = Parser.getTok(); - if (Reg.is(AsmToken::Identifier)) { - RegNum = matchCPURegisterName(Reg.getIdentifier()); - } else if (Reg.is(AsmToken::Integer)) { - RegNum = Reg.getIntVal(); - } else { - return false; - } - - Parser.Lex(); - return true; -} - bool MipsAsmParser::eatComma(StringRef ErrorStr) { if (getLexer().isNot(AsmToken::Comma)) { SMLoc Loc = getLexer().getLoc(); @@ -2400,23 +2379,48 @@ bool MipsAsmParser::parseDirectiveCPSetup() { unsigned Save; bool SaveIsReg = true; - if (!parseRegister(FuncReg)) - return reportParseError("expected register containing function address"); - FuncReg = getGPR(FuncReg); + SmallVector, 1> TmpReg; + OperandMatchResultTy ResTy = ParseAnyRegister(TmpReg); + if (ResTy == MatchOperand_NoMatch) { + reportParseError("expected register containing function address"); + Parser.eatToEndOfStatement(); + return false; + } + + MipsOperand &FuncRegOpnd = static_cast(*TmpReg[0]); + if (!FuncRegOpnd.isGPRAsmReg()) { + reportParseError(FuncRegOpnd.getStartLoc(), "invalid register"); + Parser.eatToEndOfStatement(); + return false; + } + + FuncReg = FuncRegOpnd.getGPR32Reg(); + TmpReg.clear(); if (!eatComma("expected 
comma parsing directive")) return true; - if (!parseRegister(Save)) { + ResTy = ParseAnyRegister(TmpReg); + if (ResTy == MatchOperand_NoMatch) { const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Integer)) { Save = Tok.getIntVal(); SaveIsReg = false; Parser.Lex(); - } else - return reportParseError("expected save register or stack offset"); - } else - Save = getGPR(Save); + } else { + reportParseError("expected save register or stack offset"); + Parser.eatToEndOfStatement(); + return false; + } + } else { + MipsOperand &SaveOpnd = static_cast(*TmpReg[0]); + if (!SaveOpnd.isGPRAsmReg()) { + reportParseError(SaveOpnd.getStartLoc(), "invalid register"); + Parser.eatToEndOfStatement(); + return false; + } + Save = SaveOpnd.getGPR32Reg(); + } if (!eatComma("expected comma parsing directive")) return true; diff --git a/test/MC/Mips/cpsetup-bad.s b/test/MC/Mips/cpsetup-bad.s new file mode 100644 index 000000000000..09252a1310ed --- /dev/null +++ b/test/MC/Mips/cpsetup-bad.s @@ -0,0 +1,14 @@ +# RUN: not llvm-mc %s -triple mips64-unknown-unknown 2>%t1 +# RUN: FileCheck %s < %t1 -check-prefix=ASM + + .text + .option pic2 +t1: + .cpsetup $bar, 8, __cerror +# ASM: :[[@LINE-1]]:18: error: expected register containing function address + .cpsetup $33, 8, __cerror +# ASM: :[[@LINE-1]]:18: error: invalid register + .cpsetup $31, foo, __cerror +# ASM: :[[@LINE-1]]:23: error: expected save register or stack offset + .cpsetup $31, $32, __cerror +# ASM: :[[@LINE-1]]:23: error: invalid register From 86f9d299d7a93df88c2c2509c20f0f6191683876 Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Wed, 18 Jun 2014 13:30:57 +0000 Subject: [PATCH 039/157] [mips] Implement alias for 'and' and 'or' instructions for all ISAs. Summary: Examples: and $2, 4 <=> andi $2, $2, 4 or $2, 4 <=> ori $2, $2, 4 Reviewers: dsanders Reviewed By: dsanders Differential Revision: http://reviews.llvm.org/D4155 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211161 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrInfo.td | 4 ++++ test/MC/Mips/mips1/valid.s | 2 ++ test/MC/Mips/mips2/valid.s | 2 ++ test/MC/Mips/mips3/valid.s | 2 ++ test/MC/Mips/mips32/valid.s | 2 ++ test/MC/Mips/mips32r2/valid.s | 2 ++ test/MC/Mips/mips32r6/valid.s | 2 ++ test/MC/Mips/mips4/valid.s | 2 ++ test/MC/Mips/mips5/valid.s | 2 ++ test/MC/Mips/mips64/valid.s | 2 ++ test/MC/Mips/mips64r2/valid.s | 2 ++ test/MC/Mips/mips64r6/valid.s | 2 ++ 12 files changed, 26 insertions(+) diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 6e8e4980109c..7c13f9a9c8d7 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1419,6 +1419,8 @@ def : MipsInstAlias<"add $rs, $rt, $imm", (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; def : MipsInstAlias<"and $rs, $rt, $imm", (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; +def : MipsInstAlias<"and $rs, $imm", + (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>; def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>; let Predicates = [NotInMicroMips] in { def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>; @@ -1442,6 +1444,8 @@ def : MipsInstAlias<"xor $rs, $rt, $imm", (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; def : MipsInstAlias<"or $rs, $rt, $imm", (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; +def : MipsInstAlias<"or $rs, $imm", + (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>; def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, 
GPR32Opnd:$rd, 0), 0>; def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s index aacbfe5401fd..7cfcc3cf32f1 100644 --- a/test/MC/Mips/mips1/valid.s +++ b/test/MC/Mips/mips1/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1t $fcc0, 4 # CHECK: bc1t 4 # encoding: [0x45,0x01,0x00,0x01] @@ -73,6 +74,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] sb $s6,-19857($14) sh $14,-6704($15) sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80] diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s index fbbc983ecd46..109752551de4 100644 --- a/test/MC/Mips/mips2/valid.s +++ b/test/MC/Mips/mips2/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1t $fcc0, 4 # CHECK: bc1t 4 # encoding: [0x45,0x01,0x00,0x01] @@ -81,6 +82,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] round.w.d $f6,$f4 round.w.s $f27,$f28 sb $s6,-19857($14) diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s index 7bd45a8be45a..6dfef03cca07 100644 --- a/test/MC/Mips/mips3/valid.s +++ b/test/MC/Mips/mips3/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1t $fcc0, 4 # CHECK: bc1t 4 # encoding: [0x45,0x01,0x00,0x01] @@ -134,6 +135,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] round.l.d $f12,$f1 round.l.s $f25,$f5 round.w.d $f6,$f4 diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s index a2355ef9a65f..3399c9ddd5a3 100644 --- a/test/MC/Mips/mips32/valid.s +++ b/test/MC/Mips/mips32/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -107,6 +108,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.w.d $f6,$f4 round.w.s $f27,$f28 diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s index c814788744d3..341b434603bc 100644 --- a/test/MC/Mips/mips32r2/valid.s +++ b/test/MC/Mips/mips32r2/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -127,6 +128,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: 
[0x34,0x42,0x00,0x04] pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] rdhwr $sp,$11 diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index 96cbcc832055..42263d4672a6 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -15,6 +15,7 @@ .set noat # FIXME: Add the instructions carried forward from older ISA's + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] addiupc $4, 100 # CHECK: addiupc $4, 100 # encoding: [0xec,0x80,0x00,0x19] align $4, $2, $3, 2 # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0] aluipc $3, 56 # CHECK: aluipc $3, 56 # encoding: [0xec,0x7f,0x00,0x38] @@ -122,6 +123,7 @@ maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f] mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] seleqz.s $f0, $f2, $f4 # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14] seleqz.d $f0, $f2, $f4 # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14] selnez.s $f0, $f2, $f4 # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17] diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s index 56e0be6fd0a1..23f2fec5b256 100644 --- a/test/MC/Mips/mips4/valid.s +++ b/test/MC/Mips/mips4/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -150,6 +151,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.l.d $f12,$f1 round.l.s $f25,$f5 diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s index 1505b3c0d8af..b84d4bba9c18 100644 --- a/test/MC/Mips/mips5/valid.s +++ b/test/MC/Mips/mips5/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -151,6 +152,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.l.d $f12,$f1 round.l.s $f25,$f5 diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s index f7d882a4edcf..5e24e74b1db8 100644 --- a/test/MC/Mips/mips64/valid.s +++ b/test/MC/Mips/mips64/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -165,6 +166,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.l.d $f12,$f1 
round.l.s $f25,$f5 diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s index 6ee24c2acec8..7a1273ced626 100644 --- a/test/MC/Mips/mips64r2/valid.s +++ b/test/MC/Mips/mips64r2/valid.s @@ -11,6 +11,7 @@ addi $13,$9,26322 addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -185,6 +186,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] rdhwr $sp,$11 diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index 1a51f8687997..e9df27558857 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -15,6 +15,7 @@ .set noat # FIXME: Add the instructions carried forward from older ISA's + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] addiupc $4, 100 # CHECK: addiupc $4, 100 # encoding: [0xec,0x80,0x00,0x19] align $4, $2, $3, 2 # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0] aluipc $3, 56 # CHECK: aluipc $3, 56 # encoding: [0xec,0x7f,0x00,0x38] @@ -136,6 +137,7 @@ maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f] mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] seleqz.s $f0, $f2, $f4 # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14] seleqz.d $f0, $f2, $f4 # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14] selnez.s $f0, $f2, $f4 # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17] From 20887ffc80ef65deef446f702b903a7f1cab7d69 Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Wed, 18 Jun 2014 13:55:18 +0000 Subject: [PATCH 040/157] [mips] Update MipsAsmParser so that it's possible to handle immediates that start with the binary operator NOT (~). Reviewers: dsanders Reviewed By: dsanders Differential Revision: http://reviews.llvm.org/D4158 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211163 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2 ++ test/MC/Mips/mips-expansions.s | 3 +++ test/MC/Mips/mips1/valid.s | 1 + test/MC/Mips/mips2/valid.s | 1 + test/MC/Mips/mips3/valid.s | 1 + test/MC/Mips/mips32/valid.s | 1 + test/MC/Mips/mips32r2/valid.s | 1 + test/MC/Mips/mips4/valid.s | 1 + test/MC/Mips/mips5/valid.s | 1 + test/MC/Mips/mips64/valid.s | 1 + test/MC/Mips/mips64r2/valid.s | 1 + 11 files changed, 14 insertions(+) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 1c1a8265dee2..70a7d341d3f6 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1480,6 +1480,7 @@ bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) { case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: + case AsmToken::Tilde: case AsmToken::String: { DEBUG(dbgs() << ".. 
generic integer\n"); OperandMatchResultTy ResTy = ParseImm(Operands); @@ -1904,6 +1905,7 @@ MipsAsmParser::ParseImm(OperandVector &Operands) { case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: + case AsmToken::Tilde: case AsmToken::String: break; } diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s index 1622965a4139..af80ccaa788f 100644 --- a/test/MC/Mips/mips-expansions.s +++ b/test/MC/Mips/mips-expansions.s @@ -8,6 +8,8 @@ # CHECK: addiu $6, $zero, -2345 # encoding: [0xd7,0xf6,0x06,0x24] # CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c] # CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34] +# CHECK: addiu $8, $zero, -8 # encoding: [0xf8,0xff,0x08,0x24] + # CHECK: addiu $4, $zero, 20 # encoding: [0x14,0x00,0x04,0x24] # CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c] # CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34] @@ -35,6 +37,7 @@ li $5,123 li $6,-2345 li $7,65538 + li $8, ~7 la $a0, 20 la $7,65538 diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s index 7cfcc3cf32f1..66e11ba2fe52 100644 --- a/test/MC/Mips/mips1/valid.s +++ b/test/MC/Mips/mips1/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s index 109752551de4..d4f48ec8d64d 100644 --- a/test/MC/Mips/mips2/valid.s +++ b/test/MC/Mips/mips2/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s index 6dfef03cca07..a68dc91420b2 100644 --- a/test/MC/Mips/mips3/valid.s +++ b/test/MC/Mips/mips3/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s index 3399c9ddd5a3..68db86e91c74 100644 --- a/test/MC/Mips/mips32/valid.s +++ b/test/MC/Mips/mips32/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s index 341b434603bc..b92facf666e9 100644 --- a/test/MC/Mips/mips32r2/valid.s +++ b/test/MC/Mips/mips32r2/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s index 23f2fec5b256..2086a12602a8 100644 --- a/test/MC/Mips/mips4/valid.s +++ b/test/MC/Mips/mips4/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git 
a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s index b84d4bba9c18..3f350b1d368f 100644 --- a/test/MC/Mips/mips5/valid.s +++ b/test/MC/Mips/mips5/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s index 5e24e74b1db8..3f4149a8f5a3 100644 --- a/test/MC/Mips/mips64/valid.s +++ b/test/MC/Mips/mips64/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s index 7a1273ced626..df144921a740 100644 --- a/test/MC/Mips/mips64r2/valid.s +++ b/test/MC/Mips/mips64r2/valid.s @@ -9,6 +9,7 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] From c52345c0fce6ffdef4f4e96b27f3bff3e365834c Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Wed, 18 Jun 2014 14:04:37 +0000 Subject: [PATCH 041/157] Add pattern for unsigned v4i32->v4f64 convert on AVX512. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211164 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 4 ++++ test/CodeGen/X86/avx512-cvt.ll | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c132663f473e..4b9f560597fe 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3174,6 +3174,10 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; +def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), + (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr + (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; + def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 2476ea1253e6..f5cda96b99fa 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -192,6 +192,14 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind { ret <16 x double> %b } +; CHECK-LABEL: uitof64_256 +; CHECK: vcvtudq2pd +; CHECK: ret +define <4 x double> @uitof64_256(<4 x i32> %a) nounwind { + %b = uitofp <4 x i32> %a to <4 x double> + ret <4 x double> %b +} + ; CHECK-LABEL: uitof32 ; CHECK: vcvtudq2ps ; CHECK: ret From 7047ad9a087fc0a52118977b00184b749af5be7d Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Wed, 18 Jun 2014 14:15:42 +0000 Subject: [PATCH 042/157] [mips] Access $at only if necessary. Summary: This patch doesn't really change the logic behind expandMemInst but it allows us to assemble .S files that use .set noat with some macros. For example: .set noat lw $k0, offset($k1) Can expand to: lui $k0, %hi(offset) addu $k0, $k0, $k1 lw $k0, %lo(offset)($k0) with no need to access $at. 
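As a rough standalone sketch of the rule this change implements (the function and parameter names below are invented for illustration, not taken from MipsAsmParser):

  // Pick the temporary register used when expanding a macro memory access.
  // A load whose destination differs from the base register can reuse the
  // destination as the scratch register, so $at is never touched; stores,
  // and loads where destination == base, still fall back to $at.
  unsigned chooseScratchReg(bool IsLoad, unsigned DstReg, unsigned BaseReg,
                            unsigned ATReg) {
    if (IsLoad && DstReg != BaseReg)
      return DstReg; // e.g. lw $k0, offset($k1) expands through $k0 itself
    return ATReg;    // e.g. sw $k0, offset($k1) needs a scratch distinct
                     // from the source, so it must go through $at
  }

Under .set noat that fallback path is unavailable, which is why only the load form assembles here; a later patch in this series additionally requires the destination to be a general-purpose register before reusing it.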
Reviewers: dsanders, vmedic Reviewed By: dsanders, vmedic Differential Revision: http://reviews.llvm.org/D4159 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211165 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 9 ++++++--- test/MC/Mips/mips-expansions.s | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 70a7d341d3f6..8286f51b3ee2 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1125,8 +1125,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, unsigned ImmOffset, HiOffset, LoOffset; const MCExpr *ExprOffset; unsigned TmpRegNum; - unsigned AtRegNum = getReg( - (isGP64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg()); // 1st operand is either the source or destination register. assert(Inst.getOperand(0).isReg() && "expected register operand kind"); unsigned RegOpNum = Inst.getOperand(0).getReg(); @@ -1149,7 +1147,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, // 1st instruction in expansion is LUi. For load instruction we can use // the dst register as a temporary if base and dst are different, // but for stores we must use $at. - TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum; + if (isLoad && (BaseRegNum != RegOpNum)) + TmpRegNum = RegOpNum; + else + TmpRegNum = getReg( + (isGP64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg()); + TempInst.setOpcode(Mips::LUi); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); if (isImmOpnd) diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s index af80ccaa788f..9925609ecd16 100644 --- a/test/MC/Mips/mips-expansions.s +++ b/test/MC/Mips/mips-expansions.s @@ -44,7 +44,9 @@ la $a0, 20($a1) la $7,65538($8) + .set noat lw $t2, symbol($a0) + .set at sw $t2, symbol($t1) lw $t2, 655483($a0) From f3a4a4bb5681381014ccfbaebe774667b8e65dad Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Wed, 18 Jun 2014 14:36:00 +0000 Subject: [PATCH 043/157] [mips][mips64r6] Add BLTC and BLTUC instructions Differential Revision: http://reviews.llvm.org/D3923 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211167 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Mips/Disassembler/MipsDisassembler.cpp | 19 +++++++++++++++---- lib/Target/Mips/Mips32r6InstrInfo.td | 12 ++++++++++-- test/MC/Disassembler/Mips/mips32r6.txt | 2 ++ test/MC/Disassembler/Mips/mips64r6.txt | 2 ++ test/MC/Mips/mips32r6/valid.s | 2 ++ test/MC/Mips/mips64r6/valid.s | 2 ++ 6 files changed, 33 insertions(+), 6 deletions(-) diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 908166f0a7b3..902b87759dc0 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -576,6 +576,8 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, // BLTZC if rs == rt && rt != 0 // BLTC if rs != rt && rs != 0 && rt != 0 + bool HasRs = false; + InsnType Rs = fieldFromInstruction(insn, 21, 5); InsnType Rt = fieldFromInstruction(insn, 16, 5); InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; @@ -586,8 +588,14 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, MI.setOpcode(Mips::BGTZC); else if (Rs == Rt) MI.setOpcode(Mips::BLTZC); - else - return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet. 
+ else { + MI.setOpcode(Mips::BLTC); + HasRs = true; + } + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, Rt))); @@ -627,8 +635,11 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, } else if (Rs == Rt) { MI.setOpcode(Mips::BLTZALC); HasRs = true; - } else - return MCDisassembler::Fail; // BLTUC not implemented yet + } else { + MI.setOpcode(Mips::BLTUC); + HasRs = true; + HasRt = true; + } if (HasRs) MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 47dafcd29608..52e12fcf9462 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -77,6 +77,11 @@ class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, class BGTZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, DecodeDisambiguatedBy<"BgtzGroupBranch">; +class BLTC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"BgtzlGroupBranch">; +class BLTUC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"BgtzGroupBranch">; + class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, DecodeDisambiguatedBy<"BlezlGroupBranch">; class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, @@ -329,6 +334,9 @@ class BGEUC_DESC : CMP_BC_DESC_BASE<"bgeuc", brtarget, GPR32Opnd>; class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>; class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>; +class BLTC_DESC : CMP_BC_DESC_BASE<"bltc", brtarget, GPR32Opnd>; +class BLTUC_DESC : CMP_BC_DESC_BASE<"bltuc", brtarget, GPR32Opnd>; + class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>; class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>; @@ -636,8 +644,8 @@ def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6; def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6; def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6; def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6; -def BLTC; // Also aliased to bgtc with operands swapped -def BLTUC; // Also aliased to bgtuc with operands swapped +def BLTC : BLTC_ENC, BLTC_DESC, ISA_MIPS32R6; +def BLTUC : BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6; def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6; def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6; def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6; diff --git a/test/MC/Disassembler/Mips/mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6.txt index f64bcfd2a12f..35ddd56b0afa 100644 --- a/test/MC/Disassembler/Mips/mips32r6.txt +++ b/test/MC/Disassembler/Mips/mips32r6.txt @@ -42,6 +42,8 @@ 0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256 0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2 0x18 0x02 0x01 0x4d # CHECK: blezalc $2, +0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256 +0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256 0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4 0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4 0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4 diff --git a/test/MC/Disassembler/Mips/mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6.txt index a2ef0d6d8f22..951cc5373c0e 100644 --- a/test/MC/Disassembler/Mips/mips64r6.txt +++ b/test/MC/Disassembler/Mips/mips64r6.txt @@ -42,6 +42,8 @@ 0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256 0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2 0x18 0x02 0x01 0x4d # CHECK: blezalc $2, +0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256 +0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256 0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4 0x60 0x40 0x00 0x01 # CHECK: bnvc $2, 
$zero, 4 0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4 diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index 42263d4672a6..b0f0b5c48229 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -53,6 +53,8 @@ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40] bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20] blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d] + bltc $5, $6, 256 # CHECK: bltc $5, $6, 256 # encoding: [0x5c,0xa6,0x00,0x40] + bltuc $5, $6, 256 # CHECK: bltuc $5, $6, 256 # encoding: [0x1c,0xa6,0x00,0x40] # bnvc requires that rs >= rt but we accept both. See also bnec bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01] bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01] diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index e9df27558857..c16752f7fb5b 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -53,6 +53,8 @@ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40] bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20] blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d] + bltc $5, $6, 256 # CHECK: bltc $5, $6, 256 # encoding: [0x5c,0xa6,0x00,0x40] + bltuc $5, $6, 256 # CHECK: bltuc $5, $6, 256 # encoding: [0x1c,0xa6,0x00,0x40] # bnvc requires that rs >= rt but we accept both. See also bnec bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01] bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01] From cacc0625726e9fba539201543a770b4795c22792 Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Wed, 18 Jun 2014 14:46:05 +0000 Subject: [PATCH 044/157] [mips] Report correct location when "erroring" about the use of $at when it's not available. Summary: This removes the FIXMEs from test/MC/Mips/mips-noat.s. Reviewers: dsanders Reviewed By: dsanders Differential Revision: http://reviews.llvm.org/D4172 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211168 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 19 +++++++++++++------ test/MC/Mips/mips-noat.s | 11 +++-------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 8286f51b3ee2..4a226a9f9d8f 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -203,7 +203,7 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getGPR(int RegNo); - int getATReg(); + int getATReg(SMLoc Loc); bool processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); @@ -1149,9 +1149,15 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, // but for stores we must use $at. if (isLoad && (BaseRegNum != RegOpNum)) TmpRegNum = RegOpNum; - else - TmpRegNum = getReg( - (isGP64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg()); + else { + int AT = getATReg(IDLoc); + // At this point we need AT to perform the expansions and we exit if it is + // not available. + if (!AT) + return; + TmpRegNum = + getReg((isGP64()) ? 
Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT); + } TempInst.setOpcode(Mips::LUi); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); @@ -1408,10 +1414,11 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) { return true; } -int MipsAsmParser::getATReg() { +int MipsAsmParser::getATReg(SMLoc Loc) { int AT = Options.getATRegNum(); if (AT == 0) - TokError("Pseudo instruction requires $at, which is not available"); + reportParseError(Loc, + "Pseudo instruction requires $at, which is not available"); return AT; } diff --git a/test/MC/Mips/mips-noat.s b/test/MC/Mips/mips-noat.s index b83c51780776..07db251b0506 100644 --- a/test/MC/Mips/mips-noat.s +++ b/test/MC/Mips/mips-noat.s @@ -10,11 +10,10 @@ test1: lw $2, 65536($2) -# FIXME: It would be better if the error pointed at the mnemonic instead of the newline -# ERROR: mips-noat.s:[[@LINE+4]]:1: error: Pseudo instruction requires $at, which is not available test2: .set noat - lw $2, 65536($2) + lw $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available + # Can we switch it back on successfully? # CHECK-LABEL: test3: @@ -25,10 +24,6 @@ test3: .set at lw $2, 65536($2) -# FIXME: It would be better if the error pointed at the mnemonic instead of the newline -# ERROR: mips-noat.s:[[@LINE+4]]:1: error: Pseudo instruction requires $at, which is not available test4: .set at=$0 - lw $2, 65536($2) - -# ERROR-NOT: error + lw $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available From c39b18b306c7681cb155127096a56afbea24a08e Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Wed, 18 Jun 2014 14:49:56 +0000 Subject: [PATCH 045/157] [mips] Fix expansion of memory operation if destination register is not a GPR. Summary: The assembler tries to reuse the destination register for memory operations whenever it can but it's not possible to do so if the destination register is not a GPR. Example: ldc1 $f0, sym should expand to: lui $at, %hi(sym) ldc1 $f0, %lo(sym)($at) It's entirely wrong to expand to: lui $f0, %hi(sym) ldc1 $f0, %lo(sym)($f0) Reviewers: dsanders Reviewed By: dsanders Differential Revision: http://reviews.llvm.org/D4173 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211169 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 33 ++++++++++++++++++--- test/MC/Mips/mips-expansions.s | 8 +++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 4a226a9f9d8f..dd2b857789ca 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1144,10 +1144,35 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, ExprOffset = Inst.getOperand(2).getExpr(); // All instructions will have the same location. TempInst.setLoc(IDLoc); - // 1st instruction in expansion is LUi. For load instruction we can use - // the dst register as a temporary if base and dst are different, - // but for stores we must use $at. 
- if (isLoad && (BaseRegNum != RegOpNum)) + // These are some of the types of expansions we perform here: + // 1) lw $8, sym => lui $8, %hi(sym) + // lw $8, %lo(sym)($8) + // 2) lw $8, offset($9) => lui $8, %hi(offset) + // add $8, $8, $9 + // lw $8, %lo(offset)($9) + // 3) lw $8, offset($8) => lui $at, %hi(offset) + // add $at, $at, $8 + // lw $8, %lo(offset)($at) + // 4) sw $8, sym => lui $at, %hi(sym) + // sw $8, %lo(sym)($at) + // 5) sw $8, offset($8) => lui $at, %hi(offset) + // add $at, $at, $8 + // sw $8, %lo(offset)($at) + // 6) ldc1 $f0, sym => lui $at, %hi(sym) + // ldc1 $f0, %lo(sym)($at) + // + // For load instructions we can use the destination register as a temporary + // if base and dst are different (examples 1 and 2) and if the base register + // is general purpose otherwise we must use $at (example 6) and error if it's + // not available. For stores we must use $at (examples 4 and 5) because we + // must not clobber the source register setting up the offset. + const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode()); + int16_t RegClassOp0 = Desc.OpInfo[0].RegClass; + unsigned RegClassIDOp0 = + getContext().getRegisterInfo()->getRegClass(RegClassOp0).getID(); + bool IsGPR = (RegClassIDOp0 == Mips::GPR32RegClassID) || + (RegClassIDOp0 == Mips::GPR64RegClassID); + if (isLoad && IsGPR && (BaseRegNum != RegOpNum)) TmpRegNum = RegOpNum; else { int AT = getATReg(IDLoc); diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s index 9925609ecd16..f0a04a5a2515 100644 --- a/test/MC/Mips/mips-expansions.s +++ b/test/MC/Mips/mips-expansions.s @@ -34,6 +34,11 @@ # CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00] # CHECK: sw $10, 57920($1) # encoding: [0x40,0xe2,0x2a,0xac] +# CHECK: lui $1, %hi(symbol) +# CHECK: ldc1 $f0, %lo(symbol)($1) +# CHECK: lui $1, %hi(symbol) +# CHECK: sdc1 $f0, %lo(symbol)($1) + li $5,123 li $6,-2345 li $7,65538 @@ -51,3 +56,6 @@ lw $t2, 655483($a0) sw $t2, 123456($t1) + + ldc1 $f0, symbol + sdc1 $f0, symbol From 3fbbe94b9320d29d9be8e8064447c42b6c353352 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Jun 2014 15:15:49 +0000 Subject: [PATCH 046/157] Support LE in RelocVisitor::visitELF_PPC64_* Since we now support both LE and BE PPC64 variants, use of getAddend64BE is no longer correct. Use the generic getELFRelocationAddend instead, as was already done for Mips. 
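For illustration only (plain C++, not the RelocVisitor code itself): a hard-wired big-endian read of the 8-byte r_addend field returns a byte-swapped value when the object file is little-endian, which is exactly what switching to the endian-aware accessor avoids.

  #include <cstdint>

  // Old behaviour: byte 0 is always treated as the most significant byte,
  // which is only correct for big-endian ELF objects.
  int64_t readAddendBE(const uint8_t *P) {
    uint64_t V = 0;
    for (int I = 0; I < 8; ++I)
      V = (V << 8) | P[I];
    return static_cast<int64_t>(V);
  }

  // Endian-aware read: honour the byte order recorded in the object file,
  // so both ppc64 and ppc64le objects yield the intended addend.
  int64_t readAddend(const uint8_t *P, bool IsLittleEndian) {
    uint64_t V = 0;
    for (int I = 0; I < 8; ++I) {
      unsigned Byte = IsLittleEndian ? I : 7 - I;
      V |= uint64_t(P[Byte]) << (8 * I);
    }
    return static_cast<int64_t>(V);
  }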
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211170 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Object/RelocVisitor.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h index a3aaf17f1d66..5ca245057a55 100644 --- a/include/llvm/Object/RelocVisitor.h +++ b/include/llvm/Object/RelocVisitor.h @@ -253,12 +253,14 @@ class RelocVisitor { /// PPC64 ELF RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) { - int64_t Addend = getAddend64BE(R); + int64_t Addend; + getELFRelocationAddend(R, Addend); uint32_t Res = (Value + Addend) & 0xFFFFFFFF; return RelocToApply(Res, 4); } RelocToApply visitELF_PPC64_ADDR64(RelocationRef R, uint64_t Value) { - int64_t Addend = getAddend64BE(R); + int64_t Addend; + getELFRelocationAddend(R, Addend); return RelocToApply(Value + Addend, 8); } From b7074b8c2d30631b293c13d2279fe2f031ca9726 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Jun 2014 15:37:07 +0000 Subject: [PATCH 047/157] [PowerPC] Fix emitting instruction pairs on LE My patch r204634 to emit instructions in little-endian format failed to handle those special cases where we emit a pair of instructions from a single LLVM MC instructions (like the bl; nop pairs used to implement the call sequence). In those cases, we still need to emit the "first" instruction (the one in the more significant word) first, on both big and little endian, and not swap them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211171 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 46 +++++++++++++++---- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index a4983ad67235..435a93f78c1d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -102,17 +102,45 @@ class PPCMCCodeEmitter : public MCCodeEmitter { // Output the constant in big/little endian byte order. unsigned Size = Desc.getSize(); - if (IsLittleEndian) { - for (unsigned i = 0; i != Size; ++i) { - OS << (char)Bits; - Bits >>= 8; + switch (Size) { + case 4: + if (IsLittleEndian) { + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); } - } else { - int ShiftValue = (Size * 8) - 8; - for (unsigned i = 0; i != Size; ++i) { - OS << (char)(Bits >> ShiftValue); - Bits <<= 8; + break; + case 8: + // If we emit a pair of instructions, the first one is + // always in the top 32 bits, even on little-endian. + if (IsLittleEndian) { + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 56); + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 56); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); } + break; + default: + llvm_unreachable ("Invalid instruction size"); } ++MCNumEmitted; // Keep track of the # of mi's emitted. 
From aa6a13a8e2dc019ce17a68c2d24150d9b22376c4 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Jun 2014 15:52:18 +0000 Subject: [PATCH 048/157] Do not XFAIL test/tools/llvm-cov tests on powerpc64le All tests in test/tools/llvm-cov fail on big-endian targets and are supposed to be XFAILed there. However, including "powerpc64" in the XFAIL line is now incorrect, since that matches both powerpc64- and powerpc64le- targets, and the tests pass on the latter. Update the XFAIL lines to use powerpc64- instead (like mips64-). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211172 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/llvm-cov/copy_block_helper.m | 2 +- test/tools/llvm-cov/llvm-cov.test | 2 +- test/tools/llvm-cov/range_based_for.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/tools/llvm-cov/copy_block_helper.m b/test/tools/llvm-cov/copy_block_helper.m index 1859b883775d..64973f11009a 100644 --- a/test/tools/llvm-cov/copy_block_helper.m +++ b/test/tools/llvm-cov/copy_block_helper.m @@ -29,4 +29,4 @@ void test(id x) { // GCOV: -: [[@LINE]]:void test int main(int argc, const char *argv[]) { test(0); } // llvm-cov doesn't work on big endian yet -// XFAIL: powerpc64, s390x, mips-, mips64-, sparc +// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc diff --git a/test/tools/llvm-cov/llvm-cov.test b/test/tools/llvm-cov/llvm-cov.test index 2345f8d51e80..82d1273ae61f 100644 --- a/test/tools/llvm-cov/llvm-cov.test +++ b/test/tools/llvm-cov/llvm-cov.test @@ -110,4 +110,4 @@ RUN: not llvm-cov test.c -gcda=test_file_checksum_fail.gcda # Bad function checksum on gcda RUN: not llvm-cov test.c -gcda=test_func_checksum_fail.gcda -XFAIL: powerpc64, s390x, mips-, mips64-, sparc +XFAIL: powerpc64-, s390x, mips-, mips64-, sparc diff --git a/test/tools/llvm-cov/range_based_for.cpp b/test/tools/llvm-cov/range_based_for.cpp index 61f60f6b0d65..3fdb2441399c 100644 --- a/test/tools/llvm-cov/range_based_for.cpp +++ b/test/tools/llvm-cov/range_based_for.cpp @@ -26,4 +26,4 @@ int main(int argc, const char *argv[]) { // GCOV: 1: [[@LINE]]:int main( } // GCOV: -: [[@LINE]]:} // llvm-cov doesn't work on big endian yet -// XFAIL: powerpc64, s390x, mips-, mips64-, sparc +// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc From 336da8cdc506c4f13e9a37ad33a6c0be83e4c5f4 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Jun 2014 16:14:04 +0000 Subject: [PATCH 049/157] [PowerPC] Do not use BLA with the 64-bit SVR4 ABI The PowerPC back-end uses BLA to implement calls to functions at known-constant addresses, which is apparently used for certain system routines on Darwin. However, with the 64-bit SVR4 ABI, this is actually incorrect. An immediate function pointer value on this platform is not directly usable as a target address for BLA: - in the ELFv1 ABI, the function pointer value refers to the *function descriptor*, not the code address - in the ELFv2 ABI, the function pointer value refers to the global entry point, but BL(A) would only be correct when calling the *local* entry point This bug didn't show up since using immediate function pointer values is not usually done in the 64-bit SVR4 ABI in the first place. However, I ran into this issue with a certain use case of LLVM as JIT, where immediate function pointer values were uses to implement callbacks from JITted code to helpers in statically compiled code. Fixed by simply not using BLA with the 64-bit SVR4 ABI. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211174 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 14 +++++++------- test/CodeGen/PowerPC/ppc64-calls.ll | 9 --------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1db9070267a0..e815d615acb7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3431,11 +3431,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; - if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - needIndirectCall = false; - } + if (!isSVR4ABI || !isPPC64) + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { + // If this is an absolute destination address, use the munged value. + Callee = SDValue(Dest, 0); + needIndirectCall = false; + } if (GlobalAddressSDNode *G = dyn_cast(Callee)) { // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 @@ -4383,8 +4384,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // pointers in the 64-bit SVR4 ABI. if (!isTailCall && !dyn_cast(Callee) && - !dyn_cast(Callee) && - !isBLACompatibleAddress(Callee, DAG)) { + !dyn_cast(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll index 1f3bb7111efd..396a5e25caf7 100644 --- a/test/CodeGen/PowerPC/ppc64-calls.ll +++ b/test/CodeGen/PowerPC/ppc64-calls.ll @@ -42,15 +42,6 @@ define void @test_indirect(void ()* nocapture %fp) nounwind { ret void } -; Absolute vales should be have the TOC restore 'nop' -define void @test_abs() nounwind { -; CHECK-LABEL: test_abs: - tail call void inttoptr (i64 1024 to void ()*)() nounwind -; CHECK: bla 1024 -; CHECK-NEXT: nop - ret void -} - declare double @sin(double) nounwind ; External functions call should also have a 'nop' From 7fc69597b78d96a42fb96f406b1f96232fe848dc Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Wed, 18 Jun 2014 16:51:07 +0000 Subject: [PATCH 050/157] [X86] AVX512: Specify compressed displacement for vmovntdqa Use the max 64-bit element size with EVEX_CD8. This should work since element size is ignored for a full-vector access (FVM). 
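As a worked illustration of the compressed disp8*N encoding (the helper below is invented, not the encoder's real interface): with CD8VF on a 512-bit access the scale N is the 64-byte vector width, so a byte displacement is disp8-encodable only when it is a multiple of 64 whose scaled value fits in a signed byte; for example, the 128-byte displacement in the new test encodes as 128/64 = 2.

  #include <cstdint>

  // Returns true and sets Disp8 when Disp can use the compressed disp8*N
  // form; otherwise the encoder must fall back to a full 32-bit displacement.
  bool compressDisp(int64_t Disp, int64_t N, int8_t &Disp8) {
    if (N == 0 || Disp % N != 0)
      return false;                 // not a multiple of the scale
    int64_t Scaled = Disp / N;
    if (Scaled < -128 || Scaled > 127)
      return false;                 // scaled value does not fit in disp8
    Disp8 = static_cast<int8_t>(Scaled);
    return true;                    // e.g. Disp = 128, N = 64 -> Disp8 = 2
  }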
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211175 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 2 +- test/MC/X86/avx512-encodings.s | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 4b9f560597fe..618f42abe94e 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1798,7 +1798,7 @@ def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))]>, - EVEX, EVEX_V512; + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - Integer arithmetic diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index f38eaf5402fe..a7a87cd0b405 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -3159,3 +3159,7 @@ vmovntdqa 0x12345678(%rbx), %zmm13 // CHECK: vmovntdqa // CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x14,0x56] vmovntdqa (%r14,%rdx,2), %zmm18 + +// CHECK: vmovntdqa +// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02] +vmovntdqa 128(%r12,%rdx), %zmm23 From f1b790f7913a07ccc51f47a898ad1bed33bb614d Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Wed, 18 Jun 2014 16:51:10 +0000 Subject: [PATCH 051/157] [X86] AVX512: Add non-temporal stores Note that I followed the AVX2 convention here and didn't add LLVM intrinsics for stores. These can be generated with the nontemporal hint on LLVM IR stores (see new test). The GCC builtins are lowered directly into nontemporal stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211176 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 29 ++++++++++++++++++++++++++ test/CodeGen/X86/avx512-nontemporal.ll | 19 +++++++++++++++++ test/MC/X86/avx512-encodings.s | 12 +++++++++++ 3 files changed, 60 insertions(+) create mode 100644 test/CodeGen/X86/avx512-nontemporal.ll diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 618f42abe94e..7cac5ebbece7 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1800,6 +1800,35 @@ def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst), (int_x86_avx512_movntdqa addr:$src))]>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; +// Prefer non-temporal over temporal versions +let AddedComplexity = 400, SchedRW = [WriteStore] in { + +def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v16f32 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + +def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + + +def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8i64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; +} + //===----------------------------------------------------------------------===// // AVX-512 - Integer arithmetic // diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll new file mode 100644 index 
000000000000..ef50cdb82831 --- /dev/null +++ b/test/CodeGen/X86/avx512-nontemporal.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s + +define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) { +; CHECK: vmovntps %z + %cast = bitcast i8* %B to <16 x float>* + %A2 = fadd <16 x float> %A, %AA + store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0 +; CHECK: vmovntdq %z + %cast1 = bitcast i8* %B to <8 x i64>* + %E2 = add <8 x i64> %E, %EE + store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0 +; CHECK: vmovntpd %z + %cast2 = bitcast i8* %B to <8 x double>* + %C2 = fadd <8 x double> %C, %CC + store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0 + ret void +} + +!0 = metadata !{i32 1} diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index a7a87cd0b405..b9674231d950 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -3163,3 +3163,15 @@ vmovntdqa (%r14,%rdx,2), %zmm18 // CHECK: vmovntdqa // CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02] vmovntdqa 128(%r12,%rdx), %zmm23 + +// CHECK: vmovntdq +// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x24,0xa9] +vmovntdq %zmm28, (%rcx,%r13,4) + +// CHECK: vmovntpd +// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x2b,0xb2,0x04,0x00,0x00,0x00] +vmovntpd %zmm6, 4(%rdx) + +// CHECK: vmovntps +// CHECK: encoding: [0x62,0x51,0x7c,0x48,0x2b,0x5c,0x8d,0x00] +vmovntps %zmm11, (%r13,%rcx,4) From 3de1bed8f427a4961b2288cb7db32b6566576c06 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 16:53:48 +0000 Subject: [PATCH 052/157] R600/SI: Comparisons set vcc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211178 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 16 ++- lib/Target/R600/SIInstructions.td | 204 +++++++++++++++--------------- 2 files changed, 115 insertions(+), 105 deletions(-) diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index d2a13e2dc39c..c3d4376fbba6 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -363,12 +363,13 @@ multiclass VOP2b_32 op, string opName, list pattern, } multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, ValueType vt, PatLeaf cond> { - + string opName, ValueType vt, PatLeaf cond, bit defExec = 0> { def _e32 : VOPC < op, (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", [] - >, VOP ; + >, VOP { + let Defs = !if(defExec, [VCC, EXEC], [VCC]); + } def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -381,6 +382,7 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))] ) >, VOP { + let Defs = !if(defExec, [EXEC], []); let src2 = SIOperand.ZERO; let src2_modifiers = 0; } @@ -394,6 +396,14 @@ multiclass VOPC_64 op, string opName, ValueType vt = untyped, PatLeaf cond = COND_NULL> : VOPC_Helper ; +multiclass VOPCX_32 op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper ; + +multiclass VOPCX_64 op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper ; + multiclass VOP3_32 op, string opName, list pattern> : VOP3_m < op, (outs VReg_32:$dst), (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 
f7f02ee347dd..61cd2be967dd 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -331,7 +331,7 @@ def S_CMPK_EQ_I32 : SOPK < >; */ -let isCompare = 1 in { +let isCompare = 1, Defs = [SCC] in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; @@ -343,7 +343,7 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; -} // End isCompare = 1 +} // End isCompare = 1, Defs = [SCC] let Defs = [SCC], isCommutable = 1 in { def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; @@ -478,26 +478,26 @@ defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>; defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">; defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">; -defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">; -defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">; -defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">; -defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">; -defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">; -defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">; -defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">; -defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">; -defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">; -defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">; -defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">; -defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">; -defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">; -defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">; -defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; +defm V_CMPX_F_F32 : VOPCX_32 <0x00000010, "V_CMPX_F_F32">; +defm V_CMPX_LT_F32 : VOPCX_32 <0x00000011, "V_CMPX_LT_F32">; +defm V_CMPX_EQ_F32 : VOPCX_32 <0x00000012, "V_CMPX_EQ_F32">; +defm V_CMPX_LE_F32 : VOPCX_32 <0x00000013, "V_CMPX_LE_F32">; +defm V_CMPX_GT_F32 : VOPCX_32 <0x00000014, "V_CMPX_GT_F32">; +defm V_CMPX_LG_F32 : VOPCX_32 <0x00000015, "V_CMPX_LG_F32">; +defm V_CMPX_GE_F32 : VOPCX_32 <0x00000016, "V_CMPX_GE_F32">; +defm V_CMPX_O_F32 : VOPCX_32 <0x00000017, "V_CMPX_O_F32">; +defm V_CMPX_U_F32 : VOPCX_32 <0x00000018, "V_CMPX_U_F32">; +defm V_CMPX_NGE_F32 : VOPCX_32 <0x00000019, "V_CMPX_NGE_F32">; +defm V_CMPX_NLG_F32 : VOPCX_32 <0x0000001a, "V_CMPX_NLG_F32">; +defm V_CMPX_NGT_F32 : VOPCX_32 <0x0000001b, "V_CMPX_NGT_F32">; +defm V_CMPX_NLE_F32 : VOPCX_32 <0x0000001c, "V_CMPX_NLE_F32">; +defm V_CMPX_NEQ_F32 : VOPCX_32 <0x0000001d, "V_CMPX_NEQ_F32">; +defm V_CMPX_NLT_F32 : VOPCX_32 <0x0000001e, "V_CMPX_NLT_F32">; +defm V_CMPX_TRU_F32 : VOPCX_32 <0x0000001f, "V_CMPX_TRU_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>; @@ -516,26 +516,26 @@ defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>; defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; -let 
hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">; -defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">; -defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">; -defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">; -defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">; -defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">; -defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">; -defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">; -defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">; -defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">; -defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">; -defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">; -defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">; -defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">; -defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">; -defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">; +defm V_CMPX_F_F64 : VOPCX_64 <0x00000030, "V_CMPX_F_F64">; +defm V_CMPX_LT_F64 : VOPCX_64 <0x00000031, "V_CMPX_LT_F64">; +defm V_CMPX_EQ_F64 : VOPCX_64 <0x00000032, "V_CMPX_EQ_F64">; +defm V_CMPX_LE_F64 : VOPCX_64 <0x00000033, "V_CMPX_LE_F64">; +defm V_CMPX_GT_F64 : VOPCX_64 <0x00000034, "V_CMPX_GT_F64">; +defm V_CMPX_LG_F64 : VOPCX_64 <0x00000035, "V_CMPX_LG_F64">; +defm V_CMPX_GE_F64 : VOPCX_64 <0x00000036, "V_CMPX_GE_F64">; +defm V_CMPX_O_F64 : VOPCX_64 <0x00000037, "V_CMPX_O_F64">; +defm V_CMPX_U_F64 : VOPCX_64 <0x00000038, "V_CMPX_U_F64">; +defm V_CMPX_NGE_F64 : VOPCX_64 <0x00000039, "V_CMPX_NGE_F64">; +defm V_CMPX_NLG_F64 : VOPCX_64 <0x0000003a, "V_CMPX_NLG_F64">; +defm V_CMPX_NGT_F64 : VOPCX_64 <0x0000003b, "V_CMPX_NGT_F64">; +defm V_CMPX_NLE_F64 : VOPCX_64 <0x0000003c, "V_CMPX_NLE_F64">; +defm V_CMPX_NEQ_F64 : VOPCX_64 <0x0000003d, "V_CMPX_NEQ_F64">; +defm V_CMPX_NLT_F64 : VOPCX_64 <0x0000003e, "V_CMPX_NLT_F64">; +defm V_CMPX_TRU_F64 : VOPCX_64 <0x0000003f, "V_CMPX_TRU_F64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">; defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">; @@ -554,26 +554,26 @@ defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">; defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">; defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">; -defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">; -defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">; -defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">; -defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">; -defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">; -defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">; -defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">; -defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">; -defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">; -defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">; -defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">; -defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">; -defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">; -defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">; -defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">; +defm 
V_CMPSX_F_F32 : VOPCX_32 <0x00000050, "V_CMPSX_F_F32">; +defm V_CMPSX_LT_F32 : VOPCX_32 <0x00000051, "V_CMPSX_LT_F32">; +defm V_CMPSX_EQ_F32 : VOPCX_32 <0x00000052, "V_CMPSX_EQ_F32">; +defm V_CMPSX_LE_F32 : VOPCX_32 <0x00000053, "V_CMPSX_LE_F32">; +defm V_CMPSX_GT_F32 : VOPCX_32 <0x00000054, "V_CMPSX_GT_F32">; +defm V_CMPSX_LG_F32 : VOPCX_32 <0x00000055, "V_CMPSX_LG_F32">; +defm V_CMPSX_GE_F32 : VOPCX_32 <0x00000056, "V_CMPSX_GE_F32">; +defm V_CMPSX_O_F32 : VOPCX_32 <0x00000057, "V_CMPSX_O_F32">; +defm V_CMPSX_U_F32 : VOPCX_32 <0x00000058, "V_CMPSX_U_F32">; +defm V_CMPSX_NGE_F32 : VOPCX_32 <0x00000059, "V_CMPSX_NGE_F32">; +defm V_CMPSX_NLG_F32 : VOPCX_32 <0x0000005a, "V_CMPSX_NLG_F32">; +defm V_CMPSX_NGT_F32 : VOPCX_32 <0x0000005b, "V_CMPSX_NGT_F32">; +defm V_CMPSX_NLE_F32 : VOPCX_32 <0x0000005c, "V_CMPSX_NLE_F32">; +defm V_CMPSX_NEQ_F32 : VOPCX_32 <0x0000005d, "V_CMPSX_NEQ_F32">; +defm V_CMPSX_NLT_F32 : VOPCX_32 <0x0000005e, "V_CMPSX_NLT_F32">; +defm V_CMPSX_TRU_F32 : VOPCX_32 <0x0000005f, "V_CMPSX_TRU_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">; defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">; @@ -622,18 +622,18 @@ defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>; defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>; defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">; -defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">; -defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">; -defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">; -defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">; -defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">; -defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">; -defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">; +defm V_CMPX_F_I32 : VOPCX_32 <0x00000090, "V_CMPX_F_I32">; +defm V_CMPX_LT_I32 : VOPCX_32 <0x00000091, "V_CMPX_LT_I32">; +defm V_CMPX_EQ_I32 : VOPCX_32 <0x00000092, "V_CMPX_EQ_I32">; +defm V_CMPX_LE_I32 : VOPCX_32 <0x00000093, "V_CMPX_LE_I32">; +defm V_CMPX_GT_I32 : VOPCX_32 <0x00000094, "V_CMPX_GT_I32">; +defm V_CMPX_NE_I32 : VOPCX_32 <0x00000095, "V_CMPX_NE_I32">; +defm V_CMPX_GE_I32 : VOPCX_32 <0x00000096, "V_CMPX_GE_I32">; +defm V_CMPX_T_I32 : VOPCX_32 <0x00000097, "V_CMPX_T_I32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">; defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>; @@ -644,18 +644,18 @@ defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>; defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>; defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">; -defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">; -defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">; -defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">; -defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">; -defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">; -defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">; -defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">; +defm V_CMPX_F_I64 : VOPCX_64 <0x000000b0, "V_CMPX_F_I64">; +defm V_CMPX_LT_I64 : 
VOPCX_64 <0x000000b1, "V_CMPX_LT_I64">; +defm V_CMPX_EQ_I64 : VOPCX_64 <0x000000b2, "V_CMPX_EQ_I64">; +defm V_CMPX_LE_I64 : VOPCX_64 <0x000000b3, "V_CMPX_LE_I64">; +defm V_CMPX_GT_I64 : VOPCX_64 <0x000000b4, "V_CMPX_GT_I64">; +defm V_CMPX_NE_I64 : VOPCX_64 <0x000000b5, "V_CMPX_NE_I64">; +defm V_CMPX_GE_I64 : VOPCX_64 <0x000000b6, "V_CMPX_GE_I64">; +defm V_CMPX_T_I64 : VOPCX_64 <0x000000b7, "V_CMPX_T_I64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">; defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>; @@ -666,18 +666,18 @@ defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>; defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>; defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">; -defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">; -defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">; -defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">; -defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">; -defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">; -defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">; -defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">; +defm V_CMPX_F_U32 : VOPCX_32 <0x000000d0, "V_CMPX_F_U32">; +defm V_CMPX_LT_U32 : VOPCX_32 <0x000000d1, "V_CMPX_LT_U32">; +defm V_CMPX_EQ_U32 : VOPCX_32 <0x000000d2, "V_CMPX_EQ_U32">; +defm V_CMPX_LE_U32 : VOPCX_32 <0x000000d3, "V_CMPX_LE_U32">; +defm V_CMPX_GT_U32 : VOPCX_32 <0x000000d4, "V_CMPX_GT_U32">; +defm V_CMPX_NE_U32 : VOPCX_32 <0x000000d5, "V_CMPX_NE_U32">; +defm V_CMPX_GE_U32 : VOPCX_32 <0x000000d6, "V_CMPX_GE_U32">; +defm V_CMPX_T_U32 : VOPCX_32 <0x000000d7, "V_CMPX_T_U32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">; defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>; @@ -688,30 +688,30 @@ defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>; defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>; defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">; -defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">; -defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">; -defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">; -defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">; -defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">; -defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">; -defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">; +defm V_CMPX_F_U64 : VOPCX_64 <0x000000f0, "V_CMPX_F_U64">; +defm V_CMPX_LT_U64 : VOPCX_64 <0x000000f1, "V_CMPX_LT_U64">; +defm V_CMPX_EQ_U64 : VOPCX_64 <0x000000f2, "V_CMPX_EQ_U64">; +defm V_CMPX_LE_U64 : VOPCX_64 <0x000000f3, "V_CMPX_LE_U64">; +defm V_CMPX_GT_U64 : VOPCX_64 <0x000000f4, "V_CMPX_GT_U64">; +defm V_CMPX_NE_U64 : VOPCX_64 <0x000000f5, "V_CMPX_NE_U64">; +defm V_CMPX_GE_U64 : VOPCX_64 <0x000000f6, "V_CMPX_GE_U64">; +defm V_CMPX_T_U64 : VOPCX_64 <0x000000f7, "V_CMPX_T_U64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { -defm 
V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +let hasSideEffects = 1 in { +defm V_CMPX_CLASS_F32 : VOPCX_32 <0x00000098, "V_CMPX_CLASS_F32">; +} // End hasSideEffects = 1 defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">; -let hasSideEffects = 1, Defs = [EXEC] in { -defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; -} // End hasSideEffects = 1, Defs = [EXEC] +let hasSideEffects = 1 in { +defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">; +} // End hasSideEffects = 1 } // End isCompare = 1 From f7aa5f93797df1d5d85fb74771d19e9b0f61f79b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 17:05:22 +0000 Subject: [PATCH 053/157] R600/SI: Temporary fix for f64 fneg This should be a source modifier, but this unblocks most of my math patches. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211181 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 846aeb63093a..f9b466709afb 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -212,6 +212,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::FRINT, MVT::f64, Legal); } + // FIXME: This should be removed and handled the same way as f32 fneg. Source + // modifiers also work for the double instructions. + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); From a5395c03f0b94dd0e936da8a9fdf7c579867c1bf Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 17:05:26 +0000 Subject: [PATCH 054/157] R600: Custom lower f64 frint for pre-CI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211182 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 26 ++++++++++++++++++++++++++ lib/Target/R600/AMDGPUISelLowering.h | 1 + test/CodeGen/R600/llvm.rint.f64.ll | 20 ++++++++++++++------ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4d95723a71a1..e09db6429b23 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -218,6 +218,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::BR_CC, MVT::i1, Expand); + if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { + setOperationAction(ISD::FRINT, MVT::f64, Custom); + } + if (!Subtarget->hasBFI()) { // fcopysign can be done in a single instruction with BFI. setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); @@ -490,6 +494,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::SREM: return LowerSREM(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); + case ISD::FRINT: return LowerFRINT(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); // AMDIL DAG lowering. 
@@ -1566,6 +1571,27 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + assert(Op.getValueType() == MVT::f64); + + SDValue C1 = DAG.getConstantFP(0x1.0p+52, MVT::f64); + SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); + + SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); + SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); + + SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); + SDValue C2 = DAG.getConstantFP(0x1.fffffffffffffp+51, MVT::f64); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); + + return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); +} + SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue S0 = Op.getOperand(0); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index a4baaf12588f..9b54022e1566 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -51,6 +51,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue ExpandSIGN_EXTEND_INREG(SDValue Op, diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll index a7a909addafb..3e2884b7ce02 100644 --- a/test/CodeGen/R600/llvm.rint.f64.ll +++ b/test/CodeGen/R600/llvm.rint.f64.ll @@ -1,30 +1,38 @@ ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; FUNC-LABEL: @f64 +; FUNC-LABEL: @rint_f64 ; CI: V_RNDNE_F64_e32 -define void @f64(double addrspace(1)* %out, double %in) { + +; SI-DAG: V_ADD_F64 +; SI-DAG: V_ADD_F64 +; SI-DAG V_CMP_GT_F64_e64 +; SI: V_CNDMASK_B32 +; SI: V_CNDMASK_B32 +; SI: S_ENDPGM +define void @rint_f64(double addrspace(1)* %out, double %in) { entry: %0 = call double @llvm.rint.f64(double %in) store double %0, double addrspace(1)* %out ret void } -; FUNC-LABEL: @v2f64 +; FUNC-LABEL: @rint_v2f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { entry: %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in) store <2 x double> %0, <2 x double> addrspace(1)* %out ret void } -; FUNC-LABEL: @v4f64 +; FUNC-LABEL: @rint_v4f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { entry: %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in) store <4 x double> %0, <4 x double> addrspace(1)* %out From debd8312233176171abdd6426ea49cb73ae7b1fd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 17:05:30 +0000 Subject: [PATCH 055/157] R600: Implement f64 ftrunc, ffloor and fceil. 
CI has instructions for these, so this fixes them for older hardware. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211183 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 107 +++++++++++++++++++++++++ lib/Target/R600/AMDGPUISelLowering.h | 4 + test/CodeGen/R600/fceil.ll | 35 ++++++-- test/CodeGen/R600/ffloor.ll | 34 ++++++-- test/CodeGen/R600/ftrunc.ll | 40 +++++++-- 5 files changed, 198 insertions(+), 22 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index e09db6429b23..1e6f38ffc3fd 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -219,7 +219,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::BR_CC, MVT::i1, Expand); if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { + setOperationAction(ISD::FCEIL, MVT::f64, Custom); + setOperationAction(ISD::FTRUNC, MVT::f64, Custom); setOperationAction(ISD::FRINT, MVT::f64, Custom); + setOperationAction(ISD::FFLOOR, MVT::f64, Custom); } if (!Subtarget->hasBFI()) { @@ -494,7 +497,10 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::SREM: return LowerSREM(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); + case ISD::FCEIL: return LowerFCEIL(Op, DAG); + case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); + case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); // AMDIL DAG lowering. @@ -1571,6 +1577,84 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + // result = trunc(src) + // if (src > 0.0 && src != result) + // result += 1.0 + + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); + + const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64); + const SDValue One = DAG.getConstantFP(1.0, MVT::f64); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + + SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); + SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); + SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); + + SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); + return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); +} + +SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + assert(Op.getValueType() == MVT::f64); + + const SDValue Zero = DAG.getConstant(0, MVT::i32); + const SDValue One = DAG.getConstant(1, MVT::i32); + + SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); + + // Extract the upper half, since this is where we will find the sign and + // exponent. + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One); + + const unsigned FractBits = 52; + const unsigned ExpBits = 11; + + // Extract the exponent. + SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32, + Hi, + DAG.getConstant(FractBits - 32, MVT::i32), + DAG.getConstant(ExpBits, MVT::i32)); + SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart, + DAG.getConstant(1023, MVT::i32)); + + // Extract the sign bit. 
+ const SDValue SignBitMask = DAG.getConstant(1ul << 31, MVT::i32); + SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); + + // Extend back to to 64-bits. + SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, + Zero, SignBit); + SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); + + SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); + const SDValue FractMask = DAG.getConstant((1L << FractBits) - 1, MVT::i64); + + SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); + SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); + SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32); + + const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32); + + SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT); + SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT); + + SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0); + SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1); + + return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2); +} + SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); @@ -1592,6 +1676,29 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); } +SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + // result = trunc(src); + // if (src < 0.0 && src != result) + // result += -1.0. + + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); + + const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64); + const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + + SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); + SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); + SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); + + SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); + return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); +} + SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue S0 = Op.getOperand(0); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 9b54022e1566..34e36d886b97 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -51,7 +51,11 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue ExpandSIGN_EXTEND_INREG(SDValue Op, diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll index b8b945f46ffe..b42aefa17328 100644 --- a/test/CodeGen/R600/fceil.ll +++ b/test/CodeGen/R600/fceil.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; 
RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.ceil.f64(double) nounwind readnone declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone @@ -7,15 +8,33 @@ declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone -; CI-LABEL: @fceil_f64: -; CI: V_CEIL_F64_e32 +; FUNC-LABEL: @fceil_f64: +; CI: V_CEIL_F64_e32 +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_F64 +; SI: CNDMASK_B32 +; SI: CMP_NE_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: V_ADD_F64 define void @fceil_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.ceil.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out ret void } -; CI-LABEL: @fceil_v2f64: +; FUNC-LABEL: @fceil_v2f64: ; CI: V_CEIL_F64_e32 ; CI: V_CEIL_F64_e32 define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { @@ -24,7 +43,7 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ret void } -; FIXME-CI-LABEL: @fceil_v3f64: +; FIXME-FUNC-LABEL: @fceil_v3f64: ; FIXME-CI: V_CEIL_F64_e32 ; FIXME-CI: V_CEIL_F64_e32 ; FIXME-CI: V_CEIL_F64_e32 @@ -34,7 +53,7 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ; ret void ; } -; CI-LABEL: @fceil_v4f64: +; FUNC-LABEL: @fceil_v4f64: ; CI: V_CEIL_F64_e32 ; CI: V_CEIL_F64_e32 ; CI: V_CEIL_F64_e32 @@ -45,7 +64,7 @@ define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { ret void } -; CI-LABEL: @fceil_v8f64: +; FUNC-LABEL: @fceil_v8f64: ; CI: V_CEIL_F64_e32 ; CI: V_CEIL_F64_e32 ; CI: V_CEIL_F64_e32 @@ -60,7 +79,7 @@ define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { ret void } -; CI-LABEL: @fceil_v16f64: +; FUNC-LABEL: @fceil_v16f64: ; CI: V_CEIL_F64_e32 ; CI: V_CEIL_F64_e32 ; CI: V_CEIL_F64_e32 diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll index 51d2b8961504..31c6116988e6 100644 --- a/test/CodeGen/R600/ffloor.ll +++ b/test/CodeGen/R600/ffloor.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.floor.f64(double) nounwind readnone declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone @@ -7,15 +8,34 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone -; CI-LABEL: @ffloor_f64: +; FUNC-LABEL: @ffloor_f64: ; CI: V_FLOOR_F64_e32 + +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_LT_F64 +; SI: CNDMASK_B32 +; SI: CMP_NE_I32 +; SI: CNDMASK_B32 
+; SI: CNDMASK_B32 +; SI: V_ADD_F64 define void @ffloor_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.floor.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out ret void } -; CI-LABEL: @ffloor_v2f64: +; FUNC-LABEL: @ffloor_v2f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { @@ -24,7 +44,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ret void } -; FIXME-CI-LABEL: @ffloor_v3f64: +; FIXME-FUNC-LABEL: @ffloor_v3f64: ; FIXME-CI: V_FLOOR_F64_e32 ; FIXME-CI: V_FLOOR_F64_e32 ; FIXME-CI: V_FLOOR_F64_e32 @@ -34,7 +54,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ; ret void ; } -; CI-LABEL: @ffloor_v4f64: +; FUNC-LABEL: @ffloor_v4f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 @@ -45,7 +65,7 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { ret void } -; CI-LABEL: @ffloor_v8f64: +; FUNC-LABEL: @ffloor_v8f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 @@ -60,7 +80,7 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { ret void } -; CI-LABEL: @ffloor_v16f64: +; FUNC-LABEL: @ffloor_v16f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll index 6b235ffbd980..3cd1deb921fc 100644 --- a/test/CodeGen/R600/ftrunc.ll +++ b/test/CodeGen/R600/ftrunc.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.trunc.f64(double) nounwind readnone declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone @@ -7,15 +8,40 @@ declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone -; CI-LABEL: @ftrunc_f64: +; FUNC-LABEL: @v_ftrunc_f64: ; CI: V_TRUNC_F64_e32 +; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11 +; SI: S_ENDPGM +define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { + %x = load double addrspace(1)* %in, align 8 + %y = call double @llvm.trunc.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @ftrunc_f64: +; CI: V_TRUNC_F64_e32 + +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: S_ENDPGM define void @ftrunc_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.trunc.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out ret void } -; CI-LABEL: @ftrunc_v2f64: +; FUNC-LABEL: @ftrunc_v2f64: ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { @@ -24,7 +50,7 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ret void } -; FIXME-CI-LABEL: @ftrunc_v3f64: +; FIXME-FUNC-LABEL: @ftrunc_v3f64: ; FIXME-CI: V_TRUNC_F64_e32 ; FIXME-CI: 
V_TRUNC_F64_e32 ; FIXME-CI: V_TRUNC_F64_e32 @@ -34,7 +60,7 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ; ret void ; } -; CI-LABEL: @ftrunc_v4f64: +; FUNC-LABEL: @ftrunc_v4f64: ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 @@ -45,7 +71,7 @@ define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { ret void } -; CI-LABEL: @ftrunc_v8f64: +; FUNC-LABEL: @ftrunc_v8f64: ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 @@ -60,7 +86,7 @@ define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { ret void } -; CI-LABEL: @ftrunc_v16f64: +; FUNC-LABEL: @ftrunc_v16f64: ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 From a841620f66f2e5673a0a0529f655dcb742031e6b Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 17:07:15 +0000 Subject: [PATCH 056/157] Fix a memory leak in the error path. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211184 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/ELFObjectFile.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 356288ffc51b..0a3e2cb790d0 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -17,8 +17,8 @@ namespace llvm { using namespace object; -ErrorOr ObjectFile::createELFObjectFile(MemoryBuffer *Obj, - bool BufferOwned) { +static ErrorOr createELFObjectFileAux(MemoryBuffer *Obj, + bool BufferOwned) { std::pair Ident = getElfArchType(Obj); std::size_t MaxAlignment = 1ULL << countTrailingZeros(uintptr_t(Obj->getBufferStart())); @@ -82,4 +82,12 @@ ErrorOr ObjectFile::createELFObjectFile(MemoryBuffer *Obj, return R.release(); } +ErrorOr ObjectFile::createELFObjectFile(MemoryBuffer *Obj, + bool BufferOwned) { + ErrorOr Ret = createELFObjectFileAux(Obj, BufferOwned); + if (BufferOwned && Ret.getError()) + delete Obj; + return Ret; +} + } // end namespace llvm From 95f1fa7ec339f1dc27e279a2241420c4db850f5b Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Wed, 18 Jun 2014 17:10:30 +0000 Subject: [PATCH 057/157] [mips] SYNC $stype instruction was added in Mips32 but SYNC with an implied operand ($stype = 0) is valid since Mips2. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211185 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrInfo.td | 8 +++++++- test/CodeGen/Mips/atomic.ll | 4 ++-- test/MC/Mips/mips1/invalid-mips2.s | 1 + test/MC/Mips/mips1/invalid-mips32.s | 10 ++++++++++ test/MC/Mips/mips2/invalid-mips32.s | 2 ++ test/MC/Mips/mips2/valid.s | 1 + test/MC/Mips/mips3/invalid-mips32.s | 10 ++++++++++ test/MC/Mips/mips3/valid.s | 1 + test/MC/Mips/mips32/valid.s | 2 ++ test/MC/Mips/mips32r2/valid.s | 2 ++ test/MC/Mips/mips32r6/valid.s | 2 ++ test/MC/Mips/mips4/invalid-mips32.s | 10 ++++++++++ test/MC/Mips/mips4/valid.s | 1 + test/MC/Mips/mips5/invalid-mips32.s | 10 ++++++++++ test/MC/Mips/mips5/valid.s | 1 + test/MC/Mips/mips64/valid.s | 2 ++ test/MC/Mips/mips64r2/valid.s | 2 ++ test/MC/Mips/mips64r6/valid.s | 2 ++ 18 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 test/MC/Mips/mips1/invalid-mips32.s create mode 100644 test/MC/Mips/mips3/invalid-mips32.s create mode 100644 test/MC/Mips/mips4/invalid-mips32.s create mode 100644 test/MC/Mips/mips5/invalid-mips32.s diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 7c13f9a9c8d7..b1b455769477 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1156,7 +1156,7 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>, ISA_MIPS1_NOT_32R6_64R6; } -def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM; +def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32; def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>; def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>; def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>; @@ -1488,6 +1488,8 @@ def : MipsInstAlias<"sra $rd, $rt, $rs", (SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; def : MipsInstAlias<"srl $rd, $rt, $rs", (SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"sync", + (SYNC 0), 1>, ISA_MIPS2; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// @@ -1540,6 +1542,10 @@ let AdditionalPredicates = [NotDSP] in { (ADDiu GPR32:$src, imm:$imm)>; } +// SYNC +def : MipsPat<(MipsSync (i32 immz)), + (SYNC 0)>, ISA_MIPS2; + // Call def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), (JAL tglobaladdr:$dst)>; diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll index 20d46708c550..066d42cc302d 100644 --- a/test/CodeGen/Mips/atomic.ll +++ b/test/CodeGen/Mips/atomic.ll @@ -344,11 +344,11 @@ entry: ; ALL-LABEL: CheckSync: -; ALL: sync 0 +; ALL: sync ; ALL: ll ; ALL: sc ; ALL: beq -; ALL: sync 0 +; ALL: sync } ; make sure that this assertion in diff --git a/test/MC/Mips/mips1/invalid-mips2.s b/test/MC/Mips/mips1/invalid-mips2.s index 6c3e80ac458a..7db261d42c98 100644 --- a/test/MC/Mips/mips1/invalid-mips2.s +++ b/test/MC/Mips/mips1/invalid-mips2.s @@ -21,3 +21,4 @@ tnei $t4,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled trunc.w.d $f22,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled trunc.w.s $f28,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips1/invalid-mips32.s b/test/MC/Mips/mips1/invalid-mips32.s new file mode 
100644 index 000000000000..4ad8d63eb2c8 --- /dev/null +++ b/test/MC/Mips/mips1/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips2/invalid-mips32.s b/test/MC/Mips/mips2/invalid-mips32.s index 653d2a13110d..43ea345441c5 100644 --- a/test/MC/Mips/mips2/invalid-mips32.s +++ b/test/MC/Mips/mips2/invalid-mips32.s @@ -40,3 +40,5 @@ msubu $15,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled mtc0 $9,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s index d4f48ec8d64d..9c3706ee3ff5 100644 --- a/test/MC/Mips/mips2/valid.s +++ b/test/MC/Mips/mips2/valid.s @@ -122,6 +122,7 @@ swc3 $10,-32265($k0) swl $15,13694($s3) swr $s1,-26590($14) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips3/invalid-mips32.s b/test/MC/Mips/mips3/invalid-mips32.s new file mode 100644 index 000000000000..3acd7651e629 --- /dev/null +++ b/test/MC/Mips/mips3/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s index a68dc91420b2..cb209fdb208f 100644 --- a/test/MC/Mips/mips3/valid.s +++ b/test/MC/Mips/mips3/valid.s @@ -179,6 +179,7 @@ swc2 $25,24880($s0) # CHECK: swc2 $25, 24880($16) # encoding: [0xea,0x19,0x61,0x30] swl $15,13694($s3) swr $s1,-26590($14) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s index 68db86e91c74..c58cb88b052d 100644 --- a/test/MC/Mips/mips32/valid.s +++ b/test/MC/Mips/mips32/valid.s @@ -147,6 +147,8 @@ swc2 $25,24880($s0) # CHECK: swc2 $25, 24880($16) # encoding: [0xea,0x19,0x61,0x30] swl $15,13694($s3) swr $s1,-26590($14) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s index b92facf666e9..e152f6f437c2 100644 --- a/test/MC/Mips/mips32r2/valid.s +++ b/test/MC/Mips/mips32r2/valid.s @@ -177,6 +177,8 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff 
--git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index b0f0b5c48229..0766079fda5b 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -147,3 +147,5 @@ clz $sp,$gp # CHECK: clz $sp, $gp # encoding: [0x03,0x80,0xe8,0x50] ssnop # WARNING: [[@LINE]]:9: warning: ssnop is deprecated for MIPS32r6 and is equivalent to a nop instruction ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40] + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] diff --git a/test/MC/Mips/mips4/invalid-mips32.s b/test/MC/Mips/mips4/invalid-mips32.s new file mode 100644 index 000000000000..52dea02d10c6 --- /dev/null +++ b/test/MC/Mips/mips4/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips4 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s index 2086a12602a8..949b91da922c 100644 --- a/test/MC/Mips/mips4/valid.s +++ b/test/MC/Mips/mips4/valid.s @@ -198,6 +198,7 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips5/invalid-mips32.s b/test/MC/Mips/mips5/invalid-mips32.s new file mode 100644 index 000000000000..2e2c8da462df --- /dev/null +++ b/test/MC/Mips/mips5/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips5 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s index 3f350b1d368f..3afdee1887c1 100644 --- a/test/MC/Mips/mips5/valid.s +++ b/test/MC/Mips/mips5/valid.s @@ -200,6 +200,7 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s index 3f4149a8f5a3..81c22687796e 100644 --- a/test/MC/Mips/mips64/valid.s +++ b/test/MC/Mips/mips64/valid.s @@ -214,6 +214,8 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s index df144921a740..3d85c137ab9f 100644 --- a/test/MC/Mips/mips64r2/valid.s +++ b/test/MC/Mips/mips64r2/valid.s @@ -241,6 +241,8 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index c16752f7fb5b..9f32562191a4 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -165,3 +165,5 @@ dclz $s0,$25 # CHECK: dclz $16, $25 # encoding: 
[0x03,0x20,0x80,0x52] ssnop # WARNING: [[@LINE]]:9: warning: ssnop is deprecated for MIPS64r6 and is equivalent to a nop instruction ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40] + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] From 795ae8615f20eb5b55e15b8eeca9c909ef78a8f0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 17:13:51 +0000 Subject: [PATCH 058/157] R600/SI: Prettier operand printing for 64-bit ops. Copy what is done for 32-bit already so the order is about the same. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211186 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 23 ++++++++++++----------- lib/Target/R600/SIInstrInfo.td | 8 +++++--- test/CodeGen/R600/fsub64.ll | 7 +++---- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index f9b466709afb..4e61d5b03aa6 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -485,19 +485,20 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } - case AMDGPU::V_SUB_F64: - BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), - MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addReg(MI->getOperand(2).getReg()) - .addImm(0) /* src2 */ - .addImm(0) /* ABS */ - .addImm(0) /* CLAMP */ - .addImm(0) /* OMOD */ - .addImm(2); /* NEG */ + case AMDGPU::V_SUB_F64: { + unsigned DestReg = MI->getOperand(0).getReg(); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg) + .addImm(0) // SRC0 modifiers + .addReg(MI->getOperand(1).getReg()) + .addImm(1) // SRC1 modifiers + .addReg(MI->getOperand(2).getReg()) + .addImm(0) // SRC2 modifiers + .addImm(0) // src2 + .addImm(0) // CLAMP + .addImm(0); // OMOD MI->eraseFromParent(); break; - + } case AMDGPU::SI_RegisterStorePseudo: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index c3d4376fbba6..eb9746779374 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -426,9 +426,11 @@ class VOP3_64_32 op, string opName, list pattern> : VOP3 < class VOP3_64 op, string opName, list pattern> : VOP3 < op, (outs VReg_64:$dst), - (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, - InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), - opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern + (ins InputMods:$src0_modifiers, VSrc_64:$src0, + InputMods:$src1_modifiers, VSrc_64:$src1, + InputMods:$src2_modifiers, VSrc_64:$src2, + InstFlag:$clamp, InstFlag:$omod), + opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern >, VOP ; //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll index 1445a20839ad..f5e5708f1b41 100644 --- a/test/CodeGen/R600/fsub64.ll +++ b/test/CodeGen/R600/fsub64.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s - -; CHECK: @fsub_f64 -; CHECK: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}, 0, 0, 0, 0, 2 +; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; SI-LABEL: @fsub_f64: +; SI: V_ADD_F64 
{{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 From 2b6e6fc1a8c188a9ccbe028b42697d32edaa2a1c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 17:13:57 +0000 Subject: [PATCH 059/157] R600/SI: Add intrinsics for brev instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211187 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 4 ++++ lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/AMDGPUInstrInfo.td | 2 ++ lib/Target/R600/AMDGPUIntrinsics.td | 1 + lib/Target/R600/SIInstrInfo.cpp | 1 + lib/Target/R600/SIInstructions.td | 4 +++- test/CodeGen/R600/llvm.AMDGPU.brev.ll | 27 ++++++++++++++++++++++++++ 7 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/R600/llvm.AMDGPU.brev.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1e6f38ffc3fd..ac5d79087818 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -900,6 +900,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_brev: + return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_exp: // Legacy name. return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); @@ -2026,6 +2029,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(BREV) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 34e36d886b97..5be3070f589b 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -188,6 +188,7 @@ enum { BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. + BREV, // Reverse bits. MUL_U24, MUL_I24, MAD_U24, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 5cb2559dbb83..942a9e8ff351 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -105,6 +105,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; +def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>; + // Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when // performing the mulitply. The result is a 32-bit value. 
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 4ef23abfe3ed..6dc7612d46fb 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -64,6 +64,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index e06dd1de3ec4..f5b82d53ba7a 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -664,6 +664,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; + case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 61cd2be967dd..428e49c6431c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -101,7 +101,9 @@ def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", >; def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; -def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; +def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", + [(set i32:$dst, (AMDGPUbrev i32:$src0))] +>; def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; } // End neverHasSideEffects = 1 diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll new file mode 100644 index 000000000000..68a5ad0649c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone + +; FUNC-LABEL: @s_brev_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_brev_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} From 5d5ddf9663fdfbc47ac0285c449f0abd92b3f5ad Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 18 Jun 2014 17:20:49 +0000 Subject: [PATCH 060/157] Add 
a triple so that the right syntax is chosen on Mac OS X systems git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211188 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/arm64-convert-v4f64.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll index 6bfb0a54ea3a..7123e5e0b235 100644 --- a/test/CodeGen/AArch64/arm64-convert-v4f64.ll +++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -march=arm64 | FileCheck %s define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) { From 496e7ea119421a2779d8e144266b6ccba2354cd7 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Jun 2014 17:28:56 +0000 Subject: [PATCH 061/157] [PowerPC] Add back test case for absolute calls (removed in r211174) As requested by Hal Finkel, this adds back a test for calls to a known-constant function pointer value, and verifies that the 64-bit SVR4 indirect function call sequence is used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211190 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/PowerPC/ppc64-calls.ll | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll index 396a5e25caf7..54b6b6fe0b39 100644 --- a/test/CodeGen/PowerPC/ppc64-calls.ll +++ b/test/CodeGen/PowerPC/ppc64-calls.ll @@ -42,6 +42,22 @@ define void @test_indirect(void ()* nocapture %fp) nounwind { ret void } +; Absolute values must use the regular indirect call sequence +; The main purpose of this test is to ensure that BLA is not +; used on 64-bit SVR4 (as e.g. on Darwin). +define void @test_abs() nounwind { +; CHECK-LABEL: test_abs: + tail call void inttoptr (i64 1024 to void ()*)() nounwind +; CHECK: ld [[FP:[0-9]+]], 1024(0) +; CHECK: ld 11, 1040(0) +; CHECK: mtctr [[FP]] +; CHECK: li [[FD:[0-9]+]], 1024 +; CHECK: ld 2, 8([[FD]]) +; CHECK: bctrl +; CHECK-NEXT: ld 2, 40(1) + ret void +} + declare double @sin(double) nounwind ; External functions call should also have a 'nop' From 4380c61415d94ac89131c09971c0ede71d5cd317 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 17:45:58 +0000 Subject: [PATCH 062/157] Work around ridiculous warning. Apparently C++ doesn't really have hex floating point constants. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211192 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index ac5d79087818..e9720ef8af19 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1664,14 +1664,17 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f64); - SDValue C1 = DAG.getConstantFP(0x1.0p+52, MVT::f64); + APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52"); + SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64); SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); - SDValue C2 = DAG.getConstantFP(0x1.fffffffffffffp+51, MVT::f64); + + APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51"); + SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64); EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); From 0c57babfc6fde8f6b5b5e1ca810b85fa2afd734a Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Jun 2014 17:52:49 +0000 Subject: [PATCH 063/157] [PowerPC] Simplify and improve loading into TOC register During an indirect function call sequence on the 64-bit SVR4 ABI, generated code must load and then restore the TOC register. This does not use a regular LOAD instruction since the TOC register r2 is marked as reserved. Instead, there are two special instruction patterns: let RST = 2, DS = 2 in def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), "ld 2, 8($reg)", IIC_LdStLD, [(PPCload_toc i64:$reg)]>, isPPC64; let RST = 2, DS = 10, RA = 1 in def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), "ld 2, 40(1)", IIC_LdStLD, [(PPCtoc_restore)]>, isPPC64; Note that these not only restrict the destination of the load to r2, but they also restrict the *source* of the load to particular address combinations. The latter is a problem when we want to support the ELFv2 ABI, since there the TOC save slot is no longer at 40(1). This patch replaces those two instructions with a single instruction pattern that only hard-codes r2 as destination, but supports generic addresses as source. This will allow supporting the ELFv2 ABI, and also helps generate more efficient code for calls to absolute addresses (allowing simplification of the ppc64-calls.ll test case). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211193 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.h | 6 ++++++ lib/Target/PowerPC/PPCISelLowering.cpp | 15 +++++++++++---- lib/Target/PowerPC/PPCISelLowering.h | 11 +++-------- lib/Target/PowerPC/PPCInstr64Bit.td | 16 +++++----------- lib/Target/PowerPC/PPCInstrFormats.td | 14 -------------- lib/Target/PowerPC/PPCInstrInfo.td | 3 --- test/CodeGen/PowerPC/ppc64-calls.ll | 7 +++----- 7 files changed, 28 insertions(+), 44 deletions(-) diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 1280dec86439..ca1ca56d08fa 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -74,6 +74,12 @@ class PPCFrameLowering: public TargetFrameLowering { return isPPC64 ? 
16 : 4; } + /// getTOCSaveOffset - Return the previous frame offset to save the + /// TOC register -- 64-bit SVR4 ABI only. + static unsigned getTOCSaveOffset(void) { + return 40; + } + /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index e815d615acb7..201d1241996b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -773,7 +773,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; - case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE"; case PPCISD::LOAD: return "PPCISD::LOAD"; case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; @@ -3544,8 +3543,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // additional register being allocated and an unnecessary move instruction // being generated. VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue TOCOff = DAG.getIntPtrConstant(8); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, - Callee, InFlag); + AddTOC, InFlag); Chain = LoadTOCPtr.getValue(0); InFlag = LoadTOCPtr.getValue(1); @@ -3729,7 +3730,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, if (needsTOCRestore) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag); InFlag = Chain.getValue(1); } @@ -4388,7 +4394,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Load r2 into a virtual register and store it to the TOC save area. SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. - SDValue PtrOff = DAG.getIntPtrConstant(40); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 9b85c99726e1..2b69208fea7e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -70,19 +70,14 @@ namespace llvm { TOC_ENTRY, - /// The following three target-specific nodes are used for calls through + /// The following two target-specific nodes are used for calls through /// function pointers in the 64-bit SVR4 ABI. - /// Restore the TOC from the TOC save area of the current stack frame. - /// This is basically a hard coded load instruction which additionally - /// takes/produces a flag. - TOC_RESTORE, - /// Like a regular LOAD but additionally taking/producing a flag. LOAD, - /// LOAD into r2 (also taking/producing a flag). 
Like TOC_RESTORE, this is - /// a hard coded load instruction. + /// Like LOAD (taking/producing a flag), but using r2 as hard-coded + /// destination. LOAD_TOC, /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index b71c09ea8e12..9318f70f4305 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -802,17 +802,11 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1, isCodeGenOnly = 1 in { -let RST = 2, DS = 2 in -def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), - "ld 2, 8($reg)", IIC_LdStLD, - [(PPCload_toc i64:$reg)]>, isPPC64; - -let RST = 2, DS = 10, RA = 1 in -def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), - "ld 2, 40(1)", IIC_LdStLD, - [(PPCtoc_restore)]>, isPPC64; -} +let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in +def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src), + "ld 2, $src", IIC_LdStLD, + [(PPCload_toc ixaddr:$src)]>, isPPC64; + def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", IIC_LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 7fed2c65da73..1e4396cd1017 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -360,20 +360,6 @@ class DSForm_1 opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, let Inst{30-31} = xo; } -class DSForm_1a opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : I { - bits<5> RST; - bits<14> DS; - bits<5> RA; - - let Pattern = pattern; - - let Inst{6-10} = RST; - let Inst{11-15} = RA; - let Inst{16-29} = DS; - let Inst{30-31} = xo; -} // 1.7.6 X-Form class XForm_base_r3xo opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e421f8e69e65..c2e3382b3e79 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -141,9 +141,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPSideEffect, SDNPInGlue, SDNPOutGlue]>; -def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPSideEffect, - SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll index 54b6b6fe0b39..31794be25beb 100644 --- a/test/CodeGen/PowerPC/ppc64-calls.ll +++ b/test/CodeGen/PowerPC/ppc64-calls.ll @@ -50,10 +50,9 @@ define void @test_abs() nounwind { tail call void inttoptr (i64 1024 to void ()*)() nounwind ; CHECK: ld [[FP:[0-9]+]], 1024(0) ; CHECK: ld 11, 1040(0) -; CHECK: mtctr [[FP]] -; CHECK: li [[FD:[0-9]+]], 1024 -; CHECK: ld 2, 8([[FD]]) -; CHECK: bctrl +; CHECK: ld 2, 1032(0) +; CHECK-NEXT: mtctr [[FP]] +; CHECK-NEXT: bctrl ; CHECK-NEXT: ld 2, 40(1) ret void } From 52b6c2d6efccb774da0244b65a4d9380b0a107eb Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Wed, 18 Jun 2014 17:57:29 +0000 Subject: [PATCH 064/157] R600: Expand vector fceil Move fp64 fceil tests to fceil64.ll v2: rebase Signed-off-by: Jan Vesely git-svn-id: 
https://llvm.org/svn/llvm-project/llvm/trunk@211194 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + test/CodeGen/R600/fceil.ll | 198 ++++++++++++++----------- test/CodeGen/R600/fceil64.ll | 103 +++++++++++++ 3 files changed, 217 insertions(+), 85 deletions(-) create mode 100644 test/CodeGen/R600/fceil64.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index e9720ef8af19..dc39bee6511e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -317,6 +317,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : for (MVT VT : FloatVectorTypes) { setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FADD, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll index b42aefa17328..458363adc1e3 100644 --- a/test/CodeGen/R600/fceil.ll +++ b/test/CodeGen/R600/fceil.ll @@ -1,103 +1,131 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -declare double @llvm.ceil.f64(double) nounwind readnone -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone -declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone -declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone -declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone -declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone +declare float @llvm.ceil.f32(float) nounwind readnone +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone +declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone -; FUNC-LABEL: @fceil_f64: -; CI: V_CEIL_F64_e32 -; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 -; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 -; SI: S_LSHR_B64 -; SI: S_NOT_B64 -; SI: S_AND_B64 -; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI: CMP_LT_I32 -; SI: CNDMASK_B32 -; SI: CNDMASK_B32 -; SI: CMP_GT_I32 -; SI: CNDMASK_B32 -; SI: CNDMASK_B32 -; SI: CMP_GT_F64 -; SI: CNDMASK_B32 -; SI: CMP_NE_I32 -; SI: CNDMASK_B32 -; SI: CNDMASK_B32 -; SI: V_ADD_F64 -define void @fceil_f64(double addrspace(1)* %out, double %x) { - %y = call double @llvm.ceil.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out +; FUNC-LABEL: @fceil_f32: +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] +; EG: CEIL {{\*? 
*}}[[RESULT]] +define void @fceil_f32(float addrspace(1)* %out, float %x) { + %y = call float @llvm.ceil.f32(float %x) nounwind readnone + store float %y, float addrspace(1)* %out ret void } -; FUNC-LABEL: @fceil_v2f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { - %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone - store <2 x double> %y, <2 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v2f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}} +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { + %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone + store <2 x float> %y, <2 x float> addrspace(1)* %out ret void } -; FIXME-FUNC-LABEL: @fceil_v3f64: -; FIXME-CI: V_CEIL_F64_e32 -; FIXME-CI: V_CEIL_F64_e32 -; FIXME-CI: V_CEIL_F64_e32 -; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { -; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out -; ret void -; } +; FUNC-LABEL: @fceil_v3f32: +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { + %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone + store <3 x float> %y, <3 x float> addrspace(1)* %out + ret void +} -; FUNC-LABEL: @fceil_v4f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { - %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone - store <4 x double> %y, <4 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v4f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}} +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { + %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone + store <4 x float> %y, <4 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: @fceil_v8f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { - %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone - store <8 x double> %y, <8 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v8f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? 
*}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { + %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone + store <8 x float> %y, <8 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: @fceil_v16f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { - %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone - store <16 x double> %y, <16 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v16f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? 
*}}[[RESULT4]] +define void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { + %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone + store <16 x float> %y, <16 x float> addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll new file mode 100644 index 000000000000..b42aefa17328 --- /dev/null +++ b/test/CodeGen/R600/fceil64.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare double @llvm.ceil.f64(double) nounwind readnone +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone + +; FUNC-LABEL: @fceil_f64: +; CI: V_CEIL_F64_e32 +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_F64 +; SI: CNDMASK_B32 +; SI: CMP_NE_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: V_ADD_F64 +define void @fceil_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.ceil.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v2f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-FUNC-LABEL: @fceil_v3f64: +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone +; store <3 x double> %y, <3 x double> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: @fceil_v4f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v8f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v16f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v16f64(<16 
x double> addrspace(1)* %out, <16 x double> %x) {
+  %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
+  store <16 x double> %y, <16 x double> addrspace(1)* %out
+  ret void
+}

From 7ec69f6e10840562dca6b557e28f1b6e784a3328 Mon Sep 17 00:00:00 2001
From: Ed Maste
Date: Wed, 18 Jun 2014 18:08:55 +0000
Subject: [PATCH 065/157] ADT: correct typo in comment

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211196 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/ADT/APSInt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h
index 053defff5234..ee34e9b53088 100644
--- a/include/llvm/ADT/APSInt.h
+++ b/include/llvm/ADT/APSInt.h
@@ -56,7 +56,7 @@ class APSInt : public APInt {
     APInt::toString(Str, Radix, isSigned());
   }
   /// toString - Converts an APInt to a std::string. This is an inefficient
-  /// method, your should prefer passing in a SmallString instead.
+  /// method; you should prefer passing in a SmallString instead.
   std::string toString(unsigned Radix) const {
     return APInt::toString(Radix, isSigned());
   }

From de04c485787ae207b035a6850e6449495ff8e65d Mon Sep 17 00:00:00 2001
From: Weiming Zhao
Date: Wed, 18 Jun 2014 18:17:25 +0000
Subject: [PATCH 066/157] [ARM] [MC] Refactor the constant pool classes

ARMTargetStreamer implements ConstantPool and AssemblerConstantPools to
keep track of assembler-generated constant pools that are used for
ldr-pseudo. When implementing ldr-pseudo for AArch64, these two classes
can be reused. So this patch factors them out from ARM target to the
general MC lib.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211198 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/MC/ConstantPools.h | 80 ++++++++++
 lib/MC/CMakeLists.txt | 1 +
 lib/MC/ConstantPools.cpp | 95 ++++++++++++
 .../ARM/MCTargetDesc/ARMTargetStreamer.cpp | 137 +-----------------
 4 files changed, 177 insertions(+), 136 deletions(-)
 create mode 100644 include/llvm/MC/ConstantPools.h
 create mode 100644 lib/MC/ConstantPools.cpp

diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
new file mode 100644
index 000000000000..2819b757b8bf
--- /dev/null
+++ b/include/llvm/MC/ConstantPools.h
@@ -0,0 +1,80 @@
+//===- ConstantPool.h - Keep track of assembler-generated ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ConstantPool and AssemblerConstantPools classes.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_MC_CONSTANTPOOL_H
+#define LLVM_MC_CONSTANTPOOL_H
+
+#include "llvm/ADT/SmallVector.h"
+namespace llvm {
+class MCContext;
+class MCExpr;
+class MCSection;
+class MCStreamer;
+class MCSymbol;
+// A class to keep track of assembler-generated constant pools that are use to
+// implement the ldr-pseudo.
+class ConstantPool {
+  typedef SmallVector, 4> EntryVecTy;
+  EntryVecTy Entries;
+
+public:
+  // Initialize a new empty constant pool
+  ConstantPool() {}
+
+  // Add a new entry to the constant pool in the next slot.
+  // \param Value is the new entry to put in the constant pool.
+ // + // \returns a MCExpr that references the newly inserted value + const MCExpr *addEntry(const MCExpr *Value, MCContext &Context); + + // Emit the contents of the constant pool using the provided streamer. + void emitEntries(MCStreamer &Streamer); + + // Return true if the constant pool is empty + bool empty(); +}; + +class AssemblerConstantPools { + // Map type used to keep track of per-Section constant pools used by the + // ldr-pseudo opcode. The map associates a section to its constant pool. The + // constant pool is a vector of (label, value) pairs. When the ldr + // pseudo is parsed we insert a new (label, value) pair into the constant pool + // for the current section and add MCSymbolRefExpr to the new label as + // an opcode to the ldr. After we have parsed all the user input we + // output the (label, value) pairs in each constant pool at the end of the + // section. + // + // We use the MapVector for the map type to ensure stable iteration of + // the sections at the end of the parse. We need to iterate over the + // sections in a stable order to ensure that we have print the + // constant pools in a deterministic order when printing an assembly + // file. + typedef MapVector ConstantPoolMapTy; + ConstantPoolMapTy ConstantPools; + +public: + AssemblerConstantPools() {} + ~AssemblerConstantPools() {} + + void emitAll(MCStreamer &Streamer); + void emitForCurrentSection(MCStreamer &Streamer); + const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr); + +private: + ConstantPool *getConstantPool(const MCSection *Section); + ConstantPool &getOrCreateConstantPool(const MCSection *Section); +}; +} // end namespace llvm + +#endif diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 6a384c1a8e1c..78bd8c4ba144 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMC + ConstantPools.cpp ELFObjectWriter.cpp MCAsmBackend.cpp MCAsmInfo.cpp diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp new file mode 100644 index 000000000000..f979dad47da5 --- /dev/null +++ b/lib/MC/ConstantPools.cpp @@ -0,0 +1,95 @@ +//===- ConstantPools.cpp - ConstantPool class --*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ConstantPool and AssemblerConstantPools classes. +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/ConstantPools.h" + +using namespace llvm; +// +// ConstantPool implementation +// +// Emit the contents of the constant pool using the provided streamer. 
+void ConstantPool::emitEntries(MCStreamer &Streamer) { + if (Entries.empty()) + return; + Streamer.EmitCodeAlignment(4); // align to 4-byte address + Streamer.EmitDataRegion(MCDR_DataRegion); + for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + Streamer.EmitLabel(I->first); + Streamer.EmitValue(I->second, 4); + } + Streamer.EmitDataRegion(MCDR_DataRegionEnd); + Entries.clear(); +} + +const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) { + MCSymbol *CPEntryLabel = Context.CreateTempSymbol(); + + Entries.push_back(std::make_pair(CPEntryLabel, Value)); + return MCSymbolRefExpr::Create(CPEntryLabel, Context); +} + +bool ConstantPool::empty() { return Entries.empty(); } + +// +// AssemblerConstantPools implementation +// +ConstantPool * +AssemblerConstantPools::getConstantPool(const MCSection *Section) { + ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); + if (CP == ConstantPools.end()) + return nullptr; + + return &CP->second; +} + +ConstantPool & +AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) { + return ConstantPools[Section]; +} + +static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section, + ConstantPool &CP) { + if (!CP.empty()) { + Streamer.SwitchSection(Section); + CP.emitEntries(Streamer); + } +} + +void AssemblerConstantPools::emitAll(MCStreamer &Streamer) { + // Dump contents of assembler constant pools. + for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(), + CPE = ConstantPools.end(); + CPI != CPE; ++CPI) { + const MCSection *Section = CPI->first; + ConstantPool &CP = CPI->second; + + emitConstantPool(Streamer, Section, CP); + } +} + +void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) { + const MCSection *Section = Streamer.getCurrentSection().first; + if (ConstantPool *CP = getConstantPool(Section)) { + emitConstantPool(Streamer, Section, *CP); + } +} + +const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer, + const MCExpr *Expr) { + const MCSection *Section = Streamer.getCurrentSection().first; + return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext()); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index e3cfb05b379d..0cb795ba3ccc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -11,147 +11,12 @@ // //===----------------------------------------------------------------------===// #include "llvm/ADT/MapVector.h" +#include "llvm/MC/ConstantPools.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; - -namespace { -// A class to keep track of assembler-generated constant pools that are use to -// implement the ldr-pseudo. -class ConstantPool { - typedef SmallVector, 4> EntryVecTy; - EntryVecTy Entries; - -public: - // Initialize a new empty constant pool - ConstantPool() {} - - // Add a new entry to the constant pool in the next slot. - // \param Value is the new entry to put in the constant pool. - // - // \returns a MCExpr that references the newly inserted value - const MCExpr *addEntry(const MCExpr *Value, MCContext &Context); - - // Emit the contents of the constant pool using the provided streamer. 
- void emitEntries(MCStreamer &Streamer); - - // Return true if the constant pool is empty - bool empty(); -}; -} - -namespace llvm { -class AssemblerConstantPools { - // Map type used to keep track of per-Section constant pools used by the - // ldr-pseudo opcode. The map associates a section to its constant pool. The - // constant pool is a vector of (label, value) pairs. When the ldr - // pseudo is parsed we insert a new (label, value) pair into the constant pool - // for the current section and add MCSymbolRefExpr to the new label as - // an opcode to the ldr. After we have parsed all the user input we - // output the (label, value) pairs in each constant pool at the end of the - // section. - // - // We use the MapVector for the map type to ensure stable iteration of - // the sections at the end of the parse. We need to iterate over the - // sections in a stable order to ensure that we have print the - // constant pools in a deterministic order when printing an assembly - // file. - typedef MapVector ConstantPoolMapTy; - ConstantPoolMapTy ConstantPools; - -public: - AssemblerConstantPools() {} - ~AssemblerConstantPools() {} - - void emitAll(MCStreamer &Streamer); - void emitForCurrentSection(MCStreamer &Streamer); - const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr); - -private: - ConstantPool *getConstantPool(const MCSection *Section); - ConstantPool &getOrCreateConstantPool(const MCSection *Section); -}; -} - -// -// ConstantPool implementation -// -// Emit the contents of the constant pool using the provided streamer. -void ConstantPool::emitEntries(MCStreamer &Streamer) { - if (Entries.empty()) - return; - Streamer.EmitCodeAlignment(4); // align to 4-byte address - Streamer.EmitDataRegion(MCDR_DataRegion); - for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end(); - I != E; ++I) { - Streamer.EmitLabel(I->first); - Streamer.EmitValue(I->second, 4); - } - Streamer.EmitDataRegion(MCDR_DataRegionEnd); - Entries.clear(); -} - -const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) { - MCSymbol *CPEntryLabel = Context.CreateTempSymbol(); - - Entries.push_back(std::make_pair(CPEntryLabel, Value)); - return MCSymbolRefExpr::Create(CPEntryLabel, Context); -} - -bool ConstantPool::empty() { return Entries.empty(); } - -// -// AssemblerConstantPools implementation -// -ConstantPool * -AssemblerConstantPools::getConstantPool(const MCSection *Section) { - ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); - if (CP == ConstantPools.end()) - return nullptr; - - return &CP->second; -} - -ConstantPool & -AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) { - return ConstantPools[Section]; -} - -static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section, - ConstantPool &CP) { - if (!CP.empty()) { - Streamer.SwitchSection(Section); - CP.emitEntries(Streamer); - } -} - -void AssemblerConstantPools::emitAll(MCStreamer &Streamer) { - // Dump contents of assembler constant pools. 
- for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(), - CPE = ConstantPools.end(); - CPI != CPE; ++CPI) { - const MCSection *Section = CPI->first; - ConstantPool &CP = CPI->second; - - emitConstantPool(Streamer, Section, CP); - } -} - -void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) { - const MCSection *Section = Streamer.getCurrentSection().first; - if (ConstantPool *CP = getConstantPool(Section)) { - emitConstantPool(Streamer, Section, *CP); - } -} - -const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer, - const MCExpr *Expr) { - const MCSection *Section = Streamer.getCurrentSection().first; - return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext()); -} - // // ARMTargetStreamer Implemenation // From 8252de6ddc3fe36bb7d026587c48b9ac073e4097 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Wed, 18 Jun 2014 18:20:44 +0000 Subject: [PATCH 067/157] ProfileData: Fix copy-paste type in RawInstrProfReader These deleted definitions had the wrong types. Patch by Alex L! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211199 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ProfileData/InstrProfReader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index c5005a1d11ed..7a5a71dc6a31 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -168,8 +168,8 @@ class RawInstrProfReader : public InstrProfReader { const char *NamesStart; const char *ProfileEnd; - RawInstrProfReader(const TextInstrProfReader &) LLVM_DELETED_FUNCTION; - RawInstrProfReader &operator=(const TextInstrProfReader &) + RawInstrProfReader(const RawInstrProfReader &) LLVM_DELETED_FUNCTION; + RawInstrProfReader &operator=(const RawInstrProfReader &) LLVM_DELETED_FUNCTION; public: RawInstrProfReader(std::unique_ptr DataBuffer) From 8dfacbc003942ced9b3d702d59241aad1e7ca42e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 18:26:53 +0000 Subject: [PATCH 068/157] Run clang-format in a small chunk of code I am about to change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211201 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 36849d404541..cb0ce3228263 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -224,20 +224,16 @@ class BitcodeReader : public GVMaterializer { } explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) - : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false), - LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), - ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false), UseRelativeIDs(false) { - } + : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false), + LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), + ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), + UseRelativeIDs(false) {} explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) - : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false), - LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), - ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false), UseRelativeIDs(false) { - } - ~BitcodeReader() { - FreeState(); - } + : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false), + LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), + ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), + UseRelativeIDs(false) {} + ~BitcodeReader() { FreeState(); } void materializeForwardReferencedFunctions(); From 75b56dc6fe8c8e17ba37683c158dcb63abcd9056 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 18:30:15 +0000 Subject: [PATCH 069/157] Update to the latest registered ELF e_machine names and values. Patch by John Wolf! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211202 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/ELF.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index 0b3e55b91524..31b34ffa703e 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -124,6 +124,8 @@ enum { }; // Machine architectures +// See current registered ELF machine architectures at: +// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html enum { EM_NONE = 0, // No machine EM_M32 = 1, // AT&T WE 32100 @@ -287,7 +289,26 @@ enum { EM_RL78 = 197, // Renesas RL78 family EM_VIDEOCORE5 = 198, // Broadcom VideoCore V processor EM_78KOR = 199, // Renesas 78KOR family - EM_56800EX = 200 // Freescale 56800EX Digital Signal Controller (DSC) + EM_56800EX = 200, // Freescale 56800EX Digital Signal Controller (DSC) + EM_BA1 = 201, // Beyond BA1 CPU architecture + EM_BA2 = 202, // Beyond BA2 CPU architecture + EM_XCORE = 203, // XMOS xCORE processor family + EM_MCHP_PIC = 204, // Microchip 8-bit PIC(r) family + EM_INTEL205 = 205, // Reserved by Intel + EM_INTEL206 = 206, // Reserved by Intel + EM_INTEL207 = 207, // Reserved by Intel + EM_INTEL208 = 208, // Reserved by Intel + EM_INTEL209 = 209, // Reserved by Intel + EM_KM32 = 210, // KM211 KM32 32-bit processor + EM_KMX32 = 211, // KM211 KMX32 32-bit processor + EM_KMX16 = 212, // KM211 KMX16 16-bit processor + EM_KMX8 = 213, // KM211 KMX8 8-bit processor + EM_KVARC = 214, // KM211 KVARC processor + EM_CDP = 215, // Paneve CDP architecture family + EM_COGE = 216, // Cognitive Smart Memory Processor + EM_COOL = 217, // iCelero CoolEngine + EM_NORC = 218, // Nanoradio Optimized RISC + EM_CSR_KALIMBA = 219 // CSR Kalimba architecture family }; // Object file classes. From ffbd906558a6bb927ba81570491472393ee7ebac Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Jun 2014 18:33:36 +0000 Subject: [PATCH 070/157] [PowerPC] Remove unnecessary load of r12 in indirect call When looking at the 64-bit SVR4 indirect call sequence, I noticed an unnecessary load of r12. And indeed the code says: // R12 must contain the address of an indirect callee. But this is not correct; in the 64-bit SVR4 (ELFv1) ABI, there is no need to load r12 at this point. It seems this code and comment is a remnant of code originally shared with the Darwin ABI ... This patch simply removes the unnecessary load. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211203 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 201d1241996b..f6884d5a2716 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4399,10 +4399,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); - // R12 must contain the address of an indirect callee. This does not - // mean the MTCTR instruction must use R12; it's easier to model this - // as an extra parameter, so do that. 
- RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain From ec2d8233296fd6d8f37880cd750db1f7225f6b3a Mon Sep 17 00:00:00 2001 From: Diego Novillo Date: Wed, 18 Jun 2014 18:46:58 +0000 Subject: [PATCH 071/157] Simply test for available locations in optimization remarks. When emitting optimization remarks, we test for the presence of instruction locations by testing for a valid llvm.dbg.cu annotation. This is slightly inefficient because we can simply ask whether the debug location we have is known or not. Additionally, if my current plan works, I will need to remove the llvm.dbg.cu annotation from the IL (or prevent it from being generated) when -Rpass is used without -g. In those cases, we'll want to generate line tables but we will want to prevent code generation from emitting DWARF code for them. Tested on x86_64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211204 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/DiagnosticInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 6eeb16220eac..27270636004f 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -128,7 +128,7 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { } bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const { - return getFunction().getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr; + return getDebugLoc().isUnknown() == false; } void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename, From cc21bbde87de290fa09dc0468808594bab4203c7 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 18:55:41 +0000 Subject: [PATCH 072/157] Remove BitcodeReader::setBufferOwned. We do have use cases for the bitcode reader owning the buffer or not, but we always know which one we have when we construct it. It might be possible to simplify this further, but this is a step in the right direction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211205 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Bitcode/ReaderWriter.h | 3 ++- lib/Bitcode/Reader/BitcodeReader.cpp | 18 +++++------------- lib/Bitcode/Reader/BitcodeReader.h | 14 +++++--------- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index 4c194a638d4a..0d0d6a71ab14 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -30,7 +30,8 @@ namespace llvm { /// deserialization of function bodies. If successful, this takes ownership /// of 'buffer. On error, this *does not* take ownership of Buffer. ErrorOr getLazyBitcodeModule(MemoryBuffer *Buffer, - LLVMContext &Context); + LLVMContext &Context, + bool BufferOwned = true); /// getStreamedBitcodeModule - Read the header of the specified stream /// and prepare for lazy deserialization and streaming of function bodies. diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index c13eba78c2ad..04fc18ec21f0 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3374,16 +3374,15 @@ const std::error_category &BitcodeReader::BitcodeErrorCategory() { /// getLazyBitcodeModule - lazy function-at-a-time loading from a file. 
/// ErrorOr llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, - LLVMContext &Context) { + LLVMContext &Context, + bool BufferOwned) { Module *M = new Module(Buffer->getBufferIdentifier(), Context); - BitcodeReader *R = new BitcodeReader(Buffer, Context); + BitcodeReader *R = new BitcodeReader(Buffer, Context, BufferOwned); M->setMaterializer(R); if (std::error_code EC = R->ParseBitcodeInto(M)) { delete M; // Also deletes R. return EC; } - // Have the BitcodeReader dtor delete 'Buffer'. - R->setBufferOwned(true); R->materializeForwardReferencedFunctions(); @@ -3404,21 +3403,16 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, delete M; // Also deletes R. return nullptr; } - R->setBufferOwned(false); // no buffer to delete return M; } ErrorOr llvm::parseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context) { - ErrorOr ModuleOrErr = getLazyBitcodeModule(Buffer, Context); + ErrorOr ModuleOrErr = getLazyBitcodeModule(Buffer, Context, false); if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); - // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether - // there was an error. - static_cast(M->getMaterializer())->setBufferOwned(false); - // Read in the entire module, and destroy the BitcodeReader. if (std::error_code EC = M->materializeAllPermanently()) { delete M; @@ -3434,9 +3428,7 @@ ErrorOr llvm::parseBitcodeFile(MemoryBuffer *Buffer, std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer, LLVMContext& Context, std::string *ErrMsg) { - BitcodeReader *R = new BitcodeReader(Buffer, Context); - // Don't let the BitcodeReader dtor delete 'Buffer'. - R->setBufferOwned(false); + BitcodeReader *R = new BitcodeReader(Buffer, Context, /*BufferOwned*/ false); std::string Triple(""); if (std::error_code EC = R->ParseTriple(Triple)) diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index cb0ce3228263..51a8c6a11130 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -223,11 +223,11 @@ class BitcodeReader : public GVMaterializer { return std::error_code(E, BitcodeErrorCategory()); } - explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) - : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false), - LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), - ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), - UseRelativeIDs(false) {} + explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, bool BufferOwned) + : Context(C), TheModule(nullptr), Buffer(buffer), + BufferOwned(BufferOwned), LazyStreamer(nullptr), NextUnreadBit(0), + SeenValueSymbolTable(false), ValueList(C), MDValueList(C), + SeenFirstFunctionBody(false), UseRelativeIDs(false) {} explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), @@ -239,10 +239,6 @@ class BitcodeReader : public GVMaterializer { void FreeState(); - /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer - /// when the reader is destroyed. 
- void setBufferOwned(bool Owned) { BufferOwned = Owned; } - bool isMaterializable(const GlobalValue *GV) const override; bool isDematerializable(const GlobalValue *GV) const override; std::error_code Materialize(GlobalValue *GV) override; From d681e31b46bdfad5bcc925241472c272f9796aa5 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 19:05:24 +0000 Subject: [PATCH 073/157] Change IRObjectFile to parse the bitcode lazily. The main point of this class is to provide a cheap object interface to a bitcode file, so it has to be as lazy as possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211207 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/IRObjectFile.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index 48820361d4ff..004d8de065d8 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -23,7 +23,8 @@ using namespace object; IRObjectFile::IRObjectFile(MemoryBuffer *Object, std::error_code &EC, LLVMContext &Context, bool BufferOwned) : SymbolicFile(Binary::ID_IR, Object, BufferOwned) { - ErrorOr MOrErr = parseBitcodeFile(Object, Context); + ErrorOr MOrErr = + getLazyBitcodeModule(Object, Context, /*BufferOwned*/ false); if ((EC = MOrErr.getError())) return; @@ -104,11 +105,21 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, return object_error::success; } +static bool isDeclaration(const GlobalValue &V) { + if (V.hasAvailableExternallyLinkage()) + return true; + + if (V.isMaterializable()) + return false; + + return V.isDeclaration(); +} + uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { const GlobalValue &GV = getGV(Symb); uint32_t Res = BasicSymbolRef::SF_None; - if (GV.isDeclaration() || GV.hasAvailableExternallyLinkage()) + if (isDeclaration(GV)) Res |= BasicSymbolRef::SF_Undefined; if (GV.hasPrivateLinkage()) Res |= BasicSymbolRef::SF_FormatSpecific; From fff95d57a46e446b8224d1f8982e129fe3238070 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 19:08:47 +0000 Subject: [PATCH 074/157] Make getBaseObject static. Thanks to David Majnemer for noticing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211208 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Globals.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 160ddc9e81a7..5410cc031dad 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -59,7 +59,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { setDLLStorageClass(Src->getDLLStorageClass()); } -const GlobalObject *getBaseObject(const Constant &C) { +static const GlobalObject *getBaseObject(const Constant &C) { // FIXME: We should probably return a base + offset pair for non-zero GEPs. return dyn_cast(C.stripPointerCasts()); } From b2791542c2c5df2912848b8bdf06fb1093d4ac12 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 20:07:35 +0000 Subject: [PATCH 075/157] Revert a C API difference that I incorrectly introduced. LLVMGetBitcodeModuleInContext should not take ownership on error. I will try to localize this odd api requirement, but this should get the bots green. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211213 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 1 + lib/Bitcode/Reader/BitcodeReader.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 04fc18ec21f0..9c398277d42d 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3380,6 +3380,7 @@ ErrorOr llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, BitcodeReader *R = new BitcodeReader(Buffer, Context, BufferOwned); M->setMaterializer(R); if (std::error_code EC = R->ParseBitcodeInto(M)) { + R->releaseBuffer(); // Never take ownership on error. delete M; // Also deletes R. return EC; } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 51a8c6a11130..6aa3e0e5adf7 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -239,6 +239,10 @@ class BitcodeReader : public GVMaterializer { void FreeState(); + void releaseBuffer() { + Buffer = nullptr; + } + bool isMaterializable(const GlobalValue *GV) const override; bool isDematerializable(const GlobalValue *GV) const override; std::error_code Materialize(GlobalValue *GV) override; From 1f502bd9d7d2c1f98ad93a09ffe435e11a95aedd Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Wed, 18 Jun 2014 20:17:35 +0000 Subject: [PATCH 076/157] Replace Execution Engine's mutex with std::recursive_mutex. This change has a bit of a trickle down effect due to the fact that there are a number of derived implementations of ExecutionEngine, and that the mutex is not tightly encapsulated so is used by other classes directly. Reviewed by: rnk Differential Revision: http://reviews.llvm.org/D4196 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211214 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/ExecutionEngine/ExecutionEngine.h | 7 ++-- include/llvm/IR/ValueMap.h | 12 +++--- lib/ExecutionEngine/ExecutionEngine.cpp | 16 ++++---- lib/ExecutionEngine/JIT/JIT.cpp | 38 +++++++++---------- lib/ExecutionEngine/JIT/JITEmitter.cpp | 22 +++++------ lib/ExecutionEngine/MCJIT/MCJIT.cpp | 37 +++++++++--------- unittests/IR/ValueMapTest.cpp | 8 ++-- 7 files changed, 69 insertions(+), 71 deletions(-) diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index e5dab6191ab6..d349af810650 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -22,10 +22,10 @@ #include "llvm/IR/ValueMap.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include +#include #include #include @@ -42,7 +42,6 @@ class JITEventListener; class JITMemoryManager; class MachineCodeInfo; class Module; -class MutexGuard; class ObjectCache; class RTDyldMemoryManager; class Triple; @@ -59,7 +58,7 @@ class ExecutionEngineState { public: struct AddressMapConfig : public ValueMapConfig { typedef ExecutionEngineState *ExtraData; - static sys::Mutex *getMutex(ExecutionEngineState *EES); + static std::recursive_mutex *getMutex(ExecutionEngineState *EES); static void onDelete(ExecutionEngineState *EES, const GlobalValue *Old); static void onRAUW(ExecutionEngineState *, const GlobalValue *, const GlobalValue *); @@ -164,7 +163,7 @@ class ExecutionEngine { /// lock - This lock protects the ExecutionEngine, MCJIT, JIT, 
JITResolver and /// JITEmitter classes. It must be held while changing the internal state of /// any of those classes. - sys::Mutex lock; + std::recursive_mutex lock; //===--------------------------------------------------------------------===// // ExecutionEngine Startup diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index f196f334b605..17b0fe01b116 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -28,9 +28,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/type_traits.h" #include +#include namespace llvm { @@ -45,7 +45,7 @@ class ValueMapConstIterator; /// This class defines the default behavior for configurable aspects of /// ValueMap<>. User Configs should inherit from this class to be as compatible /// as possible with future versions of ValueMap. -template +template struct ValueMapConfig { typedef MutexT mutex_type; @@ -216,11 +216,11 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->acquire(); + M->lock(); Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this. Copy.Map->Map.erase(Copy); // Definitely destroys *this. if (M) - M->release(); + M->unlock(); } void allUsesReplacedWith(Value *new_key) override { assert(isa(new_key) && @@ -229,7 +229,7 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->acquire(); + M->lock(); KeyT typed_new_key = cast(new_key); // Can destroy *this: @@ -245,7 +245,7 @@ class ValueMapCallbackVH : public CallbackVH { } } if (M) - M->release(); + M->unlock(); } }; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 9154fe2f5ff4..553ceb4575d5 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -166,7 +166,7 @@ void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) { } void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); @@ -184,14 +184,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { } void ExecutionEngine::clearAllGlobalMappings() { - MutexGuard locked(lock); + std::lock_guard locked(lock); EEState.getGlobalAddressMap().clear(); EEState.getGlobalAddressReverseMap().clear(); } void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) EEState.RemoveMapping(FI); @@ -201,7 +201,7 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { } void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = EEState.getGlobalAddressMap(); @@ -228,7 +228,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { } void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy::iterator I = EEState.getGlobalAddressMap().find(GV); @@ -236,7 +236,7 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const 
GlobalValue *GV) { } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { @@ -555,7 +555,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { if (Function *F = const_cast(dyn_cast(GV))) return getPointerToFunction(F); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (void *P = EEState.getGlobalAddressMap()[GV]) return P; @@ -1346,7 +1346,7 @@ ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE) : EE(EE), GlobalAddressMap(this) { } -sys::Mutex * +std::recursive_mutex * ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) { return &EES->EE.lock; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 83ec9784b98e..463fa299b3f3 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -96,18 +96,18 @@ namespace { /// bugpoint or gdb users to search for a function by name without any context. class JitPool { SmallPtrSet JITs; // Optimize for process containing just 1 JIT. - mutable sys::Mutex Lock; + mutable std::recursive_mutex Lock; public: void Add(JIT *jit) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); JITs.insert(jit); } void Remove(JIT *jit) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); JITs.erase(jit); } void *getPointerToNamedFunction(const char *Name) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); assert(JITs.size() != 0 && "No Jit registered"); //search function in every instance of JIT for (SmallPtrSet::const_iterator Jit = JITs.begin(), @@ -150,7 +150,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, AllJits->Add(this); // Add target data - MutexGuard locked(lock); + std::lock_guard locked(lock); FunctionPassManager &PM = jitstate->getPM(); M->setDataLayout(TM.getDataLayout()); PM.add(new DataLayoutPass(M)); @@ -177,7 +177,7 @@ JIT::~JIT() { /// addModule - Add a new Module to the JIT. If we previously removed the last /// Module, we need re-initialize jitstate with a valid Module. 
void JIT::addModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (Modules.empty()) { assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); @@ -206,7 +206,7 @@ void JIT::addModule(Module *M) { bool JIT::removeModule(Module *M) { bool result = ExecutionEngine::removeModule(M); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (jitstate && jitstate->getModule() == M) { delete jitstate; @@ -408,13 +408,13 @@ GenericValue JIT::runFunction(Function *F, void JIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void JIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); std::vector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -426,14 +426,14 @@ void JIT::NotifyFunctionEmitted( const Function &F, void *Code, size_t Size, const JITEvent_EmittedFunctionDetails &Details) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details); } } void JIT::NotifyFreeingMachineCode(void *OldPtr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingMachineCode(OldPtr); } @@ -444,7 +444,7 @@ void JIT::NotifyFreeingMachineCode(void *OldPtr) { /// GlobalAddress[F] with the address of F's machine code. /// void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { - MutexGuard locked(lock); + std::lock_guard locked(lock); class MCIListener : public JITEventListener { MachineCodeInfo *const MCI; @@ -505,7 +505,7 @@ void *JIT::getPointerToFunction(Function *F) { if (void *Addr = getPointerToGlobalIfAvailable(F)) return Addr; // Check if function already code gen'd - MutexGuard locked(lock); + std::lock_guard locked(lock); // Now that this thread owns the lock, make sure we read in the function if it // exists in this Module. @@ -534,7 +534,7 @@ void *JIT::getPointerToFunction(Function *F) { } void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -546,7 +546,7 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { } void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { - MutexGuard locked(lock); + std::lock_guard locked(lock); getBasicBlockAddressMap().erase(BB); } @@ -555,7 +555,7 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { (void)getPointerToFunction(BB->getParent()); // resolve basic block address - MutexGuard locked(lock); + std::lock_guard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -592,7 +592,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. 
void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { - MutexGuard locked(lock); + std::lock_guard locked(lock); void *Ptr = getPointerToGlobalIfAvailable(GV); if (Ptr) return Ptr; @@ -666,7 +666,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { size_t S = getDataLayout()->getTypeAllocSize(GlobalType); size_t A = getDataLayout()->getPreferredAlignment(GV); if (GV->isThreadLocal()) { - MutexGuard locked(lock); + std::lock_guard locked(lock); Ptr = TJI.allocateThreadLocalMemory(S); } else if (TJI.allocateSeparateGVMemory()) { if (A <= 8) { @@ -687,7 +687,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { } void JIT::addPendingFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); jitstate->getPendingFunctions().push_back(F); } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 50b8c10b638b..1e1f5d55802c 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -40,7 +40,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Memory.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" @@ -50,6 +49,7 @@ #ifndef NDEBUG #include #endif +#include using namespace llvm; #define DEBUG_TYPE "jit" @@ -230,22 +230,22 @@ namespace { std::map Map; /// Guards Map from concurrent accesses. - mutable sys::Mutex Lock; + mutable std::recursive_mutex Lock; public: /// Registers a Stub to be resolved by Resolver. void RegisterStubResolver(void *Stub, JITResolver *Resolver) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); Map.insert(std::make_pair(Stub, Resolver)); } /// Unregisters the Stub when it's invalidated. void UnregisterStubResolver(void *Stub) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); Map.erase(Stub); } /// Returns the JITResolver instance that owns the Stub. JITResolver *getResolverFromStub(void *Stub) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. // This is the same trick as in LookupFunctionFromCallSite from @@ -258,7 +258,7 @@ namespace { /// True if any stubs refer to the given resolver. Only used in an assert(). /// O(N) bool ResolverHasStubs(JITResolver* Resolver) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); for (std::map::const_iterator I = Map.begin(), E = Map.end(); I != E; ++I) { if (I->second == Resolver) @@ -494,7 +494,7 @@ JITResolver::~JITResolver() { /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub /// if it has already been created. void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a stub for this function, recycle it. return state.getFunctionToLazyStubMap().lookup(F); @@ -503,7 +503,7 @@ void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { /// getFunctionStub - This returns a pointer to a function stub, creating /// one on demand as needed. void *JITResolver::getLazyFunctionStub(Function *F) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a lazy stub for this function, recycle it. 
void *&Stub = state.getFunctionToLazyStubMap()[F]; @@ -564,7 +564,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { /// getGlobalValueIndirectSym - Return a lazy pointer containing the specified /// GV address. void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a stub for this global variable, recycle it. void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; @@ -622,7 +622,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - MutexGuard locked(JR->TheJIT->lock); + std::lock_guard guard(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. @@ -654,7 +654,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { } // Reacquire the lock to update the GOT map. - MutexGuard locked(JR->TheJIT->lock); + std::lock_guard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! Multiple threads could be stuck, waiting to acquire the diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index e9ba96a6496f..f149edcb7b04 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -26,7 +26,6 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -66,7 +65,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, } MCJIT::~MCJIT() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -102,12 +101,12 @@ MCJIT::~MCJIT() { } void MCJIT::addModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); OwnedModules.addModule(M); } bool MCJIT::removeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return OwnedModules.removeModule(M); } @@ -129,12 +128,12 @@ void MCJIT::addArchive(object::Archive *A) { void MCJIT::setObjectCache(ObjectCache* NewCache) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ObjCache = NewCache; } ObjectBufferStream* MCJIT::emitObject(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added but not loaded to this // MCJIT instance, since these conditions are tested by our caller, @@ -174,7 +173,7 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) { void MCJIT::generateCodeForModule(Module *M) { // Get a thread lock to make sure we aren't trying to load multiple times - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && @@ -214,7 +213,7 @@ void MCJIT::generateCodeForModule(Module *M) { } void MCJIT::finalizeLoadedModules() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Resolve any outstanding relocations. 
Dyld.resolveRelocations(); @@ -230,7 +229,7 @@ void MCJIT::finalizeLoadedModules() { // FIXME: Rename this. void MCJIT::finalizeObject() { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (ModulePtrSet::iterator I = OwnedModules.begin_added(), E = OwnedModules.end_added(); @@ -243,7 +242,7 @@ void MCJIT::finalizeObject() { } void MCJIT::finalizeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module."); @@ -268,7 +267,7 @@ uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { Module *MCJIT::findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If it hasn't already been generated, see if it's in one of our modules. for (ModulePtrSet::iterator I = OwnedModules.begin_added(), @@ -292,7 +291,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, uint64_t MCJIT::getSymbolAddress(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // First, check to see if we already have this symbol. uint64_t Addr = getExistingSymbolAddress(Name); @@ -336,7 +335,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, false); if (Result != 0) finalizeLoadedModules(); @@ -344,7 +343,7 @@ uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { } uint64_t MCJIT::getFunctionAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, true); if (Result != 0) finalizeLoadedModules(); @@ -353,7 +352,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) { // Deprecated. Use getFunctionAddress instead. 
void *MCJIT::getPointerToFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -552,13 +551,13 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name, void MCJIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); SmallVector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -567,14 +566,14 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { } } void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); MemMgr.notifyObjectLoaded(this, &Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyObjectEmitted(Obj); } } void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingObject(Obj); } diff --git a/unittests/IR/ValueMapTest.cpp b/unittests/IR/ValueMapTest.cpp index 248740aeb726..51acd2fc641a 100644 --- a/unittests/IR/ValueMapTest.cpp +++ b/unittests/IR/ValueMapTest.cpp @@ -186,19 +186,19 @@ struct LockMutex : ValueMapConfig { }; static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) { *Data.CalledRAUW = true; - EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; } static void onDelete(const ExtraData &Data, KeyT Old) { *Data.CalledDeleted = true; - EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; } static MutexT *getMutex(const ExtraData &Data) { return Data.M; } }; #if LLVM_ENABLE_THREADS TYPED_TEST(ValueMapTest, LocksMutex) { - sys::Mutex M(false); // Not recursive. + std::mutex M; // Not recursive. bool CalledRAUW = false, CalledDeleted = false; - typedef LockMutex ConfigType; + typedef LockMutex ConfigType; typename ConfigType::ExtraData Data = {&M, &CalledRAUW, &CalledDeleted}; ValueMap VM(Data); VM[this->BitcastV.get()] = 7; From 791ff5af548986d9ba11f6847421c4478a85f4c7 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 18 Jun 2014 20:57:28 +0000 Subject: [PATCH 077/157] MCAsmParser: full support for gas' '.if{cond} expression' directives Patch by Janne Grunau! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211218 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 49 +++++++++++++++++++---- test/MC/AsmParser/conditional_asm.s | 60 +++++++++++++++++++++++++++++ test/MC/AsmParser/if-diagnostics.s | 29 ++++++++++++++ 3 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 test/MC/AsmParser/if-diagnostics.s diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index cbff7beccae0..932d0f3318ec 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -345,8 +345,9 @@ class AsmParser : public MCAsmParser { DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE, DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC, - DK_IF, DK_IFNE, DK_IFB, DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, - DK_IFNDEF, DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF, + DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB, + DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF, + DK_ELSEIF, DK_ELSE, DK_ENDIF, DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS, DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA, DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER, @@ -433,8 +434,8 @@ class AsmParser : public MCAsmParser { bool parseDirectiveInclude(); // ".include" bool parseDirectiveIncbin(); // ".incbin" - // ".if" or ".ifne" - bool parseDirectiveIf(SMLoc DirectiveLoc); + // ".if", ".ifeq", ".ifge", ".ifgt" , ".ifle", ".iflt" or ".ifne" + bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind); // ".ifb" or ".ifnb", depending on ExpectBlank. bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); // ".ifc" or ".ifnc", depending on ExpectEqual. 
@@ -1229,8 +1230,13 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { default: break; case DK_IF: + case DK_IFEQ: + case DK_IFGE: + case DK_IFGT: + case DK_IFLE: + case DK_IFLT: case DK_IFNE: - return parseDirectiveIf(IDLoc); + return parseDirectiveIf(IDLoc, DirKind); case DK_IFB: return parseDirectiveIfb(IDLoc, true); case DK_IFNB: @@ -3792,9 +3798,8 @@ bool AsmParser::parseDirectiveIncbin() { } /// parseDirectiveIf -/// ::= .if expression -/// ::= .ifne expression -bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { +/// ::= .if{,eq,ge,gt,le,lt,ne} expression +bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { @@ -3809,6 +3814,29 @@ bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { Lex(); + switch (DirKind) { + default: + llvm_unreachable("unsupported directive"); + case DK_IF: + case DK_IFNE: + break; + case DK_IFEQ: + ExprValue = ExprValue == 0; + break; + case DK_IFGE: + ExprValue = ExprValue >= 0; + break; + case DK_IFGT: + ExprValue = ExprValue > 0; + break; + case DK_IFLE: + ExprValue = ExprValue <= 0; + break; + case DK_IFLT: + ExprValue = ExprValue < 0; + break; + } + TheCondState.CondMet = ExprValue; TheCondState.Ignore = !TheCondState.CondMet; } @@ -4111,6 +4139,11 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK; DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK; DirectiveKindMap[".if"] = DK_IF; + DirectiveKindMap[".ifeq"] = DK_IFEQ; + DirectiveKindMap[".ifge"] = DK_IFGE; + DirectiveKindMap[".ifgt"] = DK_IFGT; + DirectiveKindMap[".ifle"] = DK_IFLE; + DirectiveKindMap[".iflt"] = DK_IFLT; DirectiveKindMap[".ifne"] = DK_IFNE; DirectiveKindMap[".ifb"] = DK_IFB; DirectiveKindMap[".ifnb"] = DK_IFNB; diff --git a/test/MC/AsmParser/conditional_asm.s b/test/MC/AsmParser/conditional_asm.s index b9bee33c6a16..ecbceb1dc362 100644 --- a/test/MC/AsmParser/conditional_asm.s +++ b/test/MC/AsmParser/conditional_asm.s @@ -11,6 +11,66 @@ .endif .endif +# CHECK: .byte 0 +# CHECK-NOT: .byte 1 +.ifeq 32 - 32 + .byte 0 +.else + .byte 1 +.endif + +# CHECK: .byte 0 +# CHECK: .byte 1 +# CHECK-NOT: .byte 2 +.ifge 32 - 31 + .byte 0 +.endif +.ifge 32 - 32 + .byte 1 +.endif +.ifge 32 - 33 + .byte 2 +.endif + +# CHECK: .byte 0 +# CHECK-NOT: .byte 1 +# CHECK-NOT: .byte 2 +.ifgt 32 - 31 + .byte 0 +.endif +.ifgt 32 - 32 + .byte 1 +.endif +.ifgt 32 - 33 + .byte 2 +.endif + +# CHECK-NOT: .byte 0 +# CHECK: .byte 1 +# CHECK: .byte 2 +.ifle 32 - 31 + .byte 0 +.endif +.ifle 32 - 32 + .byte 1 +.endif +.ifle 32 - 33 + .byte 2 +.endif + +# CHECK-NOT: .byte 0 +# CHECK-NOT: .byte 1 +# CHECK: .byte 2 +.iflt 32 - 31 + .byte 0 +.endif +.iflt 32 - 32 + .byte 1 +.endif +.iflt 32 - 33 + .byte 2 +.endif + # CHECK: .byte 1 # CHECK-NOT: .byte 0 .ifne 32 - 32 diff --git a/test/MC/AsmParser/if-diagnostics.s b/test/MC/AsmParser/if-diagnostics.s new file mode 100644 index 000000000000..d102a5686d98 --- /dev/null +++ b/test/MC/AsmParser/if-diagnostics.s @@ -0,0 +1,29 @@ +// RUN: not llvm-mc -triple i386 %s -o /dev/null 2>&1 | FileCheck %s + +.if +.endif + +// CHECK: error: unknown token in expression +// CHECK: .if +// CHECK: ^ + +.ifeq 0, 3 +.endif + +// CHECK:error: unexpected token in '.if' directive +// CHECK: .ifeq 0, 3 +// CHECK: ^ + +.iflt "string1" +.endif + +// CHECK: error: expected absolute expression +// CHECK: .iflt "string1" +// CHECK: ^ + +.ifge test +.endif + +// CHECK: error: expected absolute expression +// CHECK: 
.ifge test +// CHECK: ^ From 889069443b66c07d698a6813e22d2498a30c749f Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 18 Jun 2014 20:57:32 +0000 Subject: [PATCH 078/157] MC: do not add comment string to the AsmToken in AsmLexer::LexLineComment Fixes macros with varargs if the macro instantiation has a trailing comment. Patch by Janne Grunau! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmLexer.cpp | 4 ++-- test/MC/AsmParser/vararg.s | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index bca516eca027..7991ef5fe09b 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -201,8 +201,8 @@ AsmToken AsmLexer::LexLineComment() { CurChar = getNextChar(); if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { diff --git a/test/MC/AsmParser/vararg.s b/test/MC/AsmParser/vararg.s index b27668ea3376..e3236b072d12 100644 --- a/test/MC/AsmParser/vararg.s +++ b/test/MC/AsmParser/vararg.s @@ -17,6 +17,12 @@ .endif .endm +.macro ifcc4 arg0, arg1:vararg +.if cc + movl \arg1, \arg0 +.endif +.endm + .text // CHECK: movl %esp, %ebp @@ -25,6 +31,8 @@ // CHECK: movl %ecx, %ebx // CHECK: movl %ecx, %eax // CHECK: movl %eax, %ecx +// CHECK: movl %ecx, %eax +// CHECK: movl %eax, %ecx .set cc,1 ifcc movl %esp, %ebp subl $0, %esp @@ -33,6 +41,8 @@ ifcc2 %ecx, %ebx ifcc3 %ecx %eax ifcc3 %eax, %ecx + ifcc4 %eax %ecx ## test + ifcc4 %ecx, %eax ## test // CHECK-NOT movl // CHECK: subl $1, %esp From d8fc64e0d1f09abb3265a6ff264629ca33a52c6a Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 21:08:17 +0000 Subject: [PATCH 079/157] Simplify code. We can delete the objects earlier now that we are copying the names to a buffer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211221 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-ar/llvm-ar.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 196240a0c2d4..920a4701b273 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -692,7 +692,6 @@ static void writeSymbolTable( std::string NameBuf; raw_string_ostream NameOS(NameBuf); unsigned NumSyms = 0; - std::vector DeleteIt; LLVMContext &Context = getGlobalContext(); for (ArrayRef::iterator I = Members.begin(), E = Members.end(); @@ -703,9 +702,8 @@ static void writeSymbolTable( MemberBuffer, false, sys::fs::file_magic::unknown, &Context); if (!ObjOrErr) continue; // FIXME: check only for "not an object file" errors. 
- object::SymbolicFile *Obj = ObjOrErr.get(); + std::unique_ptr Obj(ObjOrErr.get()); - DeleteIt.push_back(Obj); if (!StartOffset) { printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0); StartOffset = Out.tell(); @@ -731,13 +729,6 @@ static void writeSymbolTable( } Out << NameOS.str(); - for (std::vector::iterator I = DeleteIt.begin(), - E = DeleteIt.end(); - I != E; ++I) { - object::SymbolicFile *O = *I; - delete O; - } - if (StartOffset == 0) return; From 6c59006684aa23429900d175e0f0261c7cd594e4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 18 Jun 2014 21:14:57 +0000 Subject: [PATCH 080/157] Add a symbols() range and use a range loop. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211222 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Object/SymbolicFile.h | 4 ++++ tools/llvm-ar/llvm-ar.cpp | 8 +++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h index 113373694556..40015ec9f6db 100644 --- a/include/llvm/Object/SymbolicFile.h +++ b/include/llvm/Object/SymbolicFile.h @@ -136,6 +136,10 @@ class SymbolicFile : public Binary { basic_symbol_iterator symbol_end() const { return symbol_end_impl(); } + typedef iterator_range basic_symbol_iterator_range; + basic_symbol_iterator_range symbols() const { + return basic_symbol_iterator_range(symbol_begin(), symbol_end()); + } // construction aux. static ErrorOr createIRObjectFile(MemoryBuffer *Object, diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 920a4701b273..80a977c30bc8 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -710,17 +710,15 @@ static void writeSymbolTable( print32BE(Out, 0); } - for (object::basic_symbol_iterator I = Obj->symbol_begin(), - E = Obj->symbol_end(); - I != E; ++I) { - uint32_t Symflags = I->getFlags(); + for (const object::BasicSymbolRef &S : Obj->symbols()) { + uint32_t Symflags = S.getFlags(); if (Symflags & object::SymbolRef::SF_FormatSpecific) continue; if (!(Symflags & object::SymbolRef::SF_Global)) continue; if (Symflags & object::SymbolRef::SF_Undefined) continue; - failIfError(I->printName(NameOS)); + failIfError(S.printName(NameOS)); NameOS << '\0'; ++NumSyms; MemberOffsetRefs.push_back(std::make_pair(Out.tell(), MemberNum)); From 311ea125062983e1949bc1efda1dfa8e853ee70d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 21:40:43 +0000 Subject: [PATCH 081/157] Use LL suffix for literal that should be 64-bits. 
This hopefully fixes Windows git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211225 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index dc39bee6511e..45f10839040a 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1640,7 +1640,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); - const SDValue FractMask = DAG.getConstant((1L << FractBits) - 1, MVT::i64); + const SDValue FractMask = DAG.getConstant((1LL << FractBits) - 1, MVT::i64); SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); From f286d63757ae5d16f8211227a435e6ad4faa2c35 Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Wed, 18 Jun 2014 22:00:29 +0000 Subject: [PATCH 082/157] R600/SI: add gather4 and getlod intrinsics (v3) This contains all the previous patches + getlod support on top of it. It doesn't use SDNodes anymore, so it's quite small. It also adds v16i8 to SReg_128, which is used for the sampler descriptor. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211228 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 47 +++ lib/Target/R600/SIInstructions.td | 102 ++++-- lib/Target/R600/SIIntrinsics.td | 50 +++ lib/Target/R600/SIRegisterInfo.td | 2 +- test/CodeGen/R600/llvm.SI.gather4.ll | 508 +++++++++++++++++++++++++++ test/CodeGen/R600/llvm.SI.getlod.ll | 44 +++ 6 files changed, 727 insertions(+), 26 deletions(-) create mode 100644 test/CodeGen/R600/llvm.SI.gather4.ll create mode 100644 test/CodeGen/R600/llvm.SI.getlod.ll diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index eb9746779374..c4994a250a8e 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -712,6 +712,53 @@ multiclass MIMG_Sampler op, string asm> { defm _V4 : MIMG_Sampler_Src_Helper; } +class MIMG_Gather_Helper op, string asm, + RegisterClass dst_rc, + RegisterClass src_rc> : MIMG < + op, + (outs dst_rc:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, + SReg_256:$srsrc, SReg_128:$ssamp), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", + []> { + let mayLoad = 1; + let mayStore = 0; + + // DMASK was repurposed for GATHER4. 4 components are always + // returned and DMASK works like a swizzle - it selects + // the component to fetch. The only useful DMASK values are + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns + // (red,red,red,red) etc.) The ISA document doesn't mention + // this. 
+ // Therefore, disable all code which updates DMASK by setting these two: + let MIMG = 0; + let hasPostISelHook = 0; +} + +multiclass MIMG_Gather_Src_Helper op, string asm, + RegisterClass dst_rc, + int channels> { + def _V1 : MIMG_Gather_Helper , + MIMG_Mask; + def _V2 : MIMG_Gather_Helper , + MIMG_Mask; + def _V4 : MIMG_Gather_Helper , + MIMG_Mask; + def _V8 : MIMG_Gather_Helper , + MIMG_Mask; + def _V16 : MIMG_Gather_Helper , + MIMG_Mask; +} + +multiclass MIMG_Gather op, string asm> { + defm _V1 : MIMG_Gather_Src_Helper; + defm _V2 : MIMG_Gather_Src_Helper; + defm _V3 : MIMG_Gather_Src_Helper; + defm _V4 : MIMG_Gather_Src_Helper; +} + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 428e49c6431c..60eb8f978180 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -987,31 +987,31 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; -//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; +defm IMAGE_GATHER4 : MIMG_Gather 
<0x00000040, "IMAGE_GATHER4">; +defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">; +defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">; +defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">; +defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">; +defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">; +defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">; +defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">; +defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">; +defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">; +defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">; +defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">; +defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">; +defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">; +defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">; +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">; +defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">; +defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">; +defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">; +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">; +defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">; //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; //def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; @@ -1773,6 +1773,58 @@ def : SextInReg ; /********** Image sampling patterns **********/ /********** ======================= **********/ +class SampleRawPattern : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, i32:$dmask, i32:$unorm, + i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), + (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da), + (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc), + $addr, $rsrc, $sampler) +>; + +// Only the variants which make sense are defined. 
+def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + /* SIsample for simple 1D texture lookup */ def : Pat < (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 00e32c03a99e..df690a4f4b87 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -56,11 +56,61 @@ let TargetPrefix = "SI", isTarget = 1 in { class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; + // Fully-flexible SAMPLE instruction. + class SampleRaw : Intrinsic < + [llvm_v4f32_ty], // vdata(VGPR) + [llvm_anyint_ty, // vaddr(VGPR) + llvm_v32i8_ty, // rsrc(SGPR) + llvm_v16i8_ty, // sampler(SGPR) + llvm_i32_ty, // dmask(imm) + llvm_i32_ty, // unorm(imm) + llvm_i32_ty, // r128(imm) + llvm_i32_ty, // da(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty, // tfe(imm) + llvm_i32_ty], // lwe(imm) + [IntrNoMem]>; + def int_SI_sample : Sample; def int_SI_sampleb : Sample; def int_SI_sampled : Sample; def int_SI_samplel : Sample; + // Basic gather4 + def int_SI_gather4 : SampleRaw; + def int_SI_gather4_cl : SampleRaw; + def int_SI_gather4_l : SampleRaw; + def int_SI_gather4_b : SampleRaw; + def int_SI_gather4_b_cl : SampleRaw; + def int_SI_gather4_lz : SampleRaw; + + // Gather4 with comparison + def int_SI_gather4_c : SampleRaw; + def int_SI_gather4_c_cl : SampleRaw; + def int_SI_gather4_c_l : SampleRaw; + def int_SI_gather4_c_b : SampleRaw; + def int_SI_gather4_c_b_cl : SampleRaw; + def int_SI_gather4_c_lz : SampleRaw; + + // Gather4 with offsets + def int_SI_gather4_o : SampleRaw; + def int_SI_gather4_cl_o : SampleRaw; + def int_SI_gather4_l_o : SampleRaw; + def int_SI_gather4_b_o : SampleRaw; + def int_SI_gather4_b_cl_o : SampleRaw; + def int_SI_gather4_lz_o : SampleRaw; + + // Gather4 with comparison and offsets + def int_SI_gather4_c_o : SampleRaw; + def int_SI_gather4_c_cl_o : SampleRaw; + def int_SI_gather4_c_l_o : SampleRaw; + def int_SI_gather4_c_b_o : SampleRaw; + def int_SI_gather4_c_b_cl_o : SampleRaw; + def int_SI_gather4_c_lz_o : SampleRaw; + + def int_SI_getlod : SampleRaw; + def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index f1f01deaf361..8974b6300625 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -168,7 +168,7 @@ def SReg_64 : 
RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64Regs, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll new file mode 100644 index 000000000000..8402faaa4dca --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.gather4.ll @@ -0,0 +1,508 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @gather4_v2 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl +;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l +;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b +;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b() #0 { +main_body: + %r = call <4 x float> 
@llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_v8 +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_v2 +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_o +;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + 
%r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o_v8 +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o_v8 +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> 
%r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o_v8 +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_o +;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_o +;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c +;CHECK: IMAGE_GATHER4_C {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, 
float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_v8 +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_v8 +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_v8 +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl +;CHECK: IMAGE_GATHER4_C_B_CL 
{{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz +;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c_o +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_o_v8 +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_o +;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_o +;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 
{{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_o +;CHECK: IMAGE_GATHER4_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl_o +;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o_v8 +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, 
i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x 
i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll new file mode 100644 index 000000000000..a7a17ec3fffa --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.getlod.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @getlod +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v2 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v4 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + + +declare <4 x float> @llvm.SI.getlod.i32(i32, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } From ce09bda96ee00a1024952faf85e3fa01e0d6bc72 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 22:03:45 +0000 Subject: [PATCH 083/157] R600: Handle fnearbyint The difference from rint isn't really relevant here, so treat them as equivalent. 
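A rough host-side illustration of that equivalence, in plain ISO C++ rather than
anything R600-specific (treat it as a sketch; the exception behaviour is only
"may raise"): both functions round the same way under the current rounding
mode, and only the FE_INEXACT side effect, which this lowering does not model,
can differ.

  #include <cfenv>
  #include <cmath>
  #include <cstdio>

  // Strict FP-environment access would also want #pragma STDC FENV_ACCESS ON.
  int main() {
    std::feclearexcept(FE_ALL_EXCEPT);
    double A = std::nearbyint(2.5);             // 2.0, never raises FE_INEXACT
    int RaisedA = std::fetestexcept(FE_INEXACT);
    double B = std::rint(2.5);                  // 2.0 as well; may raise FE_INEXACT
    int RaisedB = std::fetestexcept(FE_INEXACT);
    std::printf("%g %g %d %d\n", A, B, RaisedA, RaisedB);
    return 0;
  }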
OpenCL doesn't have nearbyint, so this is sort of pointless other than for completeness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211229 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 12 ++++++ lib/Target/R600/AMDGPUISelLowering.h | 1 + test/CodeGen/R600/fnearbyint.ll | 57 ++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 test/CodeGen/R600/fnearbyint.ll diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 45f10839040a..fd24af062474 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -325,6 +325,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); @@ -334,6 +335,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FCOPYSIGN, VT, Expand); } + setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT_CC); @@ -501,6 +505,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FCEIL: return LowerFCEIL(Op, DAG); case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); + case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); @@ -1683,6 +1688,13 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); } +SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const { + // FNEARBYINT and FRINT are the same, except in their handling of FP + // exceptions. Those aren't really meaningful for us, and OpenCL only has + // rint, so just treat them as equivalent. + return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); +} + SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 5be3070f589b..b2bb2579dcb3 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -54,6 +54,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll new file mode 100644 index 000000000000..1c1d7315189f --- /dev/null +++ b/test/CodeGen/R600/fnearbyint.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s + +; This should have the exactly the same output as the test for rint, +; so no need to check anything. 
+ +declare float @llvm.nearbyint.f32(float) #0 +declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0 +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0 +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0 + + +define void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 { +entry: + %0 = call float @llvm.nearbyint.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +define void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { +entry: + %0 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +define void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 { +entry: + %0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +define void @nearbyint_f64(double addrspace(1)* %out, double %in) { +entry: + %0 = call double @llvm.nearbyint.f64(double %in) + store double %0, double addrspace(1)* %out + ret void +} +define void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +entry: + %0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in) + store <2 x double> %0, <2 x double> addrspace(1)* %out + ret void +} + +define void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +entry: + %0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %in) + store <4 x double> %0, <4 x double> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } From fb2b9fb894b6c9be8a290bdf69c29f0f5511ba3c Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Wed, 18 Jun 2014 22:04:40 +0000 Subject: [PATCH 084/157] Teach llvm-size to know about Mach-O universal files (aka fat files) and fat files containing archives. Also fix a bug in MachOUniversalBinary::ObjectForArch::ObjectForArch() where it needed a >= when comparing the Index with the number of objects in a fat file. As the index starts at 0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211230 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOUniversal.cpp | 2 +- test/Object/size-trivial-macho.test | 12 +++++++ tools/llvm-size/llvm-size.cpp | 52 +++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index e414de8bcf17..05729ef7467e 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -53,7 +53,7 @@ static T getUniversalBinaryStruct(const char *Ptr) { MachOUniversalBinary::ObjectForArch::ObjectForArch( const MachOUniversalBinary *Parent, uint32_t Index) : Parent(Parent), Index(Index) { - if (!Parent || Index > Parent->getNumberOfObjects()) { + if (!Parent || Index >= Parent->getNumberOfObjects()) { clear(); } else { // Parse object header. 
diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test index 960fb25fbecb..1ed611b0de22 100644 --- a/test/Object/size-trivial-macho.test +++ b/test/Object/size-trivial-macho.test @@ -10,6 +10,10 @@ RUN: llvm-size -format darwin %p/Inputs/macho-archive-x86_64.a \ RUN: | FileCheck %s -check-prefix mAR RUN: llvm-size -format darwin -x -l %p/Inputs/hello-world.macho-x86_64 \ RUN: | FileCheck %s -check-prefix mxl +RUN: llvm-size %p/Inputs/macho-universal.x86_64.i386 \ +RUN: | FileCheck %s -check-prefix u +RUN: llvm-size %p/Inputs/macho-universal-archive.x86_64.i386 \ +RUN: | FileCheck %s -check-prefix uAR A: section size addr A: __text 12 0 @@ -65,3 +69,11 @@ mxl: Section __la_symbol_ptr: 0x8 (addr 0x100001010 offset 4112) mxl: total 0x18 mxl: Segment __LINKEDIT: 0x1000 (vmaddr 0x100002000 fileoff 8192) mxl: total 0x100003000 + +u: __TEXT __DATA __OBJC others dec hex +u: 4096 0 0 4294971392 4294975488 100002000 {{.*}}/macho-universal.x86_64.i386:x86_64 +u: 4096 0 0 8192 12288 3000 {{.*}}/macho-universal.x86_64.i386:i386 + +uAR: __TEXT __DATA __OBJC others dec hex +uAR: 136 0 0 32 168 a8 {{.*}}/macho-universal-archive.x86_64.i386:x86_64(hello.o) +uAR: 5 4 0 0 9 9 {{.*}}/macho-universal-archive.x86_64.i386:i386(foo.o) diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp index 5e4e4aa932bb..b3aaac8d27dd 100644 --- a/tools/llvm-size/llvm-size.cpp +++ b/tools/llvm-size/llvm-size.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -456,6 +457,57 @@ static void PrintFileSectionSizes(StringRef file) { } } } + } else if (MachOUniversalBinary *UB = + dyn_cast(binary.get())) { + // This is a Mach-O universal binary. Iterate over each object and + // display its sizes. + bool moreThanOneArch = UB->getNumberOfObjects() > 1; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + std::unique_ptr UO; + std::unique_ptr UA; + if (!I->getAsObjectFile(UO)) { + if (ObjectFile *o = dyn_cast(&*UO.get())) { + if (OutputFormat == sysv) + outs() << o->getFileName() << " :\n"; + PrintObjectSectionSizes(o); + if (OutputFormat == berkeley) { + MachOObjectFile *MachO = dyn_cast(o); + if (!MachO || moreThanOneFile || moreThanOneArch) + outs() << o->getFileName(); + outs() << "\n"; + } + } + } + else if (!I->getAsArchive(UA)) { + // This is an archive. Iterate over each member and display its sizes. 
+ for (object::Archive::child_iterator i = UA->child_begin(), + e = UA->child_end(); i != e; ++i) { + ErrorOr> ChildOrErr = i->getAsBinary(); + if (std::error_code EC = ChildOrErr.getError()) { + errs() << ToolName << ": " << file << ": " << EC.message() << ".\n"; + continue; + } + if (ObjectFile *o = dyn_cast(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " (ex " << UA->getFileName() + << "):\n"; + else if(MachO && OutputFormat == darwin) + outs() << UA->getFileName() << "(" << o->getFileName() << "):\n"; + PrintObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (MachO) + outs() << UA->getFileName() << "(" << o->getFileName() << ")\n"; + else + outs() << o->getFileName() << " (ex " << UA->getFileName() + << ")\n"; + } + } + } + } + } } else if (ObjectFile *o = dyn_cast(binary.get())) { if (OutputFormat == sysv) outs() << o->getFileName() << " :\n"; From 2ea6d93c5ea813994852e35f01314ccbcec02bce Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 22:11:03 +0000 Subject: [PATCH 085/157] Use stdint macros for specifying size of constants git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index fd24af062474..34c2b2bf61d0 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1636,7 +1636,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(1023, MVT::i32)); // Extract the sign bit. - const SDValue SignBitMask = DAG.getConstant(1ul << 31, MVT::i32); + const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32); SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); // Extend back to to 64-bits. @@ -1645,7 +1645,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); - const SDValue FractMask = DAG.getConstant((1LL << FractBits) - 1, MVT::i64); + const SDValue FractMask + = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64); SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); From 887a5c7f5da73ada835985d1f9552ff1716517a7 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 18 Jun 2014 22:48:09 +0000 Subject: [PATCH 086/157] Move ARMJITInfo off of the TargetMachine and down onto the subtarget. This required untangling a mess of headers that included around. This a recommit of r210953 with a fix for the removed accessor for JITInfo. 
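In outline, the ownership change looks like the following simplified sketch
(hypothetical minimal classes, not the real ARM ones): the JIT info object is
now owned by the subtarget, and the target machine keeps only a forwarding
accessor, which is the accessor this recommit restores.

  struct JITInfo {};

  struct Subtarget {
    JITInfo JI;                                        // per-subtarget state
    JITInfo *getJITInfo() { return &JI; }
  };

  struct TargetMachine {
    Subtarget ST;
    JITInfo *getJITInfo() { return ST.getJITInfo(); }  // forwards, owns nothing
  };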
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211233 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCodeEmitter.cpp | 1 + lib/Target/ARM/ARMJITInfo.cpp | 8 ++++++++ lib/Target/ARM/ARMJITInfo.h | 8 +------- lib/Target/ARM/ARMMachineFunctionInfo.cpp | 10 ++++++++++ lib/Target/ARM/ARMMachineFunctionInfo.h | 11 +---------- lib/Target/ARM/ARMSubtarget.cpp | 2 +- lib/Target/ARM/ARMSubtarget.h | 3 +++ lib/Target/ARM/ARMTargetMachine.cpp | 2 +- lib/Target/ARM/ARMTargetMachine.h | 7 ++----- 9 files changed, 28 insertions(+), 24 deletions(-) diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 2fd7eddd8748..5fb6ebfeaae1 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,6 +15,7 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8821c2dd09f8..6d1114d51aab 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -13,6 +13,7 @@ #include "ARMJITInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" @@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, } } } + +void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo(); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index ee4c863543e7..27e2a2013404 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -14,7 +14,6 @@ #ifndef ARMJITINFO_H #define ARMJITINFO_H -#include "ARMMachineFunctionInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -103,12 +102,7 @@ namespace llvm { /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize /// jump table ids to jump table bases map; remember if codegen relocation /// model is PIC. - void Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; - } + void Initialize(const MachineFunction &MF, bool isPIC); /// getConstantPoolEntryAddr - The ARM target puts all constant /// pool entries into constant islands. 
This returns the address of the diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp index af445e2f35aa..892b269fc181 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -12,3 +12,13 @@ using namespace llvm; void ARMFunctionInfo::anchor() { } + +ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) + : isThumb(MF.getTarget().getSubtarget().isThumb()), + hasThumb2(MF.getTarget().getSubtarget().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0), + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), + GlobalBaseReg(0) {} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 7934761af25b..44a9e3495b90 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -130,16 +130,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} - explicit ARMFunctionInfo(MachineFunction &MF) : - isThumb(MF.getTarget().getSubtarget().isThumb()), - hasThumb2(MF.getTarget().getSubtarget().hasThumb2()), - StByValParamsPadding(0), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), - LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} + explicit ARMFunctionInfo(MachineFunction &MF); bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index fc842512ef00..875f1e83f875 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -148,7 +148,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL) {} + TSInfo(DL), JITInfo() {} void ARMSubtarget::initializeEnvironment() { HasV4TOps = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 7da80ec0d494..ae62a6f876fd 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,6 +14,7 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" @@ -256,10 +257,12 @@ class ARMSubtarget : public ARMGenSubtargetInfo { const DataLayout *getDataLayout() const { return &DL; } const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + ARMJITInfo *getJITInfo() { return &JITInfo; } private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; + ARMJITInfo JITInfo; void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index a93824230d70..7a3836ae62b4 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -52,7 +52,7 @@ 
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle, Options), JITInfo() { + Subtarget(TT, CPU, FS, isLittle, Options) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 154927786082..737c2fae1a80 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -32,10 +32,6 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { protected: ARMSubtarget Subtarget; - -private: - ARMJITInfo JITInfo; - public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -44,7 +40,6 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { CodeGenOpt::Level OL, bool isLittle); - ARMJITInfo *getJITInfo() override { return &JITInfo; } const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } const ARMTargetLowering *getTargetLowering() const override { // Implemented by derived classes @@ -56,6 +51,8 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { const DataLayout *getDataLayout() const override { return getSubtargetImpl()->getDataLayout(); } + ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + /// \brief Register ARM analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; From 1260b844fd71720566eabd4cc5738f2abe747871 Mon Sep 17 00:00:00 2001 From: Nikola Smiljanic Date: Thu, 19 Jun 2014 00:26:49 +0000 Subject: [PATCH 087/157] PR10140 - StringPool's PooledStringPtr has non-const operator== causing bad OR-result. Mark conversion operator explicit and const qualify comparison operators. 
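The pitfall is easiest to see in isolation. A stand-alone reproduction along
these lines (hypothetical Ptr type, not the real PooledStringPtr) shows why a
non-const operator== combined with an implicit conversion to bool silently
compares the wrong thing for const operands, for example inside an OR chain:

  #include <iostream>

  struct Ptr {
    const char *S;
    explicit Ptr(const char *P) : S(P) {}
    operator bool() const { return S != nullptr; }            // implicit (pre-fix)
    bool operator==(const Ptr &That) { return S == That.S; }  // non-const (pre-fix)
  };

  int main() {
    const Ptr A("a"), B("b");
    // The member operator== cannot be called on the const A, so overload
    // resolution converts both operands to bool and compares true == true.
    std::cout << (A == B) << "\n";  // prints 1 even though A and B differ
    return 0;
  }

Making the conversion operator explicit removes the silent fallback, and
const-qualifying the comparison operators lets them be called on const
operands, which is what the change below does.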
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211244 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/StringPool.h | 6 +++--- unittests/Support/CMakeLists.txt | 1 + unittests/Support/StringPool.cpp | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 unittests/Support/StringPool.cpp diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 7e1394cb2335..7306ce2240b2 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -128,10 +128,10 @@ namespace llvm { } inline const char *operator*() const { return begin(); } - inline operator bool() const { return S != nullptr; } + inline explicit operator bool() const { return S != nullptr; } - inline bool operator==(const PooledStringPtr &That) { return S == That.S; } - inline bool operator!=(const PooledStringPtr &That) { return S != That.S; } + inline bool operator==(const PooledStringPtr &That) const { return S == That.S; } + inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } }; } // End llvm namespace diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index 0ea931033339..c50acdfb8e64 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_unittest(SupportTests ProgramTest.cpp RegexTest.cpp SourceMgrTest.cpp + StringPool.cpp SwapByteOrderTest.cpp ThreadLocalTest.cpp TimeValueTest.cpp diff --git a/unittests/Support/StringPool.cpp b/unittests/Support/StringPool.cpp new file mode 100644 index 000000000000..7b7805f91710 --- /dev/null +++ b/unittests/Support/StringPool.cpp @@ -0,0 +1,31 @@ +//===- llvm/unittest/Support/ThreadLocalTest.cpp - Therad Local tests ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringPool.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(PooledStringPtrTest, OperatorEquals) { + StringPool pool; + const PooledStringPtr a = pool.intern("a"); + const PooledStringPtr b = pool.intern("b"); + EXPECT_FALSE(a == b); +} + +TEST(PooledStringPtrTest, OperatorNotEquals) { + StringPool pool; + const PooledStringPtr a = pool.intern("a"); + const PooledStringPtr b = pool.intern("b"); + EXPECT_TRUE(a != b); +} + +} From dd8406a6b7f0cbf9082c4bdb2cc8c3b3d5da6eec Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 01:09:49 +0000 Subject: [PATCH 088/157] Fix breakage from r211244 by using LLVM_EXPLICIT to avoid using explicit operators under MSVC where they're not supported. 
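For reference, LLVM_EXPLICIT (from llvm/Support/Compiler.h) expands to the
explicit keyword only when the compiler supports explicit conversion
operators, roughly along these lines (a simplified sketch, not the exact
definition):

  #ifndef __has_feature
  #define __has_feature(x) 0
  #endif

  #if __has_feature(cxx_explicit_conversions)
  #define LLVM_EXPLICIT explicit
  #else
  #define LLVM_EXPLICIT
  #endif

so the conversion simply stays implicit on compilers, such as the MSVC
versions mentioned above, that cannot parse explicit operators.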
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211246 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/StringPool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 7306ce2240b2..3e0465340c3b 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -29,6 +29,7 @@ #ifndef LLVM_SUPPORT_STRINGPOOL_H #define LLVM_SUPPORT_STRINGPOOL_H +#include "llvm/Support/Compiler.h" #include "llvm/ADT/StringMap.h" #include #include @@ -128,7 +129,7 @@ namespace llvm { } inline const char *operator*() const { return begin(); } - inline explicit operator bool() const { return S != nullptr; } + inline LLVM_EXPLICIT operator bool() const { return S != nullptr; } inline bool operator==(const PooledStringPtr &That) const { return S == That.S; } inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } From d9b35435b89015d154b0e20f4d4796d936237f84 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 19 Jun 2014 01:19:19 +0000 Subject: [PATCH 089/157] R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211247 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsR600.td | 36 ++++++++++++ lib/Target/R600/AMDGPUISelLowering.cpp | 32 +++++++++- lib/Target/R600/AMDGPUISelLowering.h | 12 ++++ lib/Target/R600/AMDGPUInstrInfo.td | 29 ++++++++++ lib/Target/R600/AMDGPUInstructions.td | 10 ++++ lib/Target/R600/AMDGPUIntrinsics.td | 2 - lib/Target/R600/R600Instructions.td | 2 +- lib/Target/R600/SIInsertWaits.cpp | 2 + lib/Target/R600/SIInstructions.td | 37 +++++++++--- .../InstCombine/InstCombineCalls.cpp | 14 +++++ test/CodeGen/R600/big_alu.ll | 12 ++-- test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll | 27 +++++++++ test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll | 27 +++++++++ test/CodeGen/R600/llvm.AMDGPU.div_scale.ll | 23 ++++++++ test/CodeGen/R600/llvm.AMDGPU.rcp.ll | 58 +++++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll | 29 ++++++++++ test/CodeGen/R600/pv.ll | 4 +- test/CodeGen/R600/sgpr-copy.ll | 4 +- test/CodeGen/R600/si-sgpr-spill.ll | 18 +++--- .../Transforms/InstCombine/r600-intrinsics.ll | 47 +++++++++++++++ 20 files changed, 393 insertions(+), 32 deletions(-) create mode 100644 test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.div_scale.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.rcp.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll create mode 100644 test/Transforms/InstCombine/r600-intrinsics.ll diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td index ecb5668d8e95..09607d5834d6 100644 --- a/include/llvm/IR/IntrinsicsR600.td +++ b/include/llvm/IR/IntrinsicsR600.td @@ -33,4 +33,40 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tgid">; defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tidig">; + } // End TargetPrefix = "r600" + +let TargetPrefix = "AMDGPU" in { +def int_AMDGPU_div_scale : + Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_scale">; + +def int_AMDGPU_div_fmas : + Intrinsic<[llvm_anyfloat_ty], + 
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_fmas">; + +def int_AMDGPU_div_fixup : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_fixup">; + +def int_AMDGPU_trig_preop : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_trig_preop">; + +def int_AMDGPU_rcp : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_rcp">; + +def int_AMDGPU_rsq : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_rsq">; + + +} // End TargetPrefix = "AMDGPU" diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 34c2b2bf61d0..1aa92fadbeea 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -842,6 +842,28 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::AMDGPU_div_scale: + return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::AMDGPU_div_fmas: + return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_div_fixup: + return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_trig_preop: + return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::AMDGPU_rcp: + return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); + + case Intrinsic::AMDGPU_rsq: + return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_imax: return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -2042,6 +2064,14 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FMIN) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(URECIP) + NODE_NAME_CASE(DIV_SCALE) + NODE_NAME_CASE(DIV_FMAS) + NODE_NAME_CASE(DIV_FIXUP) + NODE_NAME_CASE(TRIG_PREOP) + NODE_NAME_CASE(RCP) + NODE_NAME_CASE(RSQ) + NODE_NAME_CASE(DOT4) NODE_NAME_CASE(BFE_U32) NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) @@ -2051,8 +2081,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) NODE_NAME_CASE(MAD_I24) - NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index b2bb2579dcb3..e2000a04ba4e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -175,6 +175,9 @@ enum { DWORDADDR, FRACT, CLAMP, + + // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. + // Denormals handled on some parts. COS_HW, SIN_HW, FMAX, @@ -184,6 +187,15 @@ enum { SMIN, UMIN, URECIP, + DIV_SCALE, + DIV_FMAS, + DIV_FIXUP, + TRIG_PREOP, // 1 ULP max error for f64 + + // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. + // For f64, max error 2^29 ULP, handles denormals. + RCP, + RSQ, DOT4, BFE_U32, // Extract range of bits with zero extension to 32-bits. 
BFE_I32, // Extract range of bits with sign extension to 32-bits. diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 942a9e8ff351..d0ee40a67872 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -19,6 +19,14 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; +def AMDGPUTrigPreOp : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] +>; + +def AMDGPUDivScaleOp : SDTypeProfile<2, 3, + [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -29,6 +37,12 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; // out = a - floor(a) def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; +// out = 1.0 / a +def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; + +// out = 1.0 / sqrt(a) +def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; + // out = max(a, b) a and b are floats def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, [SDNPCommutative, SDNPAssociative] @@ -78,6 +92,21 @@ def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", // e is rounding error def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; +// Special case divide preop and flags. +def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>; + +// Special case divide FMA with scale and flags (src0 = Quotient, +// src1 = Denominator, src2 = Numerator). +def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>; + +// Single or double precision division fixup. +// Special case divide fixup and flags(src0 = Quotient, src1 = +// Denominator, src2 = Numerator). 
+def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>; + +// Look Up 2.0 / pi src0 with segment select src1[4:0] +def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>; + def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD", SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayLoad]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 8bfc11cd468c..14bfd8cc18d5 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -519,6 +519,16 @@ multiclass Expand24UBitOps { >; } +class RcpPat : Pat < + (fdiv FP_ONE, vt:$src), + (RcpInst $src) +>; + +class RsqPat : Pat < + (AMDGPUrcp (fsqrt vt:$src)), + (RsqInst $src) +>; + include "R600Instructions.td" include "R700Instructions.td" include "EvergreenInstructions.td" diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 6dc7612d46fb..538b4cd8af28 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -30,8 +30,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 58c704d8ec85..47b7da0955d8 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1083,7 +1083,7 @@ class RECIP_UINT_Common inst> : R600_1OP_Helper < } class RECIPSQRT_CLAMPED_Common inst> : R600_1OP_Helper < - inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq + inst, "RECIPSQRT_CLAMPED", AMDGPUrsq > { let Itinerary = TransALU; } diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index a17fed7e7ea7..173332674ff1 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -341,6 +341,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { return Result; } +// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" +// around other non-memory instructions. 
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { bool Changes = false; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 60eb8f978180..26024dc4f2df 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1116,22 +1116,23 @@ defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", [(set f32:$dst, (flog2 f32:$src0))] >; + defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] + [(set f32:$dst, (AMDGPUrcp f32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] + [(set f32:$dst, (AMDGPUrsq f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))] >; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", - [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] + [(set f64:$dst, (AMDGPUrcp f64:$src0))] >; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", @@ -1417,8 +1418,12 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; //def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; -defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; -def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; +defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", + [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", + [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))] +>; def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64", [(set i64:$dst, (shl i64:$src0, i32:$src1))] @@ -1452,12 +1457,19 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; -def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; + +defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", + [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", + [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))] +>; //def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; -def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; +def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64", + [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))] +>; //===----------------------------------------------------------------------===// // Pseudo Instructions @@ -1748,6 +1760,15 @@ def : Pat < (S_BARRIER) >; +//===----------------------------------------------------------------------===// +// VOP1 Patterns +//===----------------------------------------------------------------------===// + +def : RcpPat; +def : 
RcpPat; +def : RsqPat; +def : RsqPat; + //===----------------------------------------------------------------------===// // VOP2 Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d4bdd75fa82a..ff7456415814 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -922,6 +922,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::AMDGPU_rcp: { + if (const ConstantFP *C = dyn_cast(II->getArgOperand(0))) { + const APFloat &ArgVal = C->getValueAPF(); + APFloat Val(ArgVal.getSemantics(), 1.0); + APFloat::opStatus Status = Val.divide(ArgVal, + APFloat::rmNearestTiesToEven); + // Only do this if it was exact and therefore not dependent on the + // rounding mode. + if (Status == APFloat::opOK) + return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); + } + + break; + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll index 6b683769fe06..511e8ef62951 100644 --- a/test/CodeGen/R600/big_alu.ll +++ b/test/CodeGen/R600/big_alu.ll @@ -101,7 +101,7 @@ IF137: ; preds = %main_body %88 = insertelement <4 x float> %87, float %32, i32 2 %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89) - %91 = call float @llvm.AMDGPU.rsq(float %90) + %91 = call float @llvm.AMDGPU.rsq.f32(float %90) %92 = fmul float %30, %91 %93 = fmul float %31, %91 %94 = fmul float %32, %91 @@ -344,7 +344,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %325 = insertelement <4 x float> %324, float %318, i32 2 %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3 %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326) - %328 = call float @llvm.AMDGPU.rsq(float %327) + %328 = call float @llvm.AMDGPU.rsq.f32(float %327) %329 = fmul float %314, %328 %330 = fmul float %316, %328 %331 = fmul float %318, %328 @@ -377,7 +377,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %358 = insertelement <4 x float> %357, float %45, i32 2 %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359) - %361 = call float @llvm.AMDGPU.rsq(float %360) + %361 = call float @llvm.AMDGPU.rsq.f32(float %360) %362 = fmul float %45, %361 %363 = call float @fabs(float %362) %364 = fmul float %176, 0x3FECCCCCC0000000 @@ -403,7 +403,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %384 = insertelement <4 x float> %383, float %45, i32 2 %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385) - %387 = call float @llvm.AMDGPU.rsq(float %386) + %387 = call float @llvm.AMDGPU.rsq.f32(float %386) %388 = fmul float %45, %387 %389 = call float @fabs(float %388) %390 = fmul float %176, 0x3FF51EB860000000 @@ -1041,7 +1041,7 @@ IF179: ; preds = %ENDIF175 %896 = insertelement <4 x float> %895, float %45, i32 2 %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3 %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897) - %899 = call float @llvm.AMDGPU.rsq(float %898) + %899 = call float @llvm.AMDGPU.rsq.f32(float %898) %900 = fmul float %45, %899 %901 = call float @fabs(float %900) 
%902 = fmul float %176, 0x3FECCCCCC0000000 @@ -1150,7 +1150,7 @@ ENDIF178: ; preds = %ENDIF175, %IF179 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1 diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll new file mode 100644 index 000000000000..c8c73573e073 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fixup_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fixup_f64: +; SI: V_DIV_FIXUP_F64 +define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll new file mode 100644 index 000000000000..4f1e827c2cbd --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fmas_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fmas_f64: +; SI: V_DIV_FMAS_F64 +define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll 
b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll new file mode 100644 index 000000000000..1bcbe2f859ed --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll @@ -0,0 +1,23 @@ +; XFAIL: * +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone +declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone + +; SI-LABEL @test_div_scale_f32: +define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { + %a = load float addrspace(1)* %aptr, align 4 + %b = load float addrspace(1)* %bptr, align 4 + %result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f64: +define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load double addrspace(1)* %bptr, align 8 + %result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll new file mode 100644 index 000000000000..ca5260dc5bc8 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone +declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone + + +declare float @llvm.sqrt.f32(float) nounwind readnone +declare double @llvm.sqrt.f64(double) nounwind readnone + +; FUNC-LABEL: @rcp_f32 +; SI: V_RCP_F32_e32 +define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rcp_f64 +; SI: V_RCP_F64_e32 +define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rcp_pat_f32 +; SI: V_RCP_F32_e32 +define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = fdiv float 1.0, %src + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rcp_pat_f64 +; SI: V_RCP_F64_e32 +define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = fdiv double 1.0, %src + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f32 +; SI: V_RSQ_F32_e32 +define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone + %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f64 +; SI: V_RSQ_F64_e32 +define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone + %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} diff --git 
a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll new file mode 100644 index 000000000000..1c736d447ea9 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone + +; SI-LABEL: @test_trig_preop_f64: +; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]] +; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load i32 addrspace(1)* %bptr, align 4 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @test_trig_preop_f64_imm_segment: +; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7 +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index f322bc71c6bd..55eb56d3fb1d 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -103,7 +103,7 @@ main_body: %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) %97 = call float @fabs(float %96) - %98 = call float @llvm.AMDGPU.rsq(float %97) + %98 = call float @llvm.AMDGPU.rsq.f32(float %97) %99 = fmul float %4, %98 %100 = fmul float %5, %98 %101 = fmul float %6, %98 @@ -225,7 +225,7 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare float @fabs(float) #2 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #1 diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index c581d86b99bd..c7d5bf90644e 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -70,7 +70,7 @@ main_body: %55 = fadd float %54, %53 %56 = fmul float %45, %45 %57 = fadd float %55, %56 - %58 = call float @llvm.AMDGPU.rsq(float %57) + %58 = call float @llvm.AMDGPU.rsq.f32(float %57) %59 = fmul float %43, %58 %60 = fmul float %44, %58 %61 = fmul float %45, %58 @@ -212,7 +212,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #3 +declare float @llvm.AMDGPU.rsq.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll index b34a757d9b65..53a096513bbc 100644 --- a/test/CodeGen/R600/si-sgpr-spill.ll +++ b/test/CodeGen/R600/si-sgpr-spill.ll @@ -203,7 +203,7 @@ main_body: %198 = fadd float %197, %196 %199 = fmul float %97, %97 
%200 = fadd float %198, %199 - %201 = call float @llvm.AMDGPU.rsq(float %200) + %201 = call float @llvm.AMDGPU.rsq.f32(float %200) %202 = fmul float %95, %201 %203 = fmul float %96, %201 %204 = fmul float %202, %29 @@ -384,7 +384,7 @@ IF67: ; preds = %LOOP65 %355 = fadd float %354, %353 %356 = fmul float %352, %352 %357 = fadd float %355, %356 - %358 = call float @llvm.AMDGPU.rsq(float %357) + %358 = call float @llvm.AMDGPU.rsq.f32(float %357) %359 = fmul float %350, %358 %360 = fmul float %351, %358 %361 = fmul float %352, %358 @@ -512,7 +512,7 @@ IF67: ; preds = %LOOP65 %483 = fadd float %482, %481 %484 = fmul float %109, %109 %485 = fadd float %483, %484 - %486 = call float @llvm.AMDGPU.rsq(float %485) + %486 = call float @llvm.AMDGPU.rsq.f32(float %485) %487 = fmul float %107, %486 %488 = fmul float %108, %486 %489 = fmul float %109, %486 @@ -541,7 +541,7 @@ IF67: ; preds = %LOOP65 %512 = fadd float %511, %510 %513 = fmul float %97, %97 %514 = fadd float %512, %513 - %515 = call float @llvm.AMDGPU.rsq(float %514) + %515 = call float @llvm.AMDGPU.rsq.f32(float %514) %516 = fmul float %95, %515 %517 = fmul float %96, %515 %518 = fmul float %97, %515 @@ -658,7 +658,7 @@ declare i32 @llvm.SI.tid() #2 declare float @ceil(float) #3 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #2 +declare float @llvm.AMDGPU.rsq.f32(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1 @@ -887,7 +887,7 @@ main_body: %212 = fadd float %211, %210 %213 = fmul float %209, %209 %214 = fadd float %212, %213 - %215 = call float @llvm.AMDGPU.rsq(float %214) + %215 = call float @llvm.AMDGPU.rsq.f32(float %214) %216 = fmul float %205, %215 %217 = fmul float %207, %215 %218 = fmul float %209, %215 @@ -1123,7 +1123,7 @@ IF189: ; preds = %LOOP %434 = fsub float -0.000000e+00, %433 %435 = fadd float 0x3FF00068E0000000, %434 %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00) - %437 = call float @llvm.AMDGPU.rsq(float %436) + %437 = call float @llvm.AMDGPU.rsq.f32(float %436) %438 = fmul float %437, %436 %439 = fsub float -0.000000e+00, %436 %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00) @@ -1147,7 +1147,7 @@ IF189: ; preds = %LOOP %458 = fadd float %457, %456 %459 = fmul float %455, %455 %460 = fadd float %458, %459 - %461 = call float @llvm.AMDGPU.rsq(float %460) + %461 = call float @llvm.AMDGPU.rsq.f32(float %460) %462 = fmul float %451, %461 %463 = fmul float %453, %461 %464 = fmul float %455, %461 @@ -1257,7 +1257,7 @@ ENDIF197: ; preds = %IF189, %IF198 %559 = fadd float %558, %557 %560 = fmul float %556, %556 %561 = fadd float %559, %560 - %562 = call float @llvm.AMDGPU.rsq(float %561) + %562 = call float @llvm.AMDGPU.rsq.f32(float %561) %563 = fmul float %562, %561 %564 = fsub float -0.000000e+00, %561 %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00) diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll new file mode 100644 index 000000000000..1db6b0d28bf5 --- /dev/null +++ b/test/Transforms/InstCombine/r600-intrinsics.ll @@ -0,0 +1,47 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone +declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone + +; CHECK-LABEL: @test_constant_fold_rcp_f32_1 +; CHECK-NEXT: ret float 1.000000e+00 +define float @test_constant_fold_rcp_f32_1() nounwind { + %val = call 
float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_1 +; CHECK-NEXT: ret double 1.000000e+00 +define double @test_constant_fold_rcp_f64_1() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_half +; CHECK-NEXT: ret float 2.000000e+00 +define float @test_constant_fold_rcp_f32_half() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_half +; CHECK-NEXT: ret double 2.000000e+00 +define double @test_constant_fold_rcp_f64_half() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_43 +; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) +define float @test_constant_fold_rcp_f32_43() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_43 +; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) +define double @test_constant_fold_rcp_f64_43() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone + ret double %val +} + From f9ec8fe70c5084a15372cc0b126218f699794e24 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 19 Jun 2014 01:25:43 +0000 Subject: [PATCH 090/157] MS asm: Properly handle quoted symbol names We would get confused by '@' characters in symbol names; we would mistake the text following them for the variant kind. When an identifier is a string, the variant kind will never show up inside of it. Instead, check to see if there is a variant following the string. This fixes PR19965. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211249 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 14 +++++++++++++- lib/Target/X86/AsmParser/X86AsmParser.cpp | 6 ++++-- test/MC/X86/intel-syntax.s | 8 ++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 932d0f3318ec..1ba02d181dbb 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -812,7 +812,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // Parse symbol variant std::pair Split; if (!MAI.useParensForSymbolVariant()) { - Split = Identifier.split('@'); + if (FirstTokenKind == AsmToken::String) { + if (Lexer.is(AsmToken::At)) { + Lexer.Lex(); // eat @ + SMLoc AtLoc = getLexer().getLoc(); + StringRef VName; + if (parseIdentifier(VName)) + return Error(AtLoc, "expected symbol variant after '@'"); + + Split = std::make_pair(Identifier, VName); + } + } else { + Split = Identifier.split('@'); + } } else if (Lexer.is(AsmToken::LParen)) { Lexer.Lex(); // eat ( StringRef VName; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index b30eeeb0e037..3e57914f9e94 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1061,7 +1061,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; - switch (getLexer().getKind()) { + AsmToken::TokenKind TK = getLexer().getKind(); + switch (TK) { default: { if (SM.isValidEndState()) { Done = true; @@ -1073,13 +1074,14 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { Done = true; break; } + case AsmToken::String: case AsmToken::Identifier: { // This could be a register or a symbolic displacement. unsigned TmpReg; const MCExpr *Val; SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); - if(!ParseRegister(TmpReg, IdentLoc, End)) { + if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 540282a74c73..796891880b12 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -599,3 +599,11 @@ fxrstor64 opaque ptr [rax] // CHECK: movq _g0+8, %rcx mov rbx, qword ptr [_g0] mov rcx, qword ptr [_g0 + 8] + +"?half@?0??bar@@YAXXZ@4NA": + .quad 4602678819172646912 + +fadd "?half@?0??bar@@YAXXZ@4NA" +fadd "?half@?0??bar@@YAXXZ@4NA"@IMGREL +// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA" +// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"@IMGREL32 From bdb4aca20211b9ed31874c1be0d814fe26f10a24 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 19 Jun 2014 03:28:28 +0000 Subject: [PATCH 091/157] Remove redundant code in InstCombineShift, no functionality change because instsimplify already does this and instcombine calls instsimplify a few lines above. Patch by Suyog Sarda! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211250 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineShifts.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index cc6665c947d7..2495747da5fe 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -789,11 +789,6 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the left shift is just shifting out partial signbits, delete the - // extension. - if (cast(Op0)->hasNoSignedWrap()) - return ReplaceInstUsesWith(I, X); - // If the input is an extension from the shifted amount value, e.g. // %x = zext i8 %A to i32 // %y = shl i32 %x, 24 From 650b6ea893c118bf0c6e40cd3002954cc34df2bb Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 19 Jun 2014 03:35:49 +0000 Subject: [PATCH 092/157] Make instsimplify's analysis of icmp eq/ne use computeKnownBits to determine whether the icmp is always true or false. Patch by Suyog Sarda! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 19 +++++++++++++++++++ test/Transforms/InstCombine/align-2d-gep.ll | 2 +- test/Transforms/InstSimplify/compare.ll | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 3684fda854fc..b06d9948ac19 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2241,6 +2241,25 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If a bit is known to be zero for A and known to be one for B, + // then A and B cannot be equal. + if (ICmpInst::isEquality(Pred)) { + if (ConstantInt *CI = dyn_cast(RHS)) { + uint32_t BitWidth = CI->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + if (((LHSKnownOne & RHSKnownZero) != 0) || + ((LHSKnownZero & RHSKnownOne) != 0)) + return (Pred == ICmpInst::ICMP_EQ) + ? ConstantInt::getFalse(CI->getContext()) + : ConstantInt::getTrue(CI->getContext()); + } + } + // Special logic for binary operators. BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll index 5bca46d5a21d..f6a877684ce4 100644 --- a/test/Transforms/InstCombine/align-2d-gep.ll +++ b/test/Transforms/InstCombine/align-2d-gep.ll @@ -31,7 +31,7 @@ bb1: store <2 x double>, <2 x double>* %r, align 8 %indvar.next = add i64 %j, 2 - %exitcond = icmp eq i64 %indvar.next, 557 + %exitcond = icmp eq i64 %indvar.next, 556 br i1 %exitcond, label %bb11, label %bb1 bb11: diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index 105e244ed8cf..89fd6362d78d 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -883,3 +883,22 @@ define i1 @returns_nonnull() { ; CHECK: ret i1 false } +; If a bit is known to be zero for A and known to be one for B, +; then A and B cannot be equal. 
+define i1 @icmp_eq_const(i32 %a) nounwind { + %b = mul nsw i32 %a, -2 + %c = icmp eq i32 %b, 1 + ret i1 %c + +; CHECK-LABEL: @icmp_eq_const +; CHECK-NEXT: ret i1 false +} + +define i1 @icmp_ne_const(i32 %a) nounwind { + %b = mul nsw i32 %a, -2 + %c = icmp ne i32 %b, 1 + ret i1 %c + +; CHECK-LABEL: @icmp_ne_const +; CHECK-NEXT: ret i1 true +} From fe3a219355b58428b93461c6c0517a9b90ebe962 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 19 Jun 2014 03:51:46 +0000 Subject: [PATCH 093/157] Move optimization of some cases of (A & C1)|(B & C2) from instcombine to instsimplify. Patch by Rahul Jain, plus some last minute changes by me -- you can blame me for any bugs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211252 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 32 ++++++++++++++++ .../InstCombine/InstCombineAndOrXor.cpp | 23 ------------ test/Transforms/InstSimplify/apint-or.ll | 37 +++++++++++++++++++ 3 files changed, 69 insertions(+), 23 deletions(-) create mode 100644 test/Transforms/InstSimplify/apint-or.ll diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b06d9948ac19..8aa6e5a190bf 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1625,6 +1625,38 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; + // (A & C)|(B & D) + Value *C = nullptr, *D = nullptr; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + ConstantInt *C1 = dyn_cast(C); + ConstantInt *C2 = dyn_cast(D); + if (C1 && C2 && (C1->getValue() == ~C2->getValue())) { + // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + Value *V1, *V2; + if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return A; + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return A; + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue() + 1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return B; + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return B; + } + } + } + // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4f5d65ab785f..b23a606e0889 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1996,29 +1996,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { C1 = dyn_cast(C); C2 = dyn_cast(D); if (C1 && C2) { // (A & C1)|(B & C2) - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1->getValue() == ~C2->getValue()) { - if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. 
- if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) - return ReplaceInstUsesWith(I, A); - } - // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) - return ReplaceInstUsesWith(I, B); - } - } - if ((C1->getValue() & C2->getValue()) == 0) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll new file mode 100644 index 000000000000..5d314db7133d --- /dev/null +++ b/test/Transforms/InstSimplify/apint-or.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -instsimplify -S | not grep or + +; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0. +define i39 @test1(i39 %V, i39 %M) { + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943 + %N = and i39 %M, 274877906944 + %A = add i39 %V, %N + %B = and i39 %A, %C1 + %D = and i39 %V, 274877906943 + %R = or i39 %B, %D + ret i39 %R +; CHECK-LABEL @test1 +; CHECK-NEXT: and {{.*}}, -274877906944 +; CHECK-NEXT: add +; CHECK-NEXT: ret +} + +; Test the case where Integer BitWidth > 64 && BitWidth <= 1024. +define i399 @test2(i399 %V, i399 %M) { + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943 + %N = and i399 %M, 18446742974197923840 + %A = add i399 %V, %N + %B = and i399 %A, %C1 + %D = and i399 %V, 274877906943 + %R = or i399 %B, %D + ret i399 %R +; CHECK-LABEL @test2 +; CHECK-NEXT: and {{.*}}, 18446742974197923840 +; CHECK-NEXT: add +; CHECK-NEXT: ret +} From 64429cefba00547d37b50383536a62047fd93380 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 19 Jun 2014 04:24:43 +0000 Subject: [PATCH 094/157] R600: Add a few tests I forgot to add. 
These belong with r210827 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211253 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/R600/llvm.AMDGPU.abs.ll | 48 ++++++++++++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.clamp.ll | 28 +++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.fract.ll | 27 +++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 test/CodeGen/R600/llvm.AMDGPU.abs.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.clamp.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.fract.ll diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll new file mode 100644 index 000000000000..a0a47b7c4701 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone + +; Legacy name +declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone + +; FUNC-LABEL: @s_abs_i32 +; SI: S_SUB_I32 +; SI: S_MAX_I32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind { + %abs = call i32 @llvm.AMDGPU.abs(i32 %src) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_abs_i32 +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @abs_i32_legacy_amdil +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll new file mode 100644 index 000000000000..d608953a0dd2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone +declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone + +; FUNC-LABEL: @clamp_0_1_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +; SI: S_ENDPGM + +; EG: MOV_SAT +define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDGPU.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 
0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll new file mode 100644 index 000000000000..72ec1c57571e --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone + +; Legacy name +declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone + +; FUNC-LABEL: @fract_f32 +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @fract_f32_legacy_amdil +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} From bd01df2487e451fcdac23875f2a101b8e93fbe6e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 19 Jun 2014 06:10:58 +0000 Subject: [PATCH 095/157] Convert some assert(0) to llvm_unreachable or fold an 'if' condition into the assert. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211254 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/StreamableMemoryObject.h | 7 ++++--- lib/MC/ELFObjectWriter.cpp | 3 +-- lib/TableGen/Record.cpp | 10 ++-------- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +--- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 3 +-- .../Disassembler/AArch64ExternalSymbolizer.cpp | 3 +-- .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 8 ++------ lib/Target/ARM/A15SDOptimizer.cpp | 3 +-- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 3 +-- lib/Target/Hexagon/HexagonInstrInfo.cpp | 7 +++---- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 2 +- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 14 +++++--------- lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp | 2 +- lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 2 +- .../Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 2 +- 15 files changed, 26 insertions(+), 47 deletions(-) diff --git a/include/llvm/Support/StreamableMemoryObject.h b/include/llvm/Support/StreamableMemoryObject.h index 9c9e55c0a75a..6e71ad47c8dd 100644 --- a/include/llvm/Support/StreamableMemoryObject.h +++ b/include/llvm/Support/StreamableMemoryObject.h @@ -13,6 +13,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/DataStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryObject.h" #include #include @@ -115,7 +116,7 @@ class StreamingMemoryObject : public StreamableMemoryObject { // requiring that the bitcode size be known, or otherwise ensuring that // the memory doesn't go away/get reallocated, but it's // not currently necessary. Users that need the pointer don't stream. 
- assert(0 && "getPointer in streaming memory objects not allowed"); + llvm_unreachable("getPointer in streaming memory objects not allowed"); return nullptr; } bool isValidAddress(uint64_t address) const override; @@ -154,8 +155,8 @@ class StreamingMemoryObject : public StreamableMemoryObject { kChunkSize); BytesRead += bytes; if (bytes < kChunkSize) { - if (ObjectSize && BytesRead < Pos) - assert(0 && "Unexpected short read fetching bitcode"); + assert((!ObjectSize || BytesRead >= Pos) && + "Unexpected short read fetching bitcode"); if (BytesRead <= Pos) { // reached EOF/ran out of bytes ObjectSize = BytesRead; EOFReached = true; diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index a98a13e0cd42..87f6ec0f3d13 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1574,8 +1574,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, break; default: - assert(0 && "FIXME: sh_type value not supported!"); - break; + llvm_unreachable("FIXME: sh_type value not supported!"); } if (TargetObjectWriter->getEMachine() == ELF::EM_ARM && diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index c553a21c261e..f7843dc8360b 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -811,20 +811,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } case HEAD: { if (ListInit *LHSl = dyn_cast(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in car"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in car"); return LHSl->getElement(0); } break; } case TAIL: { if (ListInit *LHSl = dyn_cast(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in cdr"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in cdr"); // Note the +1. We can't just pass the result of getValues() // directly. 
ArrayRef::iterator begin = LHSl->getValues().begin()+1; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index e45ca4dbc0d9..bb5290208cb5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, #ifndef NDEBUG MI->dump(); #endif - assert(0 && "Unexpected instruction for custom inserter!"); - break; + llvm_unreachable("Unexpected instruction for custom inserter!"); case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); @@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); } - llvm_unreachable("Unexpected instruction for custom inserter!"); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f861df0bf99f..5d363a00dc0f 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3447,8 +3447,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { case Match_MnemonicFail: return Error(Loc, "unrecognized instruction mnemonic"); default: - assert(0 && "unexpected error code!"); - return Error(Loc, "invalid instruction format"); + llvm_unreachable("unexpected error code!"); } } diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 24663684a3fd..2057c51346af 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) { case LLVMDisassembler_VariantKind_ARM64_TLVP: case LLVMDisassembler_VariantKind_ARM64_TLVOFF: default: - assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; + llvm_unreachable("bad LLVMDisassembler_VariantKind"); } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 464a18cdbc04..f0513575edb4 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - else { - assert(MO.isImm() && "did not expect relocated expression"); - return static_cast(MO.getImm()); - } - assert(0 && "Unable to encode MCOperand!"); - return 0; + assert(MO.isImm() && "did not expect relocated expression"); + return static_cast(MO.getImm()); } template uint32_t diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 94faf6f60db3..92eaf9e1c9b3 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -321,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); } - assert(0 && "Unhandled update pattern!"); - return 0; + llvm_unreachable("Unhandled update pattern!"); } // Return true if this MachineInstr inserts a scalar (SPR) value into diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp 
b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 5b51a52f828a..b8ee55574972 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1047,8 +1047,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, // we have a movt or a movw, but that led to misleadingly results. // This is now disallowed in the the AsmParser in validateInstruction() // so this should never happen. - assert(0 && "expression without :upper16: or :lower16:"); - return 0; + llvm_unreachable("expression without :upper16: or :lower16:"); } uint32_t ARMMCCodeEmitter:: diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index ea6367a89bce..1c95e06c8923 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1538,14 +1538,13 @@ int HexagonInstrInfo::GetDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); - if (NewOp < 0) - assert(0 && "Couldn't change predicate new instruction to its old form."); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); } if (isNewValueStore(NewOp)) { // Convert into non-new-value format NewOp = Hexagon::getNonNVStore(NewOp); - if (NewOp < 0) - assert(0 && "Couldn't change new-value store to its old form."); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } return NewOp; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index bc695e6d4f74..d5c3dbc47880 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -65,7 +65,7 @@ class MipsAsmBackend : public MCAsmBackend { const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { // FIXME. 
- assert(0 && "RelaxInstruction() unimplemented"); + llvm_unreachable("RelaxInstruction() unimplemented"); return false; } diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 5e7cefed0ace..dc1344fb8d3f 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -172,17 +172,13 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, SmallVectorImpl &Fixup, const MCSubtargetInfo &STI) const { if (MO.isReg()) { - if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) { + if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) return MRI.getEncodingValue(MO.getReg()); - } else { - return getHWReg(MO.getReg()); - } - } else if (MO.isImm()) { - return MO.getImm(); - } else { - assert(0); - return 0; + return getHWReg(MO.getReg()); } + + assert(MO.isImm()); + return MO.getImm(); } #include "AMDGPUGenMCCodeEmitter.inc" diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 261fb3838d37..5975a517994a 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -173,6 +173,6 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, raw_ostream &O) { - assert(0 && "FIXME: Implement SparcInstPrinter::printGetPCX."); + llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX."); return true; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 0fbac218cb6b..dcd81e3d6249 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -196,7 +196,7 @@ namespace { const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { // FIXME. - assert(0 && "fixupNeedsRelaxation() unimplemented"); + llvm_unreachable("fixupNeedsRelaxation() unimplemented"); return false; } void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index b19ad7b45ca6..eea9626c17b0 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -133,7 +133,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, if (Expr->EvaluateAsAbsolute(Res)) return Res; - assert(0 && "Unhandled expression!"); + llvm_unreachable("Unhandled expression!"); return 0; } From e793899a078457a1b0b11657ad07978ad9c0c687 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 06:22:01 +0000 Subject: [PATCH 096/157] 80-column fixups. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211255 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-mc/llvm-mc.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index a6b74a7b1baa..60e566c16357 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -52,7 +52,8 @@ static cl::opt ShowEncoding("show-encoding", cl::desc("Show instruction encodings")); static cl::opt -CompressDebugSections("compress-debug-sections", cl::desc("Compress DWARF debug sections")); +CompressDebugSections("compress-debug-sections", + cl::desc("Compress DWARF debug sections")); static cl::opt ShowInst("show-inst", cl::desc("Show internal instruction representation")); @@ -240,7 +241,8 @@ static void setDwarfDebugProducer(void) { DwarfDebugProducer += getenv("DEBUG_PRODUCER"); } -static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out) { +static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, + tool_output_file *Out) { AsmLexer Lexer(MAI); Lexer.setBuffer(SrcMgr.getMemoryBuffer(0)); @@ -320,7 +322,8 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out) static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, - MCAsmInfo &MAI, MCSubtargetInfo &STI, MCInstrInfo &MCII) { + MCAsmInfo &MAI, MCSubtargetInfo &STI, + MCInstrInfo &MCII) { std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( @@ -391,7 +394,8 @@ int main(int argc, char **argv) { if (CompressDebugSections) { if (!zlib::isAvailable()) { - errs() << ProgName << ": build tools with zlib to enable -compress-debug-sections"; + errs() << ProgName + << ": build tools with zlib to enable -compress-debug-sections"; return 1; } MAI->setCompressDebugSections(true); @@ -479,7 +483,8 @@ int main(int argc, char **argv) { Res = AsLexInput(SrcMgr, *MAI, Out.get()); break; case AC_Assemble: - Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, *MCII); + Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, + *MCII); break; case AC_MDisassemble: assert(IP && "Expected assembly output"); From 054bd5d288c9e9a480583e017dcea5df6d11af3a Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 06:22:05 +0000 Subject: [PATCH 097/157] Remove unnecessary include. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211256 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 996dc2122f49..d30588e5d4f1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -47,7 +47,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" From 179bb4e0eef028baf4015a12f962964739d3b542 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 06:22:08 +0000 Subject: [PATCH 098/157] Move -dwarf-version to an MC level command line option so it's used by all of the MC level tools and codegen. Fix up all uses in the compiler to use this and set it on the context accordingly. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211257 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCTargetOptions.h | 4 +++- include/llvm/MC/MCTargetOptionsCommandFlags.h | 4 ++++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 5 +---- lib/MC/MCTargetOptions.cpp | 3 ++- tools/llvm-mc/llvm-mc.cpp | 14 +++++++------- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index 80cc8befb7a7..eb4348ed3ec2 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -29,6 +29,7 @@ class MCTargetOptions { bool ShowMCEncoding : 1; bool ShowMCInst : 1; bool AsmVerbose : 1; + int DwarfVersion; MCTargetOptions(); }; @@ -41,7 +42,8 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) { ARE_EQUAL(MCUseDwarfDirectory) && ARE_EQUAL(ShowMCEncoding) && ARE_EQUAL(ShowMCInst) && - ARE_EQUAL(AsmVerbose)); + ARE_EQUAL(AsmVerbose) && + ARE_EQUAL(DwarfVersion)); #undef ARE_EQUAL } diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index 17a117a2a3bd..344983dec5f2 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -33,11 +33,15 @@ cl::opt RelaxAll("mc-relax-all", cl::desc("When used with filetype=obj, " "relax all fixups in the emitted object file")); +cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), + cl::init(0)); + static inline MCTargetOptions InitMCTargetOptionsFromFlags() { MCTargetOptions Options; Options.SanitizeAddress = (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress); Options.MCRelaxAll = RelaxAll; + Options.DwarfVersion = DwarfVersion; return Options; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index a88aebd2d22f..7aaf731b6f8b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); -static cl::opt -DwarfVersionNumber("dwarf-version", cl::Hidden, - cl::desc("Generate DWARF for dwarf version."), cl::init(0)); - static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; @@ -209,6 +205,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; DwarfVersion = DwarfVersionNumber ? 
DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 8e946d57f7fb..efd724a15df6 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -14,6 +14,7 @@ namespace llvm { MCTargetOptions::MCTargetOptions() : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), MCSaveTempLabels(false), MCUseDwarfDirectory(false), - ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {} + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false), + DwarfVersion(0) {} } // end namespace llvm diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 60e566c16357..31b3d295b41f 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -150,9 +150,6 @@ static cl::opt GenDwarfForAssembly("g", cl::desc("Generate dwarf debugging info for assembly " "source files")); -static cl::opt -DwarfVersion("dwarf-version", cl::desc("Dwarf version"), cl::init(4)); - static cl::opt DebugCompilationDir("fdebug-compilation-dir", cl::desc("Specifies the debug info's compilation dir")); @@ -323,12 +320,12 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, MCAsmInfo &MAI, MCSubtargetInfo &STI, - MCInstrInfo &MCII) { + MCInstrInfo &MCII, MCTargetOptions &MCOptions) { std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( - TheTarget->createMCAsmParser(STI, *Parser, MCII, - InitMCTargetOptionsFromFlags())); + TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); + if (!TAP) { errs() << ProgName << ": error: this target does not support assembly parsing.\n"; @@ -359,6 +356,7 @@ int main(int argc, char **argv) { cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); TripleName = Triple::normalize(TripleName); setDwarfDebugFlags(argc, argv); @@ -411,6 +409,8 @@ int main(int argc, char **argv) { Ctx.setAllowTemporaryLabels(false); Ctx.setGenDwarfForAssembly(GenDwarfForAssembly); + // Default to 4 for dwarf version. + unsigned DwarfVersion = MCOptions.DwarfVersion ? MCOptions.DwarfVersion : 4; if (DwarfVersion < 2 || DwarfVersion > 4) { errs() << ProgName << ": Dwarf version " << DwarfVersion << " is not supported." << '\n'; @@ -484,7 +484,7 @@ int main(int argc, char **argv) { break; case AC_Assemble: Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, - *MCII); + *MCII, MCOptions); break; case AC_MDisassemble: assert(IP && "Expected assembly output"); From 6c2e8874b0f1fd993a17ad27c8348ae531cd5464 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 19 Jun 2014 07:14:33 +0000 Subject: [PATCH 099/157] InstCombine: Stop two transforms dueling InstCombineMulDivRem has: // Canonicalize (X+C1)*CI -> X*CI+C1*CI. InstCombineAddSub has: // W*X + Y*Z --> W * (X+Z) iff W == Y These two transforms could fight with each other if C1*CI would not fold away to something simpler than a ConstantExpr mul. The InstCombineMulDivRem transform only acted on ConstantInts until r199602 when it was changed to operate on all Constants in order to let it fire on ConstantVectors. To fix this, make this transform more careful by checking to see if we actually folded away C1*CI. This fixes PR20079. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 7 +++++-- test/Transforms/InstCombine/pr20079.ll | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/InstCombine/pr20079.ll diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 9996ebc2e744..497c0b49ab3f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -203,8 +203,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *X; Constant *C1; if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) { - Value *Add = Builder->CreateMul(X, Op1); - return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, Op1)); + Value *Mul = Builder->CreateMul(C1, Op1); + // Only go forward with the transform if C1*CI simplifies to a tidier + // constant. + if (!match(Mul, m_Mul(m_Value(), m_Value()))) + return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul); } } } diff --git a/test/Transforms/InstCombine/pr20079.ll b/test/Transforms/InstCombine/pr20079.ll new file mode 100644 index 000000000000..3c86ecc5f30b --- /dev/null +++ b/test/Transforms/InstCombine/pr20079.ll @@ -0,0 +1,13 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s +@b = internal global [1 x i32] zeroinitializer, align 4 +@c = internal global i32 0, align 4 + +; CHECK-LABEL: @fn1 +; CHECK: [[ADD:%.*]] = add i32 %a, -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], sub (i32 0, i32 zext (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @b, i64 0, i64 0), i32* @c) to i32)) +; CHECK-NEXT: ret i32 [[AND]] +define i32 @fn1(i32 %a) { + %xor = add i32 %a, -1 + %mul = mul nsw i32 %xor, zext (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @b, i64 0, i64 0), i32* @c) to i32) + ret i32 %mul +} From ba928e254e78a06b9e4743dd933df3103c80ed92 Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 07:25:18 +0000 Subject: [PATCH 100/157] Remove OwningPtr.h and associated tests llvm::OwningPtr is superseded by std::unique_ptr. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211259 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/OwningPtr.h | 165 ------------------- unittests/ADT/CMakeLists.txt | 1 - unittests/ADT/OwningPtrTest.cpp | 273 -------------------------------- utils/llvm.natvis | 8 - 4 files changed, 447 deletions(-) delete mode 100644 include/llvm/ADT/OwningPtr.h delete mode 100644 unittests/ADT/OwningPtrTest.cpp diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h deleted file mode 100644 index 5e83358fc071..000000000000 --- a/include/llvm/ADT/OwningPtr.h +++ /dev/null @@ -1,165 +0,0 @@ -//===- llvm/ADT/OwningPtr.h - Smart ptr that owns the pointee ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines and implements the OwningPtr class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_OWNINGPTR_H -#define LLVM_ADT_OWNINGPTR_H - -#include "llvm/Support/Compiler.h" -#include -#include -#include - -namespace llvm { - -/// OwningPtr smart pointer - OwningPtr mimics a built-in pointer except that it -/// guarantees deletion of the object pointed to, either on destruction of the -/// OwningPtr or via an explicit reset(). Once created, ownership of the -/// pointee object can be taken away from OwningPtr by using the take method. -template -class OwningPtr { - OwningPtr(OwningPtr const &) LLVM_DELETED_FUNCTION; - OwningPtr &operator=(OwningPtr const &) LLVM_DELETED_FUNCTION; - T *Ptr; -public: - explicit OwningPtr(T *P = 0) : Ptr(P) {} - - OwningPtr(OwningPtr &&Other) : Ptr(Other.take()) {} - - OwningPtr &operator=(OwningPtr &&Other) { - reset(Other.take()); - return *this; - } - - OwningPtr(std::unique_ptr Other) : Ptr(Other.release()) {} - - OwningPtr &operator=(std::unique_ptr Other) { - reset(Other.release()); - return *this; - } - -#if LLVM_HAS_RVALUE_REFERENCE_THIS - operator std::unique_ptr() && { return std::unique_ptr(take()); } -#endif - - ~OwningPtr() { - delete Ptr; - } - - /// reset - Change the current pointee to the specified pointer. Note that - /// calling this with any pointer (including a null pointer) deletes the - /// current pointer. - void reset(T *P = 0) { - if (P == Ptr) return; - T *Tmp = Ptr; - Ptr = P; - delete Tmp; - } - - /// take - Reset the owning pointer to null and return its pointer. This does - /// not delete the pointer before returning it. - T *take() { - T *Tmp = Ptr; - Ptr = nullptr; - return Tmp; - } - - T *release() { return take(); } - - std::unique_ptr take_unique() { return std::unique_ptr(take()); } - - T &operator*() const { - assert(Ptr && "Cannot dereference null pointer"); - return *Ptr; - } - - T *operator->() const { return Ptr; } - T *get() const { return Ptr; } - LLVM_EXPLICIT operator bool() const { return Ptr != nullptr; } - bool operator!() const { return Ptr == nullptr; } - bool isValid() const { return Ptr != nullptr; } - - void swap(OwningPtr &RHS) { - T *Tmp = RHS.Ptr; - RHS.Ptr = Ptr; - Ptr = Tmp; - } -}; - -template -inline void swap(OwningPtr &a, OwningPtr &b) { - a.swap(b); -} - -/// OwningArrayPtr smart pointer - OwningArrayPtr provides the same -/// functionality as OwningPtr, except that it works for array types. -template -class OwningArrayPtr { - OwningArrayPtr(OwningArrayPtr const &) LLVM_DELETED_FUNCTION; - OwningArrayPtr &operator=(OwningArrayPtr const &) LLVM_DELETED_FUNCTION; - T *Ptr; -public: - explicit OwningArrayPtr(T *P = 0) : Ptr(P) {} - - OwningArrayPtr(OwningArrayPtr &&Other) : Ptr(Other.take()) {} - - OwningArrayPtr &operator=(OwningArrayPtr &&Other) { - reset(Other.take()); - return *this; - } - - ~OwningArrayPtr() { - delete [] Ptr; - } - - /// reset - Change the current pointee to the specified pointer. Note that - /// calling this with any pointer (including a null pointer) deletes the - /// current pointer. - void reset(T *P = 0) { - if (P == Ptr) return; - T *Tmp = Ptr; - Ptr = P; - delete [] Tmp; - } - - /// take - Reset the owning pointer to null and return its pointer. This does - /// not delete the pointer before returning it. 
- T *take() { - T *Tmp = Ptr; - Ptr = 0; - return Tmp; - } - - T &operator[](std::ptrdiff_t i) const { - assert(Ptr && "Cannot dereference null pointer"); - return Ptr[i]; - } - - T *get() const { return Ptr; } - LLVM_EXPLICIT operator bool() const { return Ptr != 0; } - bool operator!() const { return Ptr == nullptr; } - - void swap(OwningArrayPtr &RHS) { - T *Tmp = RHS.Ptr; - RHS.Ptr = Ptr; - Ptr = Tmp; - } -}; - -template -inline void swap(OwningArrayPtr &a, OwningArrayPtr &b) { - a.swap(b); -} - -} // end namespace llvm - -#endif diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt index 51197231f2b5..0f214f3d0c80 100644 --- a/unittests/ADT/CMakeLists.txt +++ b/unittests/ADT/CMakeLists.txt @@ -23,7 +23,6 @@ set(ADTSources MakeUniqueTest.cpp MapVectorTest.cpp OptionalTest.cpp - OwningPtrTest.cpp PackedVectorTest.cpp PointerIntPairTest.cpp PointerUnionTest.cpp diff --git a/unittests/ADT/OwningPtrTest.cpp b/unittests/ADT/OwningPtrTest.cpp deleted file mode 100644 index d83a9471cc56..000000000000 --- a/unittests/ADT/OwningPtrTest.cpp +++ /dev/null @@ -1,273 +0,0 @@ -//===- llvm/unittest/ADT/OwningPtrTest.cpp - OwningPtr unit tests ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/OwningPtr.h" -#include "gtest/gtest.h" -using namespace llvm; - -namespace { - -struct TrackDestructor { - static unsigned Destructions; - int val; - explicit TrackDestructor(int val) : val(val) {} - ~TrackDestructor() { ++Destructions; } - static void ResetCounts() { Destructions = 0; } - -private: - TrackDestructor(const TrackDestructor &other) LLVM_DELETED_FUNCTION; - TrackDestructor & - operator=(const TrackDestructor &other) LLVM_DELETED_FUNCTION; - TrackDestructor(TrackDestructor &&other) LLVM_DELETED_FUNCTION; - TrackDestructor &operator=(TrackDestructor &&other) LLVM_DELETED_FUNCTION; -}; - -unsigned TrackDestructor::Destructions = 0; - -// Test fixture -class OwningPtrTest : public testing::Test {}; - -TEST_F(OwningPtrTest, DefaultConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr O; - EXPECT_FALSE(O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - } - EXPECT_EQ(0u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, PtrConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr O(new TrackDestructor(3)); - EXPECT_TRUE((bool)O); - EXPECT_FALSE(!O); - EXPECT_TRUE(O.get()); - EXPECT_TRUE(O.isValid()); - EXPECT_EQ(3, (*O).val); - EXPECT_EQ(3, O->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Reset) { - TrackDestructor::ResetCounts(); - OwningPtr O(new TrackDestructor(3)); - EXPECT_EQ(0u, TrackDestructor::Destructions); - O.reset(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Take) { - TrackDestructor::ResetCounts(); - TrackDestructor *T = nullptr; - { - OwningPtr O(new TrackDestructor(3)); - T = O.take(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_TRUE(T); - EXPECT_EQ(3, T->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - delete T; - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Release) { 
- TrackDestructor::ResetCounts(); - TrackDestructor *T = nullptr; - { - OwningPtr O(new TrackDestructor(3)); - T = O.release(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_TRUE(T); - EXPECT_EQ(3, T->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - delete T; - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, MoveConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B = std::move(A); - EXPECT_FALSE((bool)A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, MoveAssignment) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(1u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Swap) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B.swap(A); - EXPECT_TRUE((bool)A); - EXPECT_FALSE(!A); - EXPECT_TRUE(A.get()); - EXPECT_TRUE(A.isValid()); - EXPECT_EQ(4, (*A).val); - EXPECT_EQ(4, A->val); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - swap(A, B); - EXPECT_TRUE((bool)A); - EXPECT_FALSE(!A); - EXPECT_TRUE(A.get()); - EXPECT_TRUE(A.isValid()); - EXPECT_EQ(4, (*A).val); - EXPECT_EQ(4, A->val); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, UniqueToOwningConstruction) { - TrackDestructor::ResetCounts(); - { - std::unique_ptr A(new TrackDestructor(3)); - OwningPtr B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, UniqueToOwningAssignment) { - TrackDestructor::ResetCounts(); - { - std::unique_ptr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(1u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, TakeUniqueConstruction) { - TrackDestructor::ResetCounts(); - 
{ - OwningPtr A(new TrackDestructor(3)); - std::unique_ptr B = A.take_unique(); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -#if LLVM_HAS_RVALUE_REFERENCE_THIS -TEST_F(OwningPtrTest, OwningToUniqueConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - std::unique_ptr B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} -#endif -} diff --git a/utils/llvm.natvis b/utils/llvm.natvis index 9874ce58d250..6da0b2512a92 100644 --- a/utils/llvm.natvis +++ b/utils/llvm.natvis @@ -108,14 +108,6 @@ or create a symbolic link so it updates automatically. - - empty - OwningPtr {*Ptr} - - Ptr - - - {{ [Small Mode] size={NumElements}, capacity={CurArraySize} }} {{ [Big Mode] size={NumElements}, capacity={CurArraySize} }} From c28beb254dabd5f522bafdbdd59a4e22f0a539ec Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 07:25:25 +0000 Subject: [PATCH 101/157] CommandLine: bail out when options get multiply registered These errors are strictly unrecoverable and indicate serious issues such as conflicting option names or an incorrectly linked LLVM distribution. With this change, the errors actually get detected so tests don't pass silently. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211260 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/CommandLine.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index e09d4b6e47b4..c2b739fa7360 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -145,6 +145,7 @@ void OptionCategory::registerCategory() { static void GetOptionInfo(SmallVectorImpl &PositionalOpts, SmallVectorImpl &SinkOpts, StringMap &OptionsMap) { + bool HadErrors = false; SmallVector OptionNames; Option *CAOpt = nullptr; // The ConsumeAfter option if it exists. for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { @@ -158,8 +159,9 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { // Add argument to the argument map! if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) { - errs() << ProgramName << ": CommandLine Error: Argument '" - << OptionNames[i] << "' defined more than once!\n"; + errs() << ProgramName << ": CommandLine Error: Option '" + << OptionNames[i] << "' registered more than once!\n"; + HadErrors = true; } } @@ -171,8 +173,10 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, else if (O->getMiscFlags() & cl::Sink) // Remember sink options SinkOpts.push_back(O); else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) { - if (CAOpt) + if (CAOpt) { O->error("Cannot specify more than one option with cl::ConsumeAfter!"); + HadErrors = true; + } CAOpt = O; } } @@ -182,6 +186,12 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, // Make sure that they are in order of registration not backwards. 
std::reverse(PositionalOpts.begin(), PositionalOpts.end()); + + // Fail hard if there were errors. These are strictly unrecoverable and + // indicate serious issues such as conflicting option names or an incorrectly + // linked LLVM distribution. + if (HadErrors) + report_fatal_error("inconsistency in registered CommandLine options"); } From 83175090522ebd6513e45033c342200cd645f89c Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Thu, 19 Jun 2014 08:29:18 +0000 Subject: [PATCH 102/157] Refactored and updated SimplifyUsingDistributiveLaws() to * Find factorization opportunities using identity values. * Find factorization opportunities by treating shl(X, C) as mul (X, shl(C)) * Keep NSW flag while simplifying instruction using factorization. This fixes PR19263. Differential Revision: http://reviews.llvm.org/D3799 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211261 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 52 ----- .../InstCombine/InstructionCombining.cpp | 195 +++++++++++++----- test/Transforms/InstCombine/add2.ll | 68 ++++++ 3 files changed, 210 insertions(+), 105 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index f6658964ae85..77b2404b49d4 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -865,30 +865,6 @@ Value *FAddCombine::createAddendVal return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -// dyn_castFoldableMul - If this value is a multiply that can be folded into -// other computations (because it has a constant operand), return the -// non-constant operand of the multiply, and set CST to point to the multiplier. -// Otherwise, return null. -// -static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { - if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return nullptr; - - Instruction *I = dyn_cast(V); - if (!I) return nullptr; - - if (I->getOpcode() == Instruction::Mul) - if ((CST = dyn_cast(I->getOperand(1)))) - return I->getOperand(0); - if (I->getOpcode() == Instruction::Shl) - if ((CST = dyn_cast(I->getOperand(1)))) { - // The multiplier is really 1 << CST. - CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); - return I->getOperand(0); - } - return nullptr; -} - // If one of the operands only has one non-zero bit, and if the other // operand has a known-zero bit in a more significant place than it (not // including the sign bit) the ripple may go up to and fill the zero, but @@ -1089,24 +1065,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); - - { - Constant *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); - - // X*C1 + X*C2 --> X * (C1+C2) - Constant *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } - - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); - } - // A+B --> A|B iff A and B have no bits set in common. 
if (IntegerType *IT = dyn_cast(I.getType())) { APInt LHSKnownOne(IT->getBitWidth(), 0); @@ -1593,16 +1551,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } } - Constant *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - Constant *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". if (DL) { diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 991ad796a7e3..88d7a0d59eac 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -395,6 +395,127 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, return false; } +/// This function returns identity value for given opcode, which can be used to +/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). +static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) { + if (isa(V)) + return nullptr; + + if (OpCode == Instruction::Mul) + return ConstantInt::get(V->getType(), 1); + + // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc. + + return nullptr; +} + +/// This function factors binary ops which can be combined using distributive +/// laws. This also factor SHL as MUL e.g. SHL(X, 2) ==> MUL(X, 4). +Instruction::BinaryOps getBinOpsForFactorization(BinaryOperator *Op, + Value *&LHS, Value *&RHS) { + if (!Op) + return Instruction::BinaryOpsEnd; + + if (Op->getOpcode() == Instruction::Shl) { + if (Constant *CST = dyn_cast(Op->getOperand(1))) { + // The multiplier is really 1 << CST. + RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST); + LHS = Op->getOperand(0); + return Instruction::Mul; + } + } + + // TODO: We can add other conversions e.g. shr => div etc. + + LHS = Op->getOperand(0); + RHS = Op->getOperand(1); + return Op->getOpcode(); +} + +/// This tries to simplify binary operations by factorizing out common terms +/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). +static Value *tryFactorization(InstCombiner::BuilderTy *Builder, + const DataLayout *DL, BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, Value *A, + Value *B, Value *C, Value *D) { + + // If any of A, B, C, D are null, we can not factor I, return early. + // Checking A and C should be enough. + if (!A || !C || !B || !D) + return nullptr; + + Value *SimplifiedInst = nullptr; + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. 
+ if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V); + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? + if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + } + } + + if (SimplifiedInst) { + ++NumFactor; + SimplifiedInst->takeName(&I); + + // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag. + // TODO: Check for NUW. + if (BinaryOperator *BO = dyn_cast(SimplifiedInst)) { + if (isa(SimplifiedInst)) { + bool HasNSW = false; + if (isa(&I)) + HasNSW = I.hasNoSignedWrap(); + + if (BinaryOperator *Op0 = dyn_cast(LHS)) + if (isa(Op0)) + HasNSW &= Op0->hasNoSignedWrap(); + + if (BinaryOperator *Op1 = dyn_cast(RHS)) + if (isa(Op1)) + HasNSW &= Op1->hasNoSignedWrap(); + BO->setHasNoSignedWrap(HasNSW); + } + } + } + return SimplifiedInst; +} + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations /// which some other binary operation distributes over either by factorizing /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this @@ -404,65 +525,33 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); - Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op // Factorization. - if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { - // The instruction has the form "(A op' B) op (C op' D)". Try to factorize - // a common term. - Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); - Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); - Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; + Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B); + Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D); + + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + if (LHSOpcode == RHSOpcode) { + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + return V; + } - // Does "X op' Y" always equal "Y op' X"? - bool InnerCommutative = Instruction::isCommutative(InnerOpcode); - - // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? - if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) - // Does the instruction have the form "(A op' B) op (A op' D)" or, in the - // commutative case, "(A op' B) op (C op' A)"? - if (A == C || (InnerCommutative && A == D)) { - if (A != C) - std::swap(C, D); - // Consider forming "A op' (B op D)". - // If "B op D" simplifies then it can be formed with no cost. 
- Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); - // If "B op D" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, A, V); - V->takeName(&I); - return V; - } - } + // The instruction has the form "(A op' B) op (C)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, + getIdentityValue(LHSOpcode, RHS))) + return V; - // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? - if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) - // Does the instruction have the form "(A op' B) op (C op' B)" or, in the - // commutative case, "(A op' B) op (B op' D)"? - if (B == D || (InnerCommutative && B == C)) { - if (B != D) - std::swap(C, D); - // Consider forming "(A op C) op' B". - // If "A op C" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); - // If "A op C" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, V, B); - V->takeName(&I); - return V; - } - } - } + // The instruction has the form "(B) op (C op' D)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, + getIdentityValue(RHSOpcode, LHS), C, D)) + return V; // Expansion. + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out // to "(A op C) op' (B op C)" results in simplifications. 
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index aaf3a7a235bf..c728b4437af4 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -86,3 +86,71 @@ define i16 @test9(i16 %a) { ; CHECK-NEXT: %d = mul i16 %a, -32767 ; CHECK-NEXT: ret i16 %d } + +define i16 @add_nsw_mul_nsw(i16 %x) { + %add1 = add nsw i16 %x, %x + %add2 = add nsw i16 %add1, %x + ret i16 %add2 +; CHECK-LABEL: @add_nsw_mul_nsw( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 3 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_1(i16 %x) { + %mul1 = mul nsw i16 %x, 8 + %add2 = add nsw i16 %x, %mul1 + ret i16 %add2 +; CHECK-LABEL: @mul_add_to_mul_1( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 9 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_2(i16 %x) { + %mul1 = mul nsw i16 %x, 8 + %add2 = add nsw i16 %mul1, %x + ret i16 %add2 +; CHECK-LABEL: @mul_add_to_mul_2( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 9 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_3(i16 %a) { + %mul1 = mul i16 %a, 2 + %mul2 = mul i16 %a, 3 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_3( +; CHECK-NEXT: %add = mul i16 %a, 5 +; CHECK-NEXT: ret i16 %add +} + +define i16 @mul_add_to_mul_4(i16 %a) { + %mul1 = mul nsw i16 %a, 2 + %mul2 = mul nsw i16 %a, 7 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_4( +; CHECK-NEXT: %add = mul nsw i16 %a, 9 +; CHECK-NEXT: ret i16 %add +} + +define i16 @mul_add_to_mul_5(i16 %a) { + %mul1 = mul nsw i16 %a, 3 + %mul2 = mul nsw i16 %a, 7 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_5( +; CHECK-NEXT: %add = mul nsw i16 %a, 10 +; CHECK-NEXT: ret i16 %add +} + +define i32 @mul_add_to_mul_6(i32 %x, i32 %y) { + %mul1 = mul nsw i32 %x, %y + %mul2 = mul nsw i32 %mul1, 5 + %add = add nsw i32 %mul1, %mul2 + ret i32 %add +; CHECK-LABEL: @mul_add_to_mul_6( +; CHECK-NEXT: %mul1 = mul nsw i32 %x, %y +; CHECK-NEXT: %add = mul nsw i32 %mul1, 6 +; CHECK-NEXT: ret i32 %add +} From cfdf8052865b01e8b8d321640c3f51ff938cc3c4 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 19 Jun 2014 10:29:41 +0000 Subject: [PATCH 103/157] [X86] Teach how to combine horizontal binop even in the presence of undefs. Before this change, the backend was unable to fold a build_vector dag node with UNDEF operands into a single horizontal add/sub. This patch teaches how to combine a build_vector with UNDEF operands into a horizontal add/sub when possible. The algorithm conservatively avoids to combine a build_vector with only a single non-UNDEF operand. Added test haddsub-undef.ll to verify that we correctly fold horizontal binop even in the presence of UNDEFs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 155 ++++++++++---- test/CodeGen/X86/haddsub-undef.ll | 325 +++++++++++++++++++++++++++++ 2 files changed, 440 insertions(+), 40 deletions(-) create mode 100644 test/CodeGen/X86/haddsub-undef.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 851607eac96e..a7b6e707816f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6077,21 +6077,35 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { /// This function only analyzes elements of \p N whose indices are /// in range [BaseIdx, LastIdx). 
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, + SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1) { + EVT VT = N->getValueType(0); + assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); - assert(N->getValueType(0).isVector() && - N->getValueType(0).getVectorNumElements() >= LastIdx && + assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && "Invalid Vector in input!"); bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); bool CanFold = true; unsigned ExpectedVExtractIdx = BaseIdx; unsigned NumElts = LastIdx - BaseIdx; + V0 = DAG.getUNDEF(VT); + V1 = DAG.getUNDEF(VT); // Check if N implements a horizontal binop. for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) { SDValue Op = N->getOperand(i + BaseIdx); + + // Skip UNDEFs. + if (Op->getOpcode() == ISD::UNDEF) { + // Update the expected vector extract index. + if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; + ExpectedVExtractIdx += 2; + continue; + } + CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); if (!CanFold) @@ -6112,12 +6126,15 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, unsigned I0 = cast(Op0.getOperand(1))->getZExtValue(); unsigned I1 = cast(Op1.getOperand(1))->getZExtValue(); - - if (i == 0) - V0 = Op0.getOperand(0); - else if (i * 2 == NumElts) { - V1 = Op0.getOperand(0); - ExpectedVExtractIdx = BaseIdx; + + if (i * 2 < NumElts) { + if (V0.getOpcode() == ISD::UNDEF) + V0 = Op0.getOperand(0); + } else { + if (V1.getOpcode() == ISD::UNDEF) + V1 = Op0.getOperand(0); + if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; } SDValue Expected = (i * 2 < NumElts) ? V0 : V1; @@ -6163,9 +6180,14 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, /// Example: /// HADD V0_LO, V1_LO /// HADD V0_HI, V1_HI +/// +/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower +/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to +/// the upper 128-bits of the result. static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDLoc DL, SelectionDAG &DAG, - unsigned X86Opcode, bool Mode) { + unsigned X86Opcode, bool Mode, + bool isUndefLO, bool isUndefHI) { EVT VT = V0.getValueType(); assert(VT.is256BitVector() && VT == V1.getValueType() && "Invalid nodes in input!"); @@ -6177,13 +6199,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL); EVT NewVT = V0_LO.getValueType(); - SDValue LO, HI; + SDValue LO = DAG.getUNDEF(NewVT); + SDValue HI = DAG.getUNDEF(NewVT); + if (Mode) { - LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); - HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); + // Don't emit a horizontal binop if the result is expected to be UNDEF. + if (!isUndefLO && V0->getOpcode() != ISD::UNDEF) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); + if (!isUndefHI && V1->getOpcode() != ISD::UNDEF) + HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); } else { - LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); - HI = DAG.getNode(X86Opcode, DL, NewVT, V1_HI, V1_HI); + // Don't emit a horizontal binop if the result is expected to be UNDEF. 
+ if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF || + V1_LO->getOpcode() != ISD::UNDEF)) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); + + if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF || + V1_HI->getOpcode() != ISD::UNDEF)) + HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); @@ -6198,19 +6231,37 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, SDValue InVec0, InVec1; // Try to match horizontal ADD/SUB. + unsigned NumUndefsLO = 0; + unsigned NumUndefsHI = 0; + unsigned Half = NumElts/2; + + // Count the number of UNDEF operands in the build_vector in input. + for (unsigned i = 0, e = Half; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsLO++; + + for (unsigned i = Half, e = NumElts; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsHI++; + + // Early exit if this is either a build_vector of all UNDEFs or all the + // operands but one are UNDEF. + if (NumUndefsLO + NumUndefsHI + 1 >= NumElts) + return SDValue(); + if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { // Try to match an SSE3 float HADD/HSUB. - if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) { // Try to match an SSSE3 integer HADD/HSUB. - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1); } @@ -6221,16 +6272,20 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, // Try to match an AVX horizontal add/sub of packed single/double // precision floating point values from 256-bit vectors. 
SDValue InVec2, InVec3; - if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::FADD, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::FSUB, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if (VT == MVT::v8i32 || VT == MVT::v16i16) { // Try to match an AVX2 horizontal add/sub of signed integers. @@ -6238,15 +6293,19 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, unsigned X86Opcode; bool CanFold = true; - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::ADD, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::SUB, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HSUB; else CanFold = false; @@ -6257,29 +6316,45 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, if (Subtarget->hasAVX2()) return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1); + // Do not try to expand this build_vector into a pair of horizontal + // add/sub if we can emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + // Convert this build_vector into a pair of horizontal binop followed by // a concat vector. 
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false); + bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false, + isUndefLO, isUndefHI); } } if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || VT == MVT::v16i16) && Subtarget->hasAVX()) { unsigned X86Opcode; - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HSUB; - else if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHADD; - else if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHSUB; else return SDValue(); + // Don't try to expand this build_vector into a pair of horizontal add/sub + // if we can simply emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + // Convert this build_vector into two horizontal add/sub followed by // a concat vector. - return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true); + bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, + isUndefLO, isUndefHI); } return SDValue(); diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll new file mode 100644 index 000000000000..954a9d994e61 --- /dev/null +++ b/test/CodeGen/X86/haddsub-undef.ll @@ -0,0 +1,325 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX +; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 + +; Verify that we correctly fold horizontal binop even in the presence of UNDEFs. 
+ +define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext10 = extractelement <4 x float> %b, i32 2 + %vecext11 = extractelement <4 x float> %b, i32 3 + %add12 = fadd float %vecext10, %vecext11 + %vecinit13 = insertelement <4 x float> %vecinit5, float %add12, i32 3 + ret <4 x float> %vecinit13 +} +; CHECK-LABEL: test1_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext6 = extractelement <4 x float> %b, i32 0 + %vecext7 = extractelement <4 x float> %b, i32 1 + %add8 = fadd float %vecext6, %vecext7 + %vecinit9 = insertelement <4 x float> %vecinit, float %add8, i32 2 + %vecext10 = extractelement <4 x float> %b, i32 2 + %vecext11 = extractelement <4 x float> %b, i32 3 + %add12 = fadd float %vecext10, %vecext11 + %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3 + ret <4 x float> %vecinit13 +} +; CHECK-LABEL: test2_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext6 = extractelement <4 x float> %b, i32 0 + %vecext7 = extractelement <4 x float> %b, i32 1 + %add8 = fadd float %vecext6, %vecext7 + %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2 + ret <4 x float> %vecinit9 +} +; CHECK-LABEL: test3_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + ret <4 x float> %vecinit +} +; CHECK-LABEL: test4_undef +; CHECK-NOT: haddps +; CHECK: ret + + +define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { + %vecext = extractelement <2 x double> %a, i32 0 + %vecext1 = extractelement <2 x double> %a, i32 1 + %add = fadd double %vecext, %vecext1 + %vecinit = insertelement <2 x double> undef, double %add, i32 0 + ret <2 x double> %vecinit +} +; CHECK-LABEL: test5_undef +; CHECK-NOT: haddpd +; CHECK: ret + + +define <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement 
<4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test6_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %b, i32 0 + %vecext1 = extractelement <4 x float> %b, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 2 + %vecext2 = extractelement <4 x float> %b, i32 2 + %vecext3 = extractelement <4 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test7_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 2 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test8_undef +; CHECK-NOT: haddps +; CHECK: ret + + +define <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %b, i32 2 + %vecext3 = extractelement <4 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test9_undef +; CHECK: haddps +; CHECK-NEXT: ret + +define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> %b, i32 2 + %vecext3 = extractelement <8 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 3 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test10_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> %b, i32 4 + %vecext3 = extractelement <8 x float> %b, i32 5 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 6 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test11_undef +; SSE-NOT: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK: ret + +define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> 
%a, i32 2 + %vecext3 = extractelement <8 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test12_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add1 = fadd float %vecext, %vecext1 + %vecinit1 = insertelement <8 x float> undef, float %add1, i32 0 + %vecext2 = extractelement <8 x float> %a, i32 2 + %vecext3 = extractelement <8 x float> %a, i32 3 + %add2 = fadd float %vecext2, %vecext3 + %vecinit2 = insertelement <8 x float> %vecinit1, float %add2, i32 1 + %vecext4 = extractelement <8 x float> %a, i32 4 + %vecext5 = extractelement <8 x float> %a, i32 5 + %add3 = fadd float %vecext4, %vecext5 + %vecinit3 = insertelement <8 x float> %vecinit2, float %add3, i32 2 + %vecext6 = extractelement <8 x float> %a, i32 6 + %vecext7 = extractelement <8 x float> %a, i32 7 + %add4 = fadd float %vecext6, %vecext7 + %vecinit4 = insertelement <8 x float> %vecinit3, float %add4, i32 3 + ret <8 x float> %vecinit4 +} +; CHECK-LABEL: test13_undef +; SSE: haddps +; SSE-NOT: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %b, i32 2 + %vecext3 = extractelement <8 x i32> %b, i32 3 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 3 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test14_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: phaddd +; CHECK: ret + +; On AVX2, the following sequence can be folded into a single horizontal add. +; If the Subtarget doesn't support AVX2, then we avoid emitting two packed +; integer horizontal adds instead of two scalar adds followed by vector inserts. 
+define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %b, i32 4 + %vecext3 = extractelement <8 x i32> %b, i32 5 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 6 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test15_undef +; SSE-NOT: phaddd +; AVX-NOT: vphaddd +; AVX2: vphaddd +; CHECK: ret + +define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %a, i32 2 + %vecext3 = extractelement <8 x i32> %a, i32 3 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test16_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add1 = add i32 %vecext, %vecext1 + %vecinit1 = insertelement <8 x i32> undef, i32 %add1, i32 0 + %vecext2 = extractelement <8 x i32> %a, i32 2 + %vecext3 = extractelement <8 x i32> %a, i32 3 + %add2 = add i32 %vecext2, %vecext3 + %vecinit2 = insertelement <8 x i32> %vecinit1, i32 %add2, i32 1 + %vecext4 = extractelement <8 x i32> %a, i32 4 + %vecext5 = extractelement <8 x i32> %a, i32 5 + %add3 = add i32 %vecext4, %vecext5 + %vecinit3 = insertelement <8 x i32> %vecinit2, i32 %add3, i32 2 + %vecext6 = extractelement <8 x i32> %a, i32 6 + %vecext7 = extractelement <8 x i32> %a, i32 7 + %add4 = add i32 %vecext6, %vecext7 + %vecinit4 = insertelement <8 x i32> %vecinit3, i32 %add4, i32 3 + ret <8 x i32> %vecinit4 +} +; CHECK-LABEL: test17_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: haddps +; CHECK: ret + From 85386a3de9ebcbf688c0cb864fc26c62fa5220f3 Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Thu, 19 Jun 2014 10:36:52 +0000 Subject: [PATCH 104/157] Added instruction combine to transform a few more negative-value additions to subtractions (Part 1) This patch enables transforms for the following patterns. (x + (~(y & c) + 1)) --> x - (y & c) (x + (~((y >> z) & c) + 1)) --> x - ((y>>z) & c) Differential Revision: http://reviews.llvm.org/D3733 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211266 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAddSub.cpp | 45 +++++++++++++ test/Transforms/InstCombine/add2.ll | 63 +++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 77b2404b49d4..5b28d8ccb5b2 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -954,6 +954,48 @@ bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) { return true; return false; + } + +// Checks if any operand is negative and we can convert add to sub. 
+// This function checks for the following negative patterns +// ADD(XOR(OR(Z, NOT(C)), C), 1) == NEG(AND(Z, C)) +// TODO: ADD(XOR(AND(Z, ~C), ~C), 1) == NEG(OR(Z, C)) if C is even +// TODO: XOR(AND(Z, ~C), (~C + 1)) == NEG(OR(Z, C)) if C is odd +Value *checkForNegativeOperand(BinaryOperator &I, + InstCombiner::BuilderTy *Builder) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + // This function creates 2 instructions to replace ADD, so we need at least one + // of LHS or RHS to have one use to ensure a benefit from the transform. + if (!LHS->hasOneUse() && !RHS->hasOneUse()) + return nullptr; + + bool IHasNSW = I.hasNoSignedWrap(); + bool IHasNUW = I.hasNoUnsignedWrap(); + + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + const APInt *C1 = nullptr, *C2 = nullptr; + + // If ONE is on the other side, swap. + if (match(RHS, m_Add(m_Value(X), m_One()))) + std::swap(LHS, RHS); + + if (match(LHS, m_Add(m_Value(X), m_One()))) { + // If XOR is on the other side, swap. + if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) + std::swap(X, RHS); + + // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) + // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) + if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) { + if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { + Value *NewAnd = Builder->CreateAnd(Z, *C1); + return Builder->CreateSub(RHS, NewAnd, "", IHasNUW, IHasNSW); + } + } + } + + return nullptr; } Instruction *InstCombiner::visitAdd(BinaryOperator &I) { @@ -1065,6 +1107,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); + if (Value *V = checkForNegativeOperand(I, Builder)) + return ReplaceInstUsesWith(I, V); + // A+B --> A|B iff A and B have no bits set in common. 
if (IntegerType *IT = dyn_cast(I.getType())) { APInt LHSKnownOne(IT->getBitWidth(), 0); diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index c728b4437af4..0328c4ff6e3f 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -87,6 +87,69 @@ define i16 @test9(i16 %a) { ; CHECK-NEXT: ret i16 %d } +define i32 @test10(i32 %x) { + %x.not = or i32 %x, -1431655766 + %neg = xor i32 %x.not, 1431655765 + %add = add i32 %x, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -1431655766 +; CHECK-NEXT: ret i32 [[AND]] +} + +define i32 @test11(i32 %x, i32 %y) { + %x.not = or i32 %x, -1431655766 + %neg = xor i32 %x.not, 1431655765 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655765 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test12(i32 %x, i32 %y) { + %shr = ashr i32 %x, 3 + %shr.not = or i32 %shr, -1431655766 + %neg = xor i32 %shr.not, 1431655765 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3 +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655765 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test13(i32 %x, i32 %y) { + %x.not = or i32 %x, -1431655767 + %neg = xor i32 %x.not, 1431655766 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test13( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655766 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test14(i32 %x, i32 %y) { + %shr = ashr i32 %x, 3 + %shr.not = or i32 %shr, -1431655767 + %neg = xor i32 %shr.not, 1431655766 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test14( +; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3 +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655766 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + define i16 @add_nsw_mul_nsw(i16 %x) { %add1 = add nsw i16 %x, %x %add2 = add nsw i16 %add1, %x From dc9bdcc2cbd027e38d0abfdf36ff3fa59453d8be Mon Sep 17 00:00:00 2001 From: Dinesh Dwivedi Date: Thu, 19 Jun 2014 14:11:53 +0000 Subject: [PATCH 105/157] Updated comments as suggested by Rafael. Thanks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/JumpThreading.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index e501ff29d038..6e50d3331df6 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -158,7 +158,13 @@ bool JumpThreading::runOnFunction(Function &F) { TLI = &getAnalysis(); LVI = &getAnalysis(); - // Remove unreachable blocks from function as they may result in infinite loop. + // Remove unreachable blocks from function as they may result in infinite + // loop. We do threading if we found something profitable. Jump threading a + // branch can create other opportunities. If these opportunities form a cycle + // i.e. if any jump treading is undoing previous threading in the path, then + // we will loop forever. 
We take care of this issue by not jump threading for + // back edges. This works for normal cases but not for unreachable blocks as + // they may have cycle with no back edge. removeUnreachableBlocks(F); FindLoopHeaders(F); From 27107726551771a102be3043fa9e7aeffa2f8023 Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Thu, 19 Jun 2014 14:39:14 +0000 Subject: [PATCH 106/157] [mips] Small update to the logic behind the expansion of assembly pseudo instructions. Summary: The functions that do the expansion now return false on success and true otherwise. This is so we can catch some errors during the expansion (e.g.: immediate too large). The next patch adds some test cases. Reviewers: vmedic Reviewed By: vmedic Differential Revision: http://reviews.llvm.org/D4214 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211269 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 29 ++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index dd2b857789ca..8f46820ff7e9 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -116,14 +116,20 @@ class MipsAsmParser : public MCTargetAsmParser { bool needsExpansion(MCInst &Inst); - void expandInstruction(MCInst &Inst, SMLoc IDLoc, + // Expands assembly pseudo instructions. + // Returns false on success, true otherwise. + bool expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions, bool isLoad, bool isImmOpnd); @@ -965,7 +971,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } // if load/store if (needsExpansion(Inst)) - expandInstruction(Inst, IDLoc, Instructions); + return expandInstruction(Inst, IDLoc, Instructions); else Instructions.push_back(Inst); @@ -984,9 +990,11 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { } } -void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, +bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { switch (Inst.getOpcode()) { + default: assert(0 && "unimplemented expansion"); + return true; case Mips::LoadImm32Reg: return expandLoadImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Imm: @@ -996,7 +1004,7 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } } -void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, +bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); @@ -1038,9 +1046,10 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.setLoc(IDLoc); Instructions.push_back(tmpInst); } + return false; } -void +bool MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1081,9 +1090,10 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); 
Instructions.push_back(tmpInst); } + return false; } -void +bool MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1115,6 +1125,7 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); Instructions.push_back(tmpInst); } + return false; } void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, From 7e4098332804df5126060a2c8f846445a82c5a62 Mon Sep 17 00:00:00 2001 From: Matheus Almeida Date: Thu, 19 Jun 2014 15:08:04 +0000 Subject: [PATCH 107/157] [mips] Implementation of dli. Patch by David Chisnall His work was sponsored by: DARPA, AFRL Some small modifications to the original patch: we now error if it's not possible to expand an instruction (mips-expansions-bad.s has some examples). Added some comments to the expansions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211271 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 97 ++++++++- lib/Target/Mips/Mips64InstrInfo.td | 7 + test/MC/Mips/mips-expansions-bad.s | 6 + test/MC/Mips/mips64-expansions.s | 209 ++++++++++++++++++++ 4 files changed, 312 insertions(+), 7 deletions(-) create mode 100644 test/MC/Mips/mips-expansions-bad.s create mode 100644 test/MC/Mips/mips64-expansions.s diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 8f46820ff7e9..dc9ed2feeb9c 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -984,6 +984,7 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { case Mips::LoadImm32Reg: case Mips::LoadAddr32Imm: case Mips::LoadAddr32Reg: + case Mips::LoadImm64Reg: return true; default: return false; @@ -997,6 +998,12 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, return true; case Mips::LoadImm32Reg: return expandLoadImm(Inst, IDLoc, Instructions); + case Mips::LoadImm64Reg: + if (!isGP64()) { + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + return expandLoadImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Imm: return expandLoadAddressImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Reg: @@ -1004,6 +1011,30 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } } +namespace { +template +void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + if (PerformShift) { + tmpInst.setOpcode(Mips::DSLL); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateImm(16)); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); + tmpInst.clear(); + } + tmpInst.setOpcode(Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand( + MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift))); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} +} + bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1012,8 +1043,10 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, const MCOperand &RegOp = Inst.getOperand(0); assert(RegOp.isReg() && "expected register operand kind"); - int ImmValue = ImmOp.getImm(); + int64_t ImmValue = ImmOp.getImm(); tmpInst.setLoc(IDLoc); + // FIXME: gas has a special case for values that are 000...1111, which + // becomes a 
li -1 and then a dsrl if (0 <= ImmValue && ImmValue <= 65535) { // For 0 <= j <= 65535. // li d,j => ori d,$zero,j @@ -1030,21 +1063,71 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); - } else { - // For any other value of j that is representable as a 32-bit integer. + } else if ((ImmValue & 0xffffffff) == ImmValue) { + // For any value of j that is representable as a 32-bit integer, create + // a sequence of: // li d,j => lui d,hi16(j) // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); - tmpInst.clear(); - tmpInst.setOpcode(Mips::ORi); + createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else if ((ImmValue & (0xffffLL << 48)) == 0) { + if (!isGP64()) { + Error (IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- lo32 ------> + // <------- hi32 ------> + // <- hi16 -> <- lo16 -> + // _________________________________ + // | | | | + // | 16-bytes | 16-bytes | 16-bytes | + // |__________|__________|__________| + // + // For any value of j that is representable as a 48-bit integer, create + // a sequence of: + // li d,j => lui d,hi16(j) + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 32)) >> 32)); + Instructions.push_back(tmpInst); + createShiftOr<16, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else { + if (!isGP64()) { + Error (IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- hi32 ------> <------- lo32 ------> + // <- hi16 -> <- lo16 -> + // ___________________________________________ + // | | | | | + // | 16-bytes | 16-bytes | 16-bytes | 16-bytes | + // |__________|__________|__________|__________| + // + // For any value of j that isn't representable as a 48-bit integer. + // li d,j => lui d,hi16(j) + // ori d,d,lo16(hi32(j)) + // dsll d,d,16 + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); - tmpInst.setLoc(IDLoc); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 48)) >> 48)); Instructions.push_back(tmpInst); + createShiftOr<32, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<16, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); } return false; } diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 88422ce19dea..2b9cda7ac83c 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -23,6 +23,8 @@ def uimm16_64 : Operand { // Signed Operand def simm10_64 : Operand; +def imm64: Operand; + // Transformation Function - get Imm - 32. 
def Subtract32 : SDNodeXFormgetZExtValue() - 32); @@ -486,6 +488,11 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; +class LoadImm64< string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), + !strconcat(instr_asm, "\t$rt, $imm64")> ; +def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>; + /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>; diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s new file mode 100644 index 000000000000..a137deb8d172 --- /dev/null +++ b/test/MC/Mips/mips-expansions-bad.s @@ -0,0 +1,6 @@ +# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1 +# RUN: FileCheck %s < %t1 + + .text + li $5, 0x100000000 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled + dli $5, 1 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips64-expansions.s b/test/MC/Mips/mips64-expansions.s new file mode 100644 index 000000000000..0efdd2fa5c81 --- /dev/null +++ b/test/MC/Mips/mips64-expansions.s @@ -0,0 +1,209 @@ +# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s +# +# The GNU assembler implements 'dli' and 'dla' variants on 'li' and 'la' +# supporting double-word lengths. Test that not only are they present, bu +# that they also seem to handle 64-bit values. +# +# XXXRW: Does using powers of ten make me a bad person? +# +# CHECK: ori $12, $zero, 1 # encoding: [0x01,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 10 # encoding: [0x0a,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 100 # encoding: [0x64,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 1000 # encoding: [0xe8,0x03,0x0c,0x34] +# CHECK: ori $12, $zero, 10000 # encoding: [0x10,0x27,0x0c,0x34] +# CHECK: lui $12, 1 # encoding: [0x01,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 34464 # encoding: [0xa0,0x86,0x8c,0x35] +# CHECK: lui $12, 15 # encoding: [0x0f,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 16960 # encoding: [0x40,0x42,0x8c,0x35] +# CHECK: lui $12, 152 # encoding: [0x98,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 38528 # encoding: [0x80,0x96,0x8c,0x35] +# CHECK: lui $12, 1525 # encoding: [0xf5,0x05,0x0c,0x3c] +# CHECK: ori $12, $12, 57600 # encoding: [0x00,0xe1,0x8c,0x35] +# CHECK: lui $12, 15258 # encoding: [0x9a,0x3b,0x0c,0x3c] +# CHECK: ori $12, $12, 51712 # encoding: [0x00,0xca,0x8c,0x35] +# CHECK: lui $12, 2 # encoding: [0x02,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 21515 # encoding: [0x0b,0x54,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 58368 # encoding: [0x00,0xe4,0x8c,0x35] +# CHECK: lui $12, 23 # encoding: [0x17,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 18550 # encoding: [0x76,0x48,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 59392 # encoding: [0x00,0xe8,0x8c,0x35] +# CHECK: lui $12, 232 # encoding: [0xe8,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 54437 # encoding: [0xa5,0xd4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 4096 # encoding: [0x00,0x10,0x8c,0x35] +# CHECK: lui $12, 2328 # encoding: [0x18,0x09,0x0c,0x3c] +# CHECK: ori $12, $12, 20082 # encoding: [0x72,0x4e,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 40960 # encoding: 
[0x00,0xa0,0x8c,0x35] +# CHECK: lui $12, 23283 # encoding: [0xf3,0x5a,0x0c,0x3c] +# CHECK: ori $12, $12, 4218 # encoding: [0x7a,0x10,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 16384 # encoding: [0x00,0x40,0x8c,0x35] +# CHECK: lui $12, 3 # encoding: [0x03,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 36222 # encoding: [0x7e,0x8d,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 42182 # encoding: [0xc6,0xa4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 32768 # encoding: [0x00,0x80,0x8c,0x35] +# CHECK: lui $12, 35 # encoding: [0x23,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 34546 # encoding: [0xf2,0x86,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 28609 # encoding: [0xc1,0x6f,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 355 # encoding: [0x63,0x01,0x0c,0x3c] +# CHECK: ori $12, $12, 17784 # encoding: [0x78,0x45,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 23946 # encoding: [0x8a,0x5d,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 3552 # encoding: [0xe0,0x0d,0x0c,0x3c] +# CHECK: ori $12, $12, 46771 # encoding: [0xb3,0xb6,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 42852 # encoding: [0x64,0xa7,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 35527 # encoding: [0xc7,0x8a,0x0c,0x3c] +# CHECK: ori $12, $12, 8964 # encoding: [0x04,0x23,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 35304 # encoding: [0xe8,0x89,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: addiu $12, $zero, -1 # encoding: [0xff,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -10 # encoding: [0xf6,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -100 # encoding: [0x9c,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -1000 # encoding: [0x18,0xfc,0x0c,0x24] +# CHECK: addiu $12, $zero, -10000 # encoding: [0xf0,0xd8,0x0c,0x24] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65534 # encoding: [0xfe,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 31072 # encoding: [0x60,0x79,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65520 # encoding: [0xf0,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 48576 # encoding: [0xc0,0xbd,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65383 # encoding: [0x67,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 27008 # 
encoding: [0x80,0x69,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 64010 # encoding: [0x0a,0xfa,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 7936 # encoding: [0x00,0x1f,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 50277 # encoding: [0x65,0xc4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 13824 # encoding: [0x00,0x36,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65533 # encoding: [0xfd,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 44020 # encoding: [0xf4,0xab,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 7168 # encoding: [0x00,0x1c,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65512 # encoding: [0xe8,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 46985 # encoding: [0x89,0xb7,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 6144 # encoding: [0x00,0x18,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65303 # encoding: [0x17,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 11098 # encoding: [0x5a,0x2b,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 61440 # encoding: [0x00,0xf0,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 63207 # encoding: [0xe7,0xf6,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 45453 # encoding: [0x8d,0xb1,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 24576 # encoding: [0x00,0x60,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 42252 # encoding: [0x0c,0xa5,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 61317 # encoding: [0x85,0xef,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 49152 # encoding: [0x00,0xc0,0x8c,0x35] +# CHECK: lui $12, 65532 # encoding: [0xfc,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 29313 # encoding: [0x81,0x72,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 23353 # encoding: [0x39,0x5b,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 32768 # encoding: [0x00,0x80,0x8c,0x35] +# CHECK: lui $12, 65500 # encoding: [0xdc,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 30989 # encoding: [0x0d,0x79,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 36927 # encoding: [0x3f,0x90,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 65180 # encoding: [0x9c,0xfe,0x0c,0x3c] +# CHECK: ori $12, $12, 47751 # encoding: [0x87,0xba,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # 
encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 41590 # encoding: [0x76,0xa2,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 61983 # encoding: [0x1f,0xf2,0x0c,0x3c] +# CHECK: ori $12, $12, 18764 # encoding: [0x4c,0x49,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 22684 # encoding: [0x9c,0x58,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 30008 # encoding: [0x38,0x75,0x0c,0x3c] +# CHECK: ori $12, $12, 56571 # encoding: [0xfb,0xdc,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 30232 # encoding: [0x18,0x76,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] + + dli $t0, 1 + dli $t0, 10 + dli $t0, 100 + dli $t0, 1000 + dli $t0, 10000 + dli $t0, 100000 + dli $t0, 1000000 + dli $t0, 10000000 + dli $t0, 100000000 + dli $t0, 1000000000 + dli $t0, 10000000000 + dli $t0, 100000000000 + dli $t0, 1000000000000 + dli $t0, 10000000000000 + dli $t0, 100000000000000 + dli $t0, 1000000000000000 + dli $t0, 10000000000000000 + dli $t0, 100000000000000000 + dli $t0, 1000000000000000000 + dli $t0, 10000000000000000000 + dli $t0, -1 + dli $t0, -10 + dli $t0, -100 + dli $t0, -1000 + dli $t0, -10000 + dli $t0, -100000 + dli $t0, -1000000 + dli $t0, -10000000 + dli $t0, -100000000 + dli $t0, -1000000000 + dli $t0, -10000000000 + dli $t0, -100000000000 + dli $t0, -1000000000000 + dli $t0, -10000000000000 + dli $t0, -100000000000000 + dli $t0, -1000000000000000 + dli $t0, -10000000000000000 + dli $t0, -100000000000000000 + dli $t0, -1000000000000000000 + dli $t0, -10000000000000000000 From bb804ee909a0e81d39920bdc8272e624fb8da443 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 19 Jun 2014 15:39:33 +0000 Subject: [PATCH 108/157] Emit DWARF3 call frame information when DWARF3+ debug info is requested Currently, llvm always emits a DWARF CIE with a version of 1, even when emitting DWARF 3 or 4, which both support CIE version 3. This patch makes it emit the newer CIE version when we are emitting DWARF 3 or 4. This will not reduce compatibility, as we already emit other DWARF3/4 features, and is worth doing as the DWARF3 spec removed some ambiguities in the interpretation of call frame information. It also fixes a minor bug where the "return address" field of the CIE was encoded as a ULEB128, which is only valid when the CIE version is 3. There are no test changes for this, because (as far as I can tell) none of the platforms that we test have a return address register with a DWARF register number >127. 
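
For reference, the reason this was harmless in practice: ULEB128 degenerates to a single byte equal to the value for anything below 128, so the one-byte field required by CIE version 1 and the ULEB128 field emitted before this patch are byte-for-byte identical on all of those targets. A small standalone sketch (plain C++, illustrative only, not the MC streamer code touched here) showing that:

  // Illustrative only, not part of the patch: a minimal ULEB128 encoder
  // demonstrating that any value below 128 encodes to a single byte equal
  // to the value itself, so a CIE version 1 reader still parsed the old
  // (ULEB128-encoded) return address column correctly on those platforms.
  #include <cassert>
  #include <cstdint>
  #include <vector>

  static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
    std::vector<uint8_t> Bytes;
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (Value != 0)
        Byte |= 0x80;            // more bytes follow
      Bytes.push_back(Byte);
    } while (Value != 0);
    return Bytes;
  }

  int main() {
    // Typical return address columns: 16 on x86-64, 30 (LR) on AArch64.
    assert(encodeULEB128(16) == std::vector<uint8_t>{16});
    assert(encodeULEB128(30) == std::vector<uint8_t>{30});
    // Only a register number of 128 or more needs a second byte, which is
    // where the one-byte field of a version 1 CIE would have gone wrong.
    assert(encodeULEB128(128).size() == 2);
    return 0;
  }
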
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211272 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Dwarf.h | 1 - lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 + lib/MC/MCDwarf.cpp | 14 +++- test/DebugInfo/AArch64/eh_frame.s | 4 +- .../DebugInfo/AArch64/eh_frame_personality.ll | 4 +- test/DebugInfo/SystemZ/eh_frame.s | 4 +- test/DebugInfo/SystemZ/eh_frame_personality.s | 4 +- test/MC/ELF/cfi-adjust-cfa-offset.s | 2 +- test/MC/ELF/cfi-advance-loc2.s | 2 +- test/MC/ELF/cfi-def-cfa-offset.s | 2 +- test/MC/ELF/cfi-def-cfa-register.s | 2 +- test/MC/ELF/cfi-def-cfa.s | 2 +- test/MC/ELF/cfi-escape.s | 2 +- test/MC/ELF/cfi-offset.s | 2 +- test/MC/ELF/cfi-register.s | 2 +- test/MC/ELF/cfi-rel-offset.s | 2 +- test/MC/ELF/cfi-rel-offset2.s | 2 +- test/MC/ELF/cfi-remember.s | 2 +- test/MC/ELF/cfi-restore.s | 2 +- test/MC/ELF/cfi-same-value.s | 2 +- test/MC/ELF/cfi-sections.s | 4 +- test/MC/ELF/cfi-signal-frame.s | 4 +- test/MC/ELF/cfi-undefined.s | 2 +- test/MC/ELF/cfi-version.ll | 47 +++++++++++++ test/MC/ELF/cfi-window-save.s | 2 +- test/MC/ELF/cfi-zero-addr-delta.s | 2 +- test/MC/ELF/cfi.s | 70 +++++++++---------- test/MC/Mips/eh-frame.s | 8 +-- test/MC/PowerPC/ppc64-initial-cfa.s | 8 +-- 29 files changed, 132 insertions(+), 74 deletions(-) create mode 100644 test/MC/ELF/cfi-version.ll diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index ca316441ea7e..cd9f75600cbc 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -57,7 +57,6 @@ enum LLVMConstants : uint32_t { DW_TAG_user_base = 0x1000, // Recommended base for user tags. DWARF_VERSION = 4, // Default dwarf version we output. - DW_CIE_VERSION = 1, // Common frame information version. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames. DW_ARANGES_VERSION = 2 // Section version number for .debug_aranges. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7aaf731b6f8b..3847eb124f3d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -209,6 +209,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); + Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index be6731abedd9..1016466839f6 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1270,7 +1270,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Version if (verboseAsm) streamer.AddComment("DW_CIE_VERSION"); - streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1); + // For DWARF2, we use CIE version 1 + // For DWARF3+, we use CIE version 3 + uint8_t CIEVersion = context.getDwarfVersion() <= 2 ? 
1 : 3; + streamer.EmitIntValue(CIEVersion, 1); // Augmentation String SmallString<8> Augmentation; @@ -1298,7 +1301,14 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Return Address Register if (verboseAsm) streamer.AddComment("CIE Return Address Column"); - streamer.EmitULEB128IntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true)); + if (CIEVersion == 1) { + assert(MRI->getRARegister() <= 255 && + "DWARF 2 encodes return_address_register in one byte"); + streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true), 1); + } else { + streamer.EmitULEB128IntValue( + MRI->getDwarfRegNum(MRI->getRARegister(), true)); + } // Augmentation Data Length (optional) diff --git a/test/DebugInfo/AArch64/eh_frame.s b/test/DebugInfo/AArch64/eh_frame.s index d8d6b6d9325f..12a58961d717 100644 --- a/test/DebugInfo/AArch64/eh_frame.s +++ b/test/DebugInfo/AArch64/eh_frame.s @@ -17,7 +17,7 @@ foo: // Output is: // CHECK: Contents of section .eh_frame: -// CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01 .........zR..|.. +// CHECK-NEXT: 0000 10000000 00000000 037a5200 017c1e01 .........zR..|.. // CHECK-NEXT: 0010 1b0c1f00 10000000 18000000 00000000 ................ @@ -30,7 +30,7 @@ foo: // ------------------- // 10000000: length of first CIE = 0x10 // 00000000: This is a CIE -// 01: version = 0x1 +// 03: version = 0x3 // 7a 52 00: augmentation string "zR" -- pointer format is specified // 01: code alignment factor 1 // 7c: data alignment factor -4 diff --git a/test/DebugInfo/AArch64/eh_frame_personality.ll b/test/DebugInfo/AArch64/eh_frame_personality.ll index d35f2a2fcafb..51d6bf80b950 100644 --- a/test/DebugInfo/AArch64/eh_frame_personality.ll +++ b/test/DebugInfo/AArch64/eh_frame_personality.ll @@ -16,7 +16,7 @@ clean: } ; CHECK: Contents of section .eh_frame: -; CHECK: 0000 1c000000 00000000 017a504c 5200017c .........zPLR..| +; CHECK: 0000 1c000000 00000000 037a504c 5200017c .........zPLR..| ; CHECK: 0010 1e0b0000 00000000 00000000 1b0c1f00 ................ ; Don't really care about the rest: @@ -33,7 +33,7 @@ clean: ; ---------- ; 1c000000: Length = 0x1c ; 00000000: This is a CIE -; 01: Version 1 +; 03: Version 3 ; 7a 50 4c 52 00: Augmentation string "zPLR" (personality routine, language-specific data, pointer format) ; 01: Code alignment factor 1 ; 78: Data alignment factor: -8 diff --git a/test/DebugInfo/SystemZ/eh_frame.s b/test/DebugInfo/SystemZ/eh_frame.s index 4e7afd56e94b..d55b6cdea8c3 100644 --- a/test/DebugInfo/SystemZ/eh_frame.s +++ b/test/DebugInfo/SystemZ/eh_frame.s @@ -23,7 +23,7 @@ check_largest_class: # Contents of the .eh_frame section: # # 00000000 0000001c 00000000 CIE -# Version: 1 +# Version: 3 # Augmentation: "zR" # Code alignment factor: 1 # Data alignment factor: -8 @@ -48,7 +48,7 @@ check_largest_class: # DW_CFA_nop # # CHECK: Contents of section .eh_frame: -# CHECK-NEXT: 0000 00000014 00000000 017a5200 01780e01 .........zR..x.. +# CHECK-NEXT: 0000 00000014 00000000 037a5200 01780e01 .........zR..x.. # CHECK-NEXT: 0010 1b0c0fa0 01000000 0000001c 0000001c ................ # CHECK-NEXT: 0020 00000000 00000012 00468d07 8e068f05 .........F...... # CHECK-NEXT: 0030 440ec002 00000000 D....... 
diff --git a/test/DebugInfo/SystemZ/eh_frame_personality.s b/test/DebugInfo/SystemZ/eh_frame_personality.s index 46b46db1d806..456e0a6e6bdd 100644 --- a/test/DebugInfo/SystemZ/eh_frame_personality.s +++ b/test/DebugInfo/SystemZ/eh_frame_personality.s @@ -37,7 +37,7 @@ DW.ref.__gxx_personality_v0: # Contents of the .eh_frame section: # # 00000000 0000001c 00000000 CIE -# Version: 1 +# Version: 3 # Augmentation: "zPLR" # Code alignment factor: 1 # Data alignment factor: -8 @@ -61,7 +61,7 @@ DW.ref.__gxx_personality_v0: # DW_CFA_nop # # CHECK: Contents of section .eh_frame: -# CHECK-NEXT: 0000 0000001c 00000000 017a504c 52000178 .........zPLR..x +# CHECK-NEXT: 0000 0000001c 00000000 037a504c 52000178 .........zPLR..x # CHECK-NEXT: 0010 0e079b00 0000001b 1b0c0fa0 01000000 ................ # CHECK-NEXT: 0020 0000001c 00000024 00000000 00000012 .......$........ # CHECK-NEXT: 0030 04000000 00468e06 8f05440e c0020000 .....F....D..... diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s index b3768cb9834c..9d639f70d8dd 100644 --- a/test/MC/ELF/cfi-adjust-cfa-offset.s +++ b/test/MC/ELF/cfi-adjust-cfa-offset.s @@ -28,7 +28,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 1C000000 1C000000 // CHECK-NEXT: 0020: 00000000 0A000000 00440E10 410E1444 // CHECK-NEXT: 0030: 0E080000 00000000 diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s index d7a53c462b70..98caa0185f59 100644 --- a/test/MC/ELF/cfi-advance-loc2.s +++ b/test/MC/ELF/cfi-advance-loc2.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01010000 00030001 0E080000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s index eac2c731fa93..59f740055d47 100644 --- a/test/MC/ELF/cfi-def-cfa-offset.s +++ b/test/MC/ELF/cfi-def-cfa-offset.s @@ -27,7 +27,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 0A000000 00440E10 450E0800 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s index 00d8b99af9d6..178ba32882dc 100644 --- a/test/MC/ELF/cfi-def-cfa-register.s +++ b/test/MC/ELF/cfi-def-cfa-register.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410D06 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s index 36e147f5a4da..dfb0d4b59396 100644 --- a/test/MC/ELF/cfi-def-cfa.s +++ b/test/MC/ELF/cfi-def-cfa.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 
90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410C07 08000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s index 839d6717debc..5394ee414aa7 100644 --- a/test/MC/ELF/cfi-escape.s +++ b/test/MC/ELF/cfi-escape.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00411507 7F000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s index 951a6001e519..a65b4fc783c7 100644 --- a/test/MC/ELF/cfi-offset.s +++ b/test/MC/ELF/cfi-offset.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00418602 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s index 4abbb53b8fc9..94417702c13c 100644 --- a/test/MC/ELF/cfi-register.s +++ b/test/MC/ELF/cfi-register.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410906 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s index 34254c862a46..0dc69c89cf4c 100644 --- a/test/MC/ELF/cfi-rel-offset.s +++ b/test/MC/ELF/cfi-rel-offset.s @@ -31,7 +31,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 24000000 1C000000 // CHECK-NEXT: 0020: 00000000 05000000 00410E08 410D0641 // CHECK-NEXT: 0030: 11067F41 0E104186 02000000 00000000 diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s index 3de769f39fa0..360e7b0ea0f5 100644 --- a/test/MC/ELF/cfi-rel-offset2.s +++ b/test/MC/ELF/cfi-rel-offset2.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01000000 00411106 7F000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s index 98c759d4fffc..3a38948b6a3e 100644 --- a/test/MC/ELF/cfi-remember.s +++ b/test/MC/ELF/cfi-remember.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 03000000 00410A41 0B000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s index d25b5ff2e93f..e225797f54d6 100644 --- a/test/MC/ELF/cfi-restore.s +++ b/test/MC/ELF/cfi-restore.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 
00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 0041C600 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s index 9f5ae4be9ed4..2d37f4d0b43e 100644 --- a/test/MC/ELF/cfi-same-value.s +++ b/test/MC/ELF/cfi-same-value.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410806 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-sections.s b/test/MC/ELF/cfi-sections.s index 15a79e5c055e..b0ba543e5bdb 100644 --- a/test/MC/ELF/cfi-sections.s +++ b/test/MC/ELF/cfi-sections.s @@ -26,7 +26,7 @@ f2: // ELF_64-NEXT: AddressAlignment: 8 // ELF_64-NEXT: EntrySize: 0 // ELF_64-NEXT: SectionData ( -// ELF_64-NEXT: 0000: 14000000 FFFFFFFF 01000178 100C0708 +// ELF_64-NEXT: 0000: 14000000 FFFFFFFF 03000178 100C0708 // ELF_64-NEXT: 0010: 90010000 00000000 14000000 00000000 // ELF_64-NEXT: 0020: 00000000 00000000 01000000 00000000 // ELF_64-NEXT: 0030: 14000000 00000000 00000000 00000000 @@ -47,7 +47,7 @@ f2: // ELF_32-NEXT: AddressAlignment: 4 // ELF_32-NEXT: EntrySize: 0 // ELF_32-NEXT: SectionData ( -// ELF_32-NEXT: 0000: 10000000 FFFFFFFF 0100017C 080C0404 +// ELF_32-NEXT: 0000: 10000000 FFFFFFFF 0300017C 080C0404 // ELF_32-NEXT: 0010: 88010000 0C000000 00000000 00000000 // ELF_32-NEXT: 0020: 01000000 0C000000 00000000 01000000 // ELF_32-NEXT: 0030: 01000000 diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s index 023311962189..98deb0a1de5c 100644 --- a/test/MC/ELF/cfi-signal-frame.s +++ b/test/MC/ELF/cfi-signal-frame.s @@ -23,10 +23,10 @@ g: // CHECK-NEXT: AddressAlignment: 8 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5253 00017810 +// CHECK-NEXT: 0000: 14000000 00000000 037A5253 00017810 // CHECK-NEXT: 0010: 011B0C07 08900100 10000000 1C000000 // CHECK-NEXT: 0020: 00000000 00000000 00000000 14000000 -// CHECK-NEXT: 0030: 00000000 017A5200 01781001 1B0C0708 +// CHECK-NEXT: 0030: 00000000 037A5200 01781001 1B0C0708 // CHECK-NEXT: 0040: 90010000 10000000 1C000000 00000000 // CHECK-NEXT: 0050: 00000000 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s index 9773a36a3b03..568b3159cc44 100644 --- a/test/MC/ELF/cfi-undefined.s +++ b/test/MC/ELF/cfi-undefined.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410706 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll new file mode 100644 index 000000000000..a06638c04e0e --- /dev/null +++ b/test/MC/ELF/cfi-version.ll @@ -0,0 +1,47 @@ +; RUN: llc %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2 +; RUN: llc %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 +; RUN: llc %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 + +target datalayout = 
"e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv8-arm-none-eabi" + +; Function Attrs: nounwind +define i32 @foo() #0 { +entry: + %call = call i32 bitcast (i32 (...)* @bar to i32 ()*)(), !dbg !12 + %add = add nsw i32 %call, 1, !dbg !12 + ret i32 %add, !dbg !12 +} + +declare i32 @bar(...) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @foo, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5.0 "} +!12 = metadata !{i32 2, i32 0, metadata !4, null} + +; DWARF2: .debug_frame contents: +; DWARF2: 00000000 0000000c ffffffff CIE +; DWARF2-NEXT: Version: 1 +; DWARF2-NEXT: Augmentation: + +; DWARF34: .debug_frame contents: +; DWARF34: 00000000 0000000c ffffffff CIE +; DWARF34-NEXT: Version: 3 +; DWARF34-NEXT: Augmentation: diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s index c7d438a19260..b083901c137a 100644 --- a/test/MC/ELF/cfi-window-save.s +++ b/test/MC/ELF/cfi-window-save.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00412D00 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s index 05cb0ae35bd2..8662839b5274 100644 --- a/test/MC/ELF/cfi-zero-addr-delta.s +++ b/test/MC/ELF/cfi-zero-addr-delta.s @@ -30,7 +30,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 1C000000 1C000000 // CHECK-NEXT: 
0020: 00000000 04000000 00410E10 410A0E08 // CHECK-NEXT: 0030: 410B0000 00000000 diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s index fd229b6064ad..21be615c5f39 100644 --- a/test/MC/ELF/cfi.s +++ b/test/MC/ELF/cfi.s @@ -234,116 +234,116 @@ f37: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A4C52 00017810 +// CHECK-NEXT: 0000: 14000000 00000000 037A4C52 00017810 // CHECK-NEXT: 0010: 02031B0C 07089001 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01000000 04000000 00000000 -// CHECK-NEXT: 0030: 20000000 00000000 017A504C 52000178 +// CHECK-NEXT: 0030: 20000000 00000000 037A504C 52000178 // CHECK-NEXT: 0040: 100B0000 00000000 00000003 1B0C0708 // CHECK-NEXT: 0050: 90010000 14000000 28000000 00000000 // CHECK-NEXT: 0060: 01000000 04000000 00000000 14000000 // CHECK-NEXT: 0070: 70000000 00000000 01000000 04000000 -// CHECK-NEXT: 0080: 00000000 20000000 00000000 017A504C +// CHECK-NEXT: 0080: 00000000 20000000 00000000 037A504C // CHECK-NEXT: 0090: 52000178 100B0000 00000000 00000002 // CHECK-NEXT: 00A0: 1B0C0708 90010000 10000000 28000000 // CHECK-NEXT: 00B0: 00000000 01000000 02000000 18000000 -// CHECK-NEXT: 00C0: 00000000 017A5052 00017810 04020000 +// CHECK-NEXT: 00C0: 00000000 037A5052 00017810 04020000 // CHECK-NEXT: 00D0: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 00E0: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 00F0: 00000000 017A5052 00017810 06030000 +// CHECK-NEXT: 00F0: 00000000 037A5052 00017810 06030000 // CHECK-NEXT: 0100: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 0110: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 0120: 00000000 017A5052 00017810 0A040000 +// CHECK-NEXT: 0120: 00000000 037A5052 00017810 0A040000 // CHECK-NEXT: 0130: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 0140: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0150: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0150: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0160: 040A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0170: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0180: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0180: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0190: 060B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 01A0: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 01B0: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 01B0: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 01C0: 0A0C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 01D0: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 01E0: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 01E0: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 01F0: 00017810 0A080000 00000000 00001B0C // CHECK-NEXT: 0200: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 0210: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 0220: 017A5052 00017810 0A100000 00000000 +// CHECK-NEXT: 0220: 037A5052 00017810 0A100000 00000000 // CHECK-NEXT: 0230: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 0240: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0250: 00000000 017A5052 00017810 04120000 +// CHECK-NEXT: 0250: 00000000 037A5052 00017810 04120000 // CHECK-NEXT: 0260: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0270: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0280: 00000000 017A5052 00017810 06130000 +// CHECK-NEXT: 0280: 00000000 037A5052 00017810 06130000 // CHECK-NEXT: 0290: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 02A0: 00000000 01000000 00000000 1C000000 -// 
CHECK-NEXT: 02B0: 00000000 017A5052 00017810 0A140000 +// CHECK-NEXT: 02B0: 00000000 037A5052 00017810 0A140000 // CHECK-NEXT: 02C0: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 02D0: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 02E0: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 02E0: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 02F0: 041A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0300: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0310: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0310: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0320: 061B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 0330: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0340: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0340: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 0350: 0A1C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 0360: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0370: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0370: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 0380: 00017810 0A180000 00000000 00001B0C // CHECK-NEXT: 0390: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 03A0: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 03B0: 017A5052 00017810 0A800000 00000000 +// CHECK-NEXT: 03B0: 037A5052 00017810 0A800000 00000000 // CHECK-NEXT: 03C0: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 03D0: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 03E0: 00000000 017A5052 00017810 04820000 +// CHECK-NEXT: 03E0: 00000000 037A5052 00017810 04820000 // CHECK-NEXT: 03F0: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0400: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0410: 00000000 017A5052 00017810 06830000 +// CHECK-NEXT: 0410: 00000000 037A5052 00017810 06830000 // CHECK-NEXT: 0420: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 0430: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 0440: 00000000 017A5052 00017810 0A840000 +// CHECK-NEXT: 0440: 00000000 037A5052 00017810 0A840000 // CHECK-NEXT: 0450: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 0460: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0470: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0470: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0480: 048A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0490: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 04A0: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 04A0: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 04B0: 068B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 04C0: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 04D0: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 04D0: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 04E0: 0A8C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 04F0: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0500: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0500: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 0510: 00017810 0A880000 00000000 00001B0C // CHECK-NEXT: 0520: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 0530: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 0540: 017A5052 00017810 0A900000 00000000 +// CHECK-NEXT: 0540: 037A5052 00017810 0A900000 00000000 // CHECK-NEXT: 0550: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 0560: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0570: 00000000 017A5052 00017810 04920000 +// CHECK-NEXT: 0570: 00000000 037A5052 00017810 04920000 // CHECK-NEXT: 0580: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0590: 00000000 01000000 00000000 
18000000 -// CHECK-NEXT: 05A0: 00000000 017A5052 00017810 06930000 +// CHECK-NEXT: 05A0: 00000000 037A5052 00017810 06930000 // CHECK-NEXT: 05B0: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 05C0: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 05D0: 00000000 017A5052 00017810 0A940000 +// CHECK-NEXT: 05D0: 00000000 037A5052 00017810 0A940000 // CHECK-NEXT: 05E0: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 05F0: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0600: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0600: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0610: 049A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0620: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0630: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0630: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0640: 069B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 0650: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0660: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0660: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 0670: 0A9C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 0680: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0690: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0690: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 06A0: 00017810 0A980000 00000000 00001B0C // CHECK-NEXT: 06B0: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 06C0: 01000000 00000000 10000000 00000000 -// CHECK-NEXT: 06D0: 017A5200 01781001 1B000000 10000000 +// CHECK-NEXT: 06D0: 037A5200 01781001 1B000000 10000000 // CHECK-NEXT: 06E0: 18000000 00000000 01000000 00000000 // CHECK-NEXT: ) // CHECK-NEXT: } diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s index 167159885d72..d6b9cf0a5405 100644 --- a/test/MC/Mips/eh-frame.s +++ b/test/MC/Mips/eh-frame.s @@ -31,7 +31,7 @@ func: // MIPS32: 00000000 // Version -// MIPS32: 01 +// MIPS32: 03 // Augmentation String // MIPS32: 7a5200 @@ -67,7 +67,7 @@ func: // MIPS32EL: 00000000 // Version -// MIPS32EL: 01 +// MIPS32EL: 03 // Augmentation String // MIPS32EL: 7a5200 @@ -103,7 +103,7 @@ func: // MIPS64: 00000000 // Version -// MIPS64: 01 +// MIPS64: 03 // Augmentation String // MIPS64: 7a5200 @@ -141,7 +141,7 @@ func: // MIPS64EL: 00000000 // Version -// MIPS64EL: 01 +// MIPS64EL: 03 // Augmentation String // MIPS64EL: 7a5200 diff --git a/test/MC/PowerPC/ppc64-initial-cfa.s b/test/MC/PowerPC/ppc64-initial-cfa.s index ca97e1b96b07..d0bc6b3ecd74 100644 --- a/test/MC/PowerPC/ppc64-initial-cfa.s +++ b/test/MC/PowerPC/ppc64-initial-cfa.s @@ -28,8 +28,8 @@ _proc: # STATIC-NEXT: Relocations [ # STATIC-NEXT: ] # STATIC-NEXT: SectionData ( -# STATIC-BE-NEXT: 0000: 00000010 00000000 017A5200 04784101 -# STATIC-LE-NEXT: 0000: 10000000 00000000 017A5200 04784101 +# STATIC-BE-NEXT: 0000: 00000010 00000000 037A5200 04784101 +# STATIC-LE-NEXT: 0000: 10000000 00000000 037A5200 04784101 # STATIC-BE-NEXT: 0010: 1B0C0100 00000010 00000018 00000000 # STATIC-LE-NEXT: 0010: 1B0C0100 10000000 18000000 00000000 # STATIC-BE-NEXT: 0020: 00000004 00000000 @@ -69,8 +69,8 @@ _proc: # PIC-NEXT: Relocations [ # PIC-NEXT: ] # PIC-NEXT: SectionData ( -# PIC-BE-NEXT: 0000: 00000010 00000000 017A5200 04784101 -# PIC-LE-NEXT: 0000: 10000000 00000000 017A5200 04784101 +# PIC-BE-NEXT: 0000: 00000010 00000000 037A5200 04784101 +# PIC-LE-NEXT: 0000: 10000000 00000000 037A5200 04784101 # PIC-BE-NEXT: 0010: 1B0C0100 00000010 00000018 00000000 # PIC-LE-NEXT: 0010: 1B0C0100 10000000 18000000 00000000 # PIC-BE-NEXT: 0020: 00000004 00000000 From 
591f9ee07680fb678e4510c94ff65b0d0a48d224 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 19 Jun 2014 15:52:37 +0000 Subject: [PATCH 109/157] Emit DWARF info for all code section in an assembly file Currently, when using llvm as an assembler, DWARF debug information is only generated for the .text section. This patch modifies this so that DWARF info is emitted for all executable sections. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211273 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/MapVector.h | 9 ++ include/llvm/MC/MCContext.h | 29 ++--- include/llvm/MC/MCObjectStreamer.h | 4 + include/llvm/MC/MCStreamer.h | 2 + lib/MC/MCContext.cpp | 23 ++++ lib/MC/MCDwarf.cpp | 174 ++++++++++++++++++++++------- lib/MC/MCParser/AsmParser.cpp | 17 +-- lib/MC/MCParser/ELFAsmParser.cpp | 32 ++++-- 8 files changed, 220 insertions(+), 70 deletions(-) diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h index 7fd1570cbf12..8f8b7ba2d7ba 100644 --- a/include/llvm/ADT/MapVector.h +++ b/include/llvm/ADT/MapVector.h @@ -123,6 +123,15 @@ class MapVector { Map.erase(Pos); Vector.pop_back(); } + + /// \brief Remove the element given by Iterator. + /// Returns an iterator to the element following the one which was removed, + /// which may be end(). + typename VectorType::iterator erase(typename VectorType::iterator Iterator) { + typename MapType::iterator MapIterator = Map.find(Iterator->first); + Map.erase(MapIterator); + return Vector.erase(Iterator); + } }; } diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 2f9b32b984ec..ce3a99bb3fe1 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -11,10 +11,12 @@ #define LLVM_MC_MCCONTEXT_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -129,11 +131,10 @@ namespace llvm { /// assembly source files. unsigned GenDwarfFileNumber; - /// The default initial text section that we generate dwarf debugging line - /// info for when generating dwarf assembly source files. - const MCSection *GenDwarfSection; - /// Symbols created for the start and end of this section. - MCSymbol *GenDwarfSectionStartSym, *GenDwarfSectionEndSym; + /// Symbols created for the start and end of each section, used for + /// generating the .debug_ranges and .debug_aranges sections. + MapVector > + SectionStartEndSyms; /// The information gathered from labels that will have dwarf label /// entries when generating dwarf assembly source files. 
@@ -374,16 +375,18 @@ namespace llvm { void setGenDwarfFileNumber(unsigned FileNumber) { GenDwarfFileNumber = FileNumber; } - const MCSection *getGenDwarfSection() { return GenDwarfSection; } - void setGenDwarfSection(const MCSection *Sec) { GenDwarfSection = Sec; } - MCSymbol *getGenDwarfSectionStartSym() { return GenDwarfSectionStartSym; } - void setGenDwarfSectionStartSym(MCSymbol *Sym) { - GenDwarfSectionStartSym = Sym; + MapVector > & + getGenDwarfSectionSyms() { + return SectionStartEndSyms; } - MCSymbol *getGenDwarfSectionEndSym() { return GenDwarfSectionEndSym; } - void setGenDwarfSectionEndSym(MCSymbol *Sym) { - GenDwarfSectionEndSym = Sym; + std::pair >::iterator, + bool> + addGenDwarfSection(const MCSection *Sec) { + return SectionStartEndSyms.insert( + std::make_pair(Sec, std::make_pair(nullptr, nullptr))); } + void finalizeDwarfSections(MCStreamer &MCOS); const std::vector &getMCGenDwarfLabelEntries() const { return MCGenDwarfLabelEntries; } diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index e41a8ba63725..a8ba23ac00f3 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -126,6 +126,10 @@ class MCObjectStreamer : public MCStreamer { void EmitFill(uint64_t NumBytes, uint8_t FillValue) override; void EmitZeros(uint64_t NumBytes) override; void FinishImpl() override; + + virtual bool mayHaveInstructions() const { + return getCurrentSectionData()->hasInstructions(); + } }; } // end namespace llvm diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 2a8367afeb8f..1c6039bf6d4d 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -722,6 +722,8 @@ class MCStreamer { virtual void FinishImpl() = 0; /// Finish - Finish emission of machine code. void Finish(); + + virtual bool mayHaveInstructions() const { return true; } }; /// createNullStreamer - Create a dummy machine code streamer, which does diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index cc98be6ea63f..bd2c4e960ac4 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -340,6 +340,29 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { return !MCDwarfFiles[FileNumber].Name.empty(); } +/// finalizeDwarfSections - Emit end symbols for each non-empty code section. +/// Also remove empty sections from SectionStartEndSyms, to avoid generating +/// useless debug info for them. +void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { + MCContext &context = MCOS.getContext(); + + auto sec = SectionStartEndSyms.begin(); + while (sec != SectionStartEndSyms.end()) { + assert(sec->second.first && "Start symbol must be set by now"); + MCOS.SwitchSection(sec->first); + if (MCOS.mayHaveInstructions()) { + MCSymbol *SectionEndSym = context.CreateTempSymbol(); + MCOS.EmitLabel(SectionEndSym); + sec->second.second = SectionEndSym; + ++sec; + } else { + MapVector >::iterator + to_erase = sec; + sec = SectionStartEndSyms.erase(to_erase); + } + } +} + void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const { // If we have a source manager and a location, use it. Otherwise just // use the generic report_fatal_error(). 
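
The erase-while-iterating pattern in finalizeDwarfSections above is what motivates the MapVector::erase(iterator) overload added to MapVector.h: it returns the iterator following the removed element, so the loop advances either by ++ (keep) or by the return value of erase (drop). A minimal sketch of the same idiom with a plain std::vector, illustrative only and not the llvm::MapVector API:

  // Illustrative only: the remove-while-walking idiom used by
  // finalizeDwarfSections, shown with std::vector instead of llvm::MapVector.
  // The key point is advancing via the iterator returned by erase(), which
  // already designates the element after the removed one, rather than ++.
  #include <cassert>
  #include <string>
  #include <vector>

  struct Section { std::string Name; bool HasInstructions; };

  static void dropEmptySections(std::vector<Section> &Sections) {
    auto It = Sections.begin();
    while (It != Sections.end()) {
      if (It->HasInstructions)
        ++It;                       // keep this entry, look at the next one
      else
        It = Sections.erase(It);    // drop it; erase() yields the successor
    }
  }

  int main() {
    std::vector<Section> Sections = {
        {".text", true}, {".init", false}, {".text.hot", true}};
    dropEmptySections(Sections);
    assert(Sections.size() == 2 && Sections[1].Name == ".text.hot");
    return 0;
  }
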
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 1016466839f6..995bee121324 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -518,8 +519,12 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4); - EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); - EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1) { + EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4); + } else { + EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + } EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); if (!context.getCompilationDir().empty()) EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); @@ -552,20 +557,14 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { } // When generating dwarf for assembly source files this emits the data for -// .debug_aranges section. Which contains a header and a table of pairs of -// PointerSize'ed values for the address and size of section(s) with line table -// entries (just the default .text in our case) and a terminating pair of zeros. +// .debug_aranges section. This section contains a header and a table of pairs +// of PointerSize'ed values for the address and size of section(s) with line +// table entries. static void EmitGenDwarfAranges(MCStreamer *MCOS, const MCSymbol *InfoSectionSymbol) { MCContext &context = MCOS->getContext(); - // Create a symbol at the end of the section that we are creating the dwarf - // debugging info to use later in here as part of the expression to calculate - // the size of the section for the table. - MCOS->SwitchSection(context.getGenDwarfSection()); - MCSymbol *SectionEndSym = context.CreateTempSymbol(); - MCOS->EmitLabel(SectionEndSym); - context.setGenDwarfSectionEndSym(SectionEndSym); + auto &Sections = context.getGenDwarfSectionSyms(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); @@ -583,8 +582,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, Length += Pad; // Add the size of the pair of PointerSize'ed values for the address and size - // of the one default .text section we have in the table. - Length += 2 * AddrSize; + // of each section we have in the table. + Length += 2 * AddrSize * Sections.size(); // And the pair of terminating zeros. Length += 2 * AddrSize; @@ -608,14 +607,21 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, for(int i = 0; i < Pad; i++) MCOS->EmitIntValue(0, 1); - // Now emit the table of pairs of PointerSize'ed values for the section(s) - // address and size, in our case just the one default .text section. - const MCExpr *Addr = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, - *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0); - MCOS->EmitValue(Addr, AddrSize); - MCOS->EmitAbsValue(Size, AddrSize); + // Now emit the table of pairs of PointerSize'ed values for the section + // addresses and sizes. 
+ for (const auto &sec : Sections) { + MCSymbol* StartSymbol = sec.second.first; + MCSymbol* EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + const MCExpr *Addr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitValue(Addr, AddrSize); + MCOS->EmitAbsValue(Size, AddrSize); + } // And finally the pair of terminating zeros. MCOS->EmitIntValue(0, AddrSize); @@ -627,7 +633,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // DIE and a list of label DIEs. static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCSymbol *AbbrevSectionSymbol, - const MCSymbol *LineSectionSymbol) { + const MCSymbol *LineSectionSymbol, + const MCSymbol *RangesSectionSymbol) { MCContext &context = MCOS->getContext(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); @@ -674,15 +681,37 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(0, 4); } - // AT_low_pc, the first address of the default .text section. - const MCExpr *Start = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(Start, AddrSize); + if (RangesSectionSymbol) { + // There are multiple sections containing code, so we must use the + // .debug_ranges sections. - // AT_high_pc, the last address of the default .text section. - const MCExpr *End = MCSymbolRefExpr::Create( - context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(End, AddrSize); + // AT_ranges, the 4 byte offset from the start of the .debug_ranges section + // to the address range list for this compilation unit. + MCOS->EmitSymbolValue(RangesSectionSymbol, 4); + } else { + // If we only have one non-empty code section, we can use the simpler + // AT_low_pc and AT_high_pc attributes. + + // Find the first (and only) non-empty text section + auto &Sections = context.getGenDwarfSectionSyms(); + const auto TextSection = Sections.begin(); + assert(TextSection != Sections.end() && "No text section found"); + + MCSymbol* StartSymbol = TextSection->second.first; + MCSymbol* EndSymbol = TextSection->second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // AT_low_pc, the first address of the default .text section. + const MCExpr *Start = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(Start, AddrSize); + + // AT_high_pc, the last address of the default .text section. + const MCExpr *End = MCSymbolRefExpr::Create( + EndSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(End, AddrSize); + } // AT_name, the name of the source file. Reconstruct from the first directory // and file table entries. @@ -766,13 +795,51 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitLabel(InfoEnd); } +// When generating dwarf for assembly source files this emits the data for +// .debug_ranges section. We only emit one range list, which spans all of the +// executable sections of this file. 
+static void EmitGenDwarfRanges(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + auto &Sections = context.getGenDwarfSectionSyms(); + + const MCAsmInfo *AsmInfo = context.getAsmInfo(); + int AddrSize = AsmInfo->getPointerSize(); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + + for (const auto sec : Sections) { + + MCSymbol* StartSymbol = sec.second.first; + MCSymbol* EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // Emit a base address selection entry for the start of this section + const MCExpr *SectionStartAddr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitFill(AddrSize, 0xFF); + MCOS->EmitValue(SectionStartAddr, AddrSize); + + // Emit a range list entry spanning this section + const MCExpr *SectionSize = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitAbsValue(SectionSize, AddrSize); + } + + // Emit end of list entry + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitIntValue(0, AddrSize); +} + // // When generating dwarf for assembly source files this emits the Dwarf // sections. // void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { - // Create the dwarf sections in this order (.debug_line already created). MCContext &context = MCOS->getContext(); + + // Create the dwarf sections in this order (.debug_line already created). const MCAsmInfo *AsmInfo = context.getAsmInfo(); bool CreateDwarfSectionSymbols = AsmInfo->doesDwarfUseRelocationsAcrossSections(); @@ -781,6 +848,22 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0); MCSymbol *AbbrevSectionSymbol = nullptr; MCSymbol *InfoSectionSymbol = nullptr; + MCSymbol *RangesSectionSymbol = NULL; + + // Create end symbols for each section, and remove empty sections + MCOS->getContext().finalizeDwarfSections(*MCOS); + + // If there are no sections to generate debug info for, we don't need + // to do anything + if (MCOS->getContext().getGenDwarfSectionSyms().empty()) + return; + + // We only need to use the .debug_ranges section if we have multiple + // code sections. + const bool UseRangesSection = + MCOS->getContext().getGenDwarfSectionSyms().size() > 1; + CreateDwarfSectionSymbols |= UseRangesSection; + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); if (CreateDwarfSectionSymbols) { InfoSectionSymbol = context.CreateTempSymbol(); @@ -791,20 +874,30 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { AbbrevSectionSymbol = context.CreateTempSymbol(); MCOS->EmitLabel(AbbrevSectionSymbol); } - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + if (UseRangesSection) { + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + if (CreateDwarfSectionSymbols) { + RangesSectionSymbol = context.CreateTempSymbol(); + MCOS->EmitLabel(RangesSectionSymbol); + } + } - // If there are no line table entries then do not emit any section contents. - if (!context.hasMCLineSections()) - return; + assert((RangesSectionSymbol != NULL) || !UseRangesSection); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); // Output the data for .debug_aranges section. EmitGenDwarfAranges(MCOS, InfoSectionSymbol); + if (UseRangesSection) + EmitGenDwarfRanges(MCOS); + // Output the data for .debug_abbrev section. EmitGenDwarfAbbrev(MCOS); // Output the data for .debug_info section. 
- EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol); + EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, + RangesSectionSymbol); } // @@ -815,12 +908,13 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { // void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr, SMLoc &Loc) { - // We won't create dwarf labels for temporary symbols or symbols not in - // the default text. + // We won't create dwarf labels for temporary symbols. if (Symbol->isTemporary()) return; MCContext &context = MCOS->getContext(); - if (context.getGenDwarfSection() != MCOS->getCurrentSection().first) + // We won't create dwarf labels for symbols in sections that we are not + // generating debug info for. + if (!context.getGenDwarfSectionSyms().count(MCOS->getCurrentSection().first)) return; // The dwarf label's name does not have the symbol name's leading diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 1ba02d181dbb..50375d37d841 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -633,10 +633,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. if (getContext().getGenDwarfForAssembly()) { - getContext().setGenDwarfSection(getStreamer().getCurrentSection().first); MCSymbol *SectionStartSym = getContext().CreateTempSymbol(); getStreamer().EmitLabel(SectionStartSym); - getContext().setGenDwarfSectionStartSym(SectionStartSym); + auto InsertResult = getContext().addGenDwarfSection( + getStreamer().getCurrentSection().first); + assert(InsertResult.second && ".text section should not have debug info yet"); + InsertResult.first->second.first = SectionStartSym; getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective( 0, StringRef(), getContext().getMainFileName())); } @@ -1592,12 +1594,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { printMessage(IDLoc, SourceMgr::DK_Note, OS.str()); } - // If we are generating dwarf for assembly source files and the current - // section is the initial text section then generate a .loc directive for - // the instruction. + // If we are generating dwarf for the current section then generate a .loc + // directive for the instruction. 
if (!HadError && getContext().getGenDwarfForAssembly() && - getContext().getGenDwarfSection() == - getStreamer().getCurrentSection().first) { + getContext().getGenDwarfSectionSyms().count( + getStreamer().getCurrentSection().first)) { unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); @@ -2029,7 +2030,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, break; if (FAI >= NParameters) { - assert(M && "expected macro to be defined"); + assert(M && "expected macro to be defined"); Error(IDLoc, "parameter named '" + FA.Name + "' does not exist for macro '" + M->Name + "'"); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 78bb6c7aad0e..98b2b3bd60b2 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -150,7 +150,7 @@ class ELFAsmParser : public MCAsmParserExtension { private: bool ParseSectionName(StringRef &SectionName); - bool ParseSectionArguments(bool IsPush); + bool ParseSectionArguments(bool IsPush, SMLoc loc); unsigned parseSunStyleSectionFlags(); }; @@ -382,7 +382,7 @@ unsigned ELFAsmParser::parseSunStyleSectionFlags() { bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { getStreamer().PushSection(); - if (ParseSectionArguments(/*IsPush=*/true)) { + if (ParseSectionArguments(/*IsPush=*/true, loc)) { getStreamer().PopSection(); return true; } @@ -397,11 +397,11 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { } // FIXME: This is a work in progress. -bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { - return ParseSectionArguments(/*IsPush=*/false); +bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc loc) { + return ParseSectionArguments(/*IsPush=*/false, loc); } -bool ELFAsmParser::ParseSectionArguments(bool IsPush) { +bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { StringRef SectionName; if (ParseSectionName(SectionName)) @@ -545,10 +545,24 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) { } SectionKind Kind = computeSectionKind(Flags, Size); - getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, - Flags, Kind, Size, - GroupName), - Subsection); + const MCSection *ELFSection = getContext().getELFSection( + SectionName, Type, Flags, Kind, Size, GroupName); + getStreamer().SwitchSection(ELFSection, Subsection); + + if (getContext().getGenDwarfForAssembly()) { + auto &Sections = getContext().getGenDwarfSectionSyms(); + auto InsertResult = Sections.insert( + std::make_pair(ELFSection, std::make_pair(nullptr, nullptr))); + if (InsertResult.second) { + if (getContext().getDwarfVersion() <= 2) + Error(loc, "DWARF2 only supports one section per compilation unit"); + + MCSymbol *SectionStartSymbol = getContext().CreateTempSymbol(); + getStreamer().EmitLabel(SectionStartSymbol); + InsertResult.first->second.first = SectionStartSymbol; + } + } + return false; } From 761f33167f2ae180dddcf48abcfcc3ead0c2d219 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 16:17:42 +0000 Subject: [PATCH 110/157] Kill the LLVM global lock. This patch removes the LLVM global lock, and updates all existing users of the global lock to use their own mutex. None of the existing users of the global lock were protecting code that was mutually exclusive with any of the other users of the global lock, so its purpose was not being met. 
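For illustration, the per-user locking pattern that replaces the global lock looks roughly like this (a minimal sketch; the mutex and function names are hypothetical and not part of this patch):

  #include "llvm/Support/ManagedStatic.h"
  #include "llvm/Support/Mutex.h"
  using namespace llvm;

  // Each user now owns a mutex that guards only its own shared state, instead
  // of bracketing it with llvm_acquire_global_lock()/llvm_release_global_lock().
  static ManagedStatic<sys::SmartMutex<true>> MyStateLock;

  void updateMyState() {
    sys::SmartScopedLock<true> Lock(*MyStateLock);
    // ... mutate only the state this mutex protects ...
  }

Timer.cpp below keeps using its existing TimerLock this way, while ManagedStatic.cpp switches to a lazily constructed std::recursive_mutex because it can be reached during static construction.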
Reviewed by: rnk Differential Revision: http://reviews.llvm.org/D4142 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211277 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ProgrammersManual.rst | 5 ----- include/llvm/Support/Threading.h | 8 -------- lib/Support/ManagedStatic.cpp | 23 ++++++++++++++++++++--- lib/Support/Threading.cpp | 12 ------------ lib/Support/Timer.cpp | 4 ++-- 5 files changed, 22 insertions(+), 30 deletions(-) diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 4973e5c66719..df3795a34d33 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -2226,11 +2226,6 @@ Note that, because no other threads are allowed to issue LLVM API calls before ``llvm_start_multithreaded()`` returns, it is possible to have ``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s. -The ``llvm_acquire_global_lock()`` and ``llvm_release_global_lock`` APIs provide -access to the global lock used to implement the double-checked locking for lazy -initialization. These should only be used internally to LLVM, and only if you -know what you're doing! - .. _llvmcontext: Achieving Isolation with ``LLVMContext`` diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index a7e8774558d5..d84d11867958 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -33,14 +33,6 @@ namespace llvm { /// mode or not. bool llvm_is_multithreaded(); - /// acquire_global_lock - Acquire the global lock. This is a no-op if called - /// before llvm_start_multithreaded(). - void llvm_acquire_global_lock(); - - /// release_global_lock - Release the global lock. This is a no-op if called - /// before llvm_start_multithreaded(). - void llvm_release_global_lock(); - /// llvm_execute_on_thread - Execute the given \p UserFn on a separate /// thread, passing it the provided \p UserData. /// diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 6a1c2a545a8d..9d7e99f97fb5 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -15,15 +15,32 @@ #include "llvm/Config/config.h" #include "llvm/Support/Atomic.h" #include +#include using namespace llvm; static const ManagedStaticBase *StaticList = nullptr; +// ManagedStatics can get created during execution of static constructors. As a +// result, we cannot use a global static std::mutex object for the lock since it +// may not have been constructed. Instead, we do a call-once initialization of +// a pointer to a mutex. +static std::once_flag MutexInitializationFlag; +static std::recursive_mutex* ManagedStaticMutex = nullptr; + +// Not all supported platforms (in particular VS2012) have thread-safe function +// static initialization, so roll our own. 
+static std::recursive_mutex& GetManagedStaticMutex() { + std::call_once(MutexInitializationFlag, + []() { ManagedStaticMutex = new std::recursive_mutex(); } ); + + return *ManagedStaticMutex; +} + void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { assert(Creator); if (llvm_is_multithreaded()) { - llvm_acquire_global_lock(); + std::lock_guard Lock(GetManagedStaticMutex()); if (!Ptr) { void* tmp = Creator(); @@ -43,8 +60,6 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), Next = StaticList; StaticList = this; } - - llvm_release_global_lock(); } else { assert(!Ptr && !DeleterFn && !Next && "Partially initialized ManagedStatic!?"); @@ -75,6 +90,8 @@ void ManagedStaticBase::destroy() const { /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. void llvm::llvm_shutdown() { + std::lock_guard Lock(GetManagedStaticMutex()); + while (StaticList) StaticList->destroy(); diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 1acfa79b11d5..2358dde0e6c5 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -21,13 +21,10 @@ using namespace llvm; static bool multithreaded_mode = false; -static sys::Mutex* global_lock = nullptr; - bool llvm::llvm_start_multithreaded() { #if LLVM_ENABLE_THREADS != 0 assert(!multithreaded_mode && "Already multithreaded!"); multithreaded_mode = true; - global_lock = new sys::Mutex(true); // We fence here to ensure that all initialization is complete BEFORE we // return from llvm_start_multithreaded(). @@ -47,7 +44,6 @@ void llvm::llvm_stop_multithreaded() { sys::MemoryFence(); multithreaded_mode = false; - delete global_lock; #endif } @@ -55,14 +51,6 @@ bool llvm::llvm_is_multithreaded() { return multithreaded_mode; } -void llvm::llvm_acquire_global_lock() { - if (multithreaded_mode) global_lock->acquire(); -} - -void llvm::llvm_release_global_lock() { - if (multithreaded_mode) global_lock->release(); -} - #if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 61465ae5e8be..210bda754e74 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -84,14 +85,13 @@ static TimerGroup *getDefaultTimerGroup() { sys::MemoryFence(); if (tmp) return tmp; - llvm_acquire_global_lock(); + sys::SmartScopedLock Lock(*TimerLock); tmp = DefaultTimerGroup; if (!tmp) { tmp = new TimerGroup("Miscellaneous Ungrouped Timers"); sys::MemoryFence(); DefaultTimerGroup = tmp; } - llvm_release_global_lock(); return tmp; } From b7509c63e9e5739ce2fec27579648df3ff2ac249 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 19 Jun 2014 16:35:19 +0000 Subject: [PATCH 111/157] Tests for r211273 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211279 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ARM/dwarf-asm-multiple-sections.s | 79 +++++++++++++++++++++ test/MC/ARM/dwarf-asm-no-code.s | 27 +++++++ test/MC/ARM/dwarf-asm-nonstandard-section.s | 57 +++++++++++++++ test/MC/ARM/dwarf-asm-single-section.s | 56 +++++++++++++++ 4 files changed, 219 insertions(+) create mode 100644 test/MC/ARM/dwarf-asm-multiple-sections.s create mode 100644 test/MC/ARM/dwarf-asm-no-code.s create mode 100644 test/MC/ARM/dwarf-asm-nonstandard-section.s create mode 
100644 test/MC/ARM/dwarf-asm-single-section.s diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s new file mode 100644 index 000000000000..5e64fcc023ff --- /dev/null +++ b/test/MC/ARM/dwarf-asm-multiple-sections.s @@ -0,0 +1,79 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 2 2>&1 | FileCheck -check-prefix VERSION %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 1 2>&1 | FileCheck -check-prefix DWARF1 %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 5 2>&1 | FileCheck -check-prefix DWARF5 %s + .section .text, "ax" +a: + mov r0, r0 + + .section foo, "ax" +b: + mov r1, r1 + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_ranges DW_FORM_data4 +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// CHECK-NOT-DWARF: DW_TAG_ +// DWARF: DW_AT_ranges [DW_FORM_data4] (0x00000000) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a") + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("b") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x00000024, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) +// DWARF-NEXT: [0x00000000 - 0x00000004) + + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 9 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 9 0 1 0 0 is_stmt end_sequence +// DWARF-NEXT: 0x0000000000000000 13 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 13 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF: 00000000 ffffffff 00000000 +// DWARF: 00000000 00000000 00000004 +// DWARF: 00000000 ffffffff 00000000 +// DWARF: 00000000 00000000 00000004 +// DWARF: 00000000 + + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: 00000010 R_ARM_ABS32 .debug_ranges +// RELOC-NEXT: 0000004f R_ARM_ABS32 .text +// RELOC-NEXT: 00000061 R_ARM_ABS32 foo + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_ranges]: +// RELOC-NEXT: 00000004 R_ARM_ABS32 .text +// RELOC-NEXT: 00000014 R_ARM_ABS32 foo + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 .text +// RELOC-NEXT: 00000018 R_ARM_ABS32 foo + + +// VERSION: {{.*}} error: DWARF2 only supports one section per compilation unit + +// DWARF1: Dwarf version 1 is not supported. +// DWARF5: Dwarf version 5 is not supported. 
diff --git a/test/MC/ARM/dwarf-asm-no-code.s b/test/MC/ARM/dwarf-asm-no-code.s new file mode 100644 index 000000000000..7d06a4190091 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-no-code.s @@ -0,0 +1,27 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + +// If there is no code in an assembly file, no debug info is produced + +.section .data, "aw" +a: +.long 42 + +// DWARF: .debug_abbrev contents: +// DWARF-NEXT: < EMPTY > + +// DWARF: .debug_info contents: + +// DWARF: .debug_aranges contents: + +// DWARF: .debug_line contents: + +// DWARF: .debug_ranges contents: + + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_info]: + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_aranges]: diff --git a/test/MC/ARM/dwarf-asm-nonstandard-section.s b/test/MC/ARM/dwarf-asm-nonstandard-section.s new file mode 100644 index 000000000000..adf92bab18d2 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-nonstandard-section.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + + .section foo, "ax" +b: + mov r1, r1 + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_low_pc DW_FORM_addr +// DWARF: DW_AT_high_pc DW_FORM_addr +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// DWARF-NOT: DW_TAG_ +// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) +// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("b") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) + + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 7 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 7 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF-NOT: {{0-9a-f}} +// DWARF: .debug_pubnames contents: + + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: 00000010 R_ARM_ABS32 foo +// RELOC-NEXT: 00000014 R_ARM_ABS32 foo +// RELOC-NEXT: 00000053 R_ARM_ABS32 foo + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 foo diff --git a/test/MC/ARM/dwarf-asm-single-section.s b/test/MC/ARM/dwarf-asm-single-section.s new file mode 100644 index 000000000000..d69dfab5dee5 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-single-section.s @@ -0,0 +1,56 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | 
FileCheck -check-prefix RELOC %s + + .section .text, "ax" +a: + mov r0, r0 + + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_low_pc DW_FORM_addr +// DWARF: DW_AT_high_pc DW_FORM_addr +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// CHECK-NOT-DWARF: DW_TAG_ +// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) +// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 7 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 7 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF-NOT: {{0-9a-f}} +// DWARF: .debug_pubnames contents: + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: 00000010 R_ARM_ABS32 .text +// RELOC-NEXT: 00000014 R_ARM_ABS32 .text +// RELOC-NEXT: 00000053 R_ARM_ABS32 .text + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 .text From e4d0a5ec1841ac5a407c3a07b62749923dda74c2 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Thu, 19 Jun 2014 16:50:16 +0000 Subject: [PATCH 112/157] [ValueTracking] Extend range metadata to call/invoke Summary: With this patch, range metadata can be added to call/invoke including IntrinsicInst. Previously, it could only be added to load. Rename computeKnownBitsLoad to computeKnownBitsFromRangeMetadata because range metadata is not only used by load. Update the language reference to reflect this change. Test Plan: Add several tests in range-2.ll to confirm the verifier is happy with having range metadata on call/invoke. Add two tests in AddOverFlow.ll to confirm annotating range metadata to call/invoke can benefit InstCombine. 
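For reference, attaching such a range to a call from C++ looks roughly like this (a sketch against the IR APIs in this tree; the callee "foo", the module and the IRBuilder are placeholders):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Metadata.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Mark the i16 value returned by a call to foo() as lying in [0, 8).
  static CallInst *emitBoundedCall(Module &M, IRBuilder<> &Builder) {
    LLVMContext &Ctx = M.getContext();
    IntegerType *I16 = Type::getInt16Ty(Ctx);
    CallInst *CI = Builder.CreateCall(M.getFunction("foo"));
    Value *RangeOps[] = {ConstantInt::get(I16, 0), ConstantInt::get(I16, 8)};
    CI->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, RangeOps));
    return CI;
  }

computeKnownBits() then narrows the known bits of the call's result through computeKnownBitsFromRangeMetadata(), just as it already did for annotated loads.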
Reviewers: meheff, nlewycky, reames, hfinkel, eliben Reviewed By: eliben Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D4187 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211281 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 16 +++++---- include/llvm/Analysis/ValueTracking.h | 5 ++- lib/Analysis/ValueTracking.cpp | 17 ++++++--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- lib/IR/Verifier.cpp | 3 +- test/Transforms/InstCombine/AddOverFlow.ll | 38 ++++++++++++++++++++ test/Transforms/InstCombine/add2.ll | 41 ++++++++++++++++++++++ test/Verifier/range-1.ll | 2 +- test/Verifier/range-2.ll | 30 ++++++++++++++++ 9 files changed, 138 insertions(+), 16 deletions(-) diff --git a/docs/LangRef.rst b/docs/LangRef.rst index cb94d3967b13..b75058fa9a5f 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -2749,11 +2749,12 @@ number representing the maximum relative error, for example: '``range``' Metadata ^^^^^^^^^^^^^^^^^^^^ -``range`` metadata may be attached only to loads of integer types. It -expresses the possible ranges the loaded value is in. The ranges are -represented with a flattened list of integers. The loaded value is known -to be in the union of the ranges defined by each consecutive pair. Each -pair has the following properties: +``range`` metadata may be attached only to ``load``, ``call`` and ``invoke`` of +integer types. It expresses the possible ranges the loaded value or the value +returned by the called function at this call site is in. The ranges are +represented with a flattened list of integers. The loaded value or the value +returned is known to be in the union of the ranges defined by each consecutive +pair. Each pair has the following properties: - The type must match the type loaded by the instruction. - The pair ``a,b`` represents the range ``[a,b)``. @@ -2771,8 +2772,9 @@ Examples: %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1 %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1 - %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5 - %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5 + %c = call i8 @foo(), !range !2 ; Can only be 0, 1, 3, 4 or 5 + %d = invoke i8 @bar() to label %cont + unwind label %lpad, !range !3 ; Can only be -2, -1, 3, 4 or 5 ... !0 = metadata !{ i8 0, i8 2 } !1 = metadata !{ i8 255, i8 2 } diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index ce7896738d8c..83b5408fb1c2 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -37,7 +37,10 @@ namespace llvm { /// for all of the elements in the vector. void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout *TD = nullptr, unsigned Depth = 0); - void computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero); + /// Compute known bits from the range metadata. + /// \p KnownZero the set of bits that are known to be zero + void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, + APInt &KnownZero); /// ComputeSignBit - Determine whether the sign bit is known to be zero or /// one. Convenience wrapper around computeKnownBits. 
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 4f4875357828..9de945ed7016 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -188,7 +188,8 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, KnownOne.setBit(BitWidth - 1); } -void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) { +void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, + APInt &KnownZero) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); @@ -338,7 +339,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsLoad(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -733,6 +734,12 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Call: + case Instruction::Invoke: + if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) + computeKnownBitsFromRangeMetadata(*MD, KnownZero); + // If a range metadata is attached to this IntrinsicInst, intersect the + // explicit range specified by the metadata and the implicit range of + // the intrinsic. if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; @@ -742,16 +749,16 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::ctpop: { unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero = APInt::getHighBitsSet(64, 32); + KnownZero |= APInt::getHighBitsSet(64, 32); break; } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 639eb462e7ff..5923f0e208e1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2084,7 +2084,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsLoad(*Ranges, KnownZero); + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); } break; } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index d1c7f7d25c39..ad3c29c564e7 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -2233,7 +2233,8 @@ void Verifier::visitInstruction(Instruction &I) { } MDNode *MD = I.getMetadata(LLVMContext::MD_range); - Assert1(!MD || isa(I), "Ranges are only for loads!", &I); + Assert1(!MD || isa(I) || isa(I) || isa(I), + "Ranges are only for loads, calls and invokes!", &I); InstsInThisBlock.insert(&I); } diff --git a/test/Transforms/InstCombine/AddOverFlow.ll b/test/Transforms/InstCombine/AddOverFlow.ll index 70178ec83b6e..8f3d429c8d60 100644 --- a/test/Transforms/InstCombine/AddOverFlow.ll +++ b/test/Transforms/InstCombine/AddOverFlow.ll 
@@ -34,6 +34,44 @@ define i16 @zero_sign_bit2(i16 %a, i16 %b) { ret i16 %3 } +declare i16 @bounded(i16 %input); +declare i32 @__gxx_personality_v0(...); +!0 = metadata !{i16 0, i16 32768} ; [0, 32767] +!1 = metadata !{i16 0, i16 32769} ; [0, 32768] + +define i16 @add_bounded_values(i16 %a, i16 %b) { +; CHECK-LABEL: @add_bounded_values( +entry: + %c = call i16 @bounded(i16 %a), !range !0 + %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !0 +cont: +; %c and %d are in [0, 32767]. Therefore, %c + %d doesn't unsigned overflow. + %e = add i16 %c, %d +; CHECK: add nuw i16 %c, %d + ret i16 %e +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret i16 42 +} + +define i16 @add_bounded_values_2(i16 %a, i16 %b) { +; CHECK-LABEL: @add_bounded_values_2( +entry: + %c = call i16 @bounded(i16 %a), !range !1 + %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !1 +cont: +; Similar to add_bounded_values, but %c and %d are in [0, 32768]. Therefore, +; %c + %d may unsigned overflow and we cannot add NUW. + %e = add i16 %c, %d +; CHECK: add i16 %c, %d + ret i16 %e +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret i16 42 +} + ; CHECK-LABEL: @ripple_nsw1 ; CHECK: add nsw i16 %a, %b define i16 @ripple_nsw1(i16 %x, i16 %y) { diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index 0328c4ff6e3f..821fce0dabcc 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -217,3 +217,44 @@ define i32 @mul_add_to_mul_6(i32 %x, i32 %y) { ; CHECK-NEXT: %add = mul nsw i32 %mul1, 6 ; CHECK-NEXT: ret i32 %add } + +; This test and the next test verify that when a range metadata is attached to +; llvm.cttz, ValueTracking correctly intersects the range specified by the +; metadata and the range implied by the intrinsic. +; +; In this test, the range specified by the metadata is more strict. Therefore, +; ValueTracking uses that range. +define i16 @add_cttz(i16 %a) { +; CHECK-LABEL: @add_cttz( + ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned + ; is in [0, 16). The range metadata indicates the value returned is in [0, 8). + ; Intersecting these ranges, we know the value returned is in [0, 8). + ; Therefore, InstCombine will transform + ; add %cttz, 1111 1111 1111 1000 ; decimal -8 + ; to + ; or %cttz, 1111 1111 1111 1000 + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !0 + %b = add i16 %cttz, -8 +; CHECK: or i16 %cttz, -8 + ret i16 %b +} +declare i16 @llvm.cttz.i16(i16, i1) +!0 = metadata !{i16 0, i16 8} + +; Similar to @add_cttz, but in this test, the range implied by the +; intrinsic is more strict. Therefore, ValueTracking uses that range. +define i16 @add_cttz_2(i16 %a) { +; CHECK-LABEL: @add_cttz_2( + ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned + ; is in [0, 16). The range metadata indicates the value returned is in + ; [0, 32). Intersecting these ranges, we know the value returned is in + ; [0, 16). 
Therefore, InstCombine will transform + ; add %cttz, 1111 1111 1111 0000 ; decimal -16 + ; to + ; or %cttz, 1111 1111 1111 0000 + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !1 + %b = add i16 %cttz, -16 +; CHECK: or i16 %cttz, -16 + ret i16 %b +} +!1 = metadata !{i16 0, i16 32} diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll index b6a75d13bba0..f15ca3f74065 100644 --- a/test/Verifier/range-1.ll +++ b/test/Verifier/range-1.ll @@ -6,7 +6,7 @@ entry: ret void } !0 = metadata !{i8 0, i8 1} -; CHECK: Ranges are only for loads! +; CHECK: Ranges are only for loads, calls and invokes! ; CHECK-NEXT: store i8 0, i8* %x, align 1, !range !0 define i8 @f2(i8* %x) { diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll index 8d85d1915195..1d2e0575d76a 100644 --- a/test/Verifier/range-2.ll +++ b/test/Verifier/range-2.ll @@ -34,3 +34,33 @@ entry: ret i8 %y } !4 = metadata !{i8 -1, i8 0, i8 1, i8 -2} + +; We can annotate the range of the return value of a CALL. +define void @call_all(i8* %x) { +entry: + %v1 = call i8 @f1(i8* %x), !range !0 + %v2 = call i8 @f2(i8* %x), !range !1 + %v3 = call i8 @f3(i8* %x), !range !2 + %v4 = call i8 @f4(i8* %x), !range !3 + %v5 = call i8 @f5(i8* %x), !range !4 + ret void +} + +; We can annotate the range of the return value of an INVOKE. +define void @invoke_all(i8* %x) { +entry: + %v1 = invoke i8 @f1(i8* %x) to label %cont unwind label %lpad, !range !0 + %v2 = invoke i8 @f2(i8* %x) to label %cont unwind label %lpad, !range !1 + %v3 = invoke i8 @f3(i8* %x) to label %cont unwind label %lpad, !range !2 + %v4 = invoke i8 @f4(i8* %x) to label %cont unwind label %lpad, !range !3 + %v5 = invoke i8 @f5(i8* %x) to label %cont unwind label %lpad, !range !4 + +cont: + ret void + +lpad: + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret void +} +declare i32 @__gxx_personality_v0(...) From 36a321160e61149088f306c3605a9ad5fdd67e0a Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 17:15:36 +0000 Subject: [PATCH 113/157] MCNullStreamer: assign file IDs to resolve crashes and errors Use the MCStreamer base implementations for file ID tracking instead of overriding them as no-ops. Avoids assertions when streaming Dwarf debug info, and fixes ASM parsing of loc and file directives. 
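For example, a sequence like the following no longer yields a bogus file number of 0 from the null streamer (a rough sketch; it assumes an already-configured MCContext, and the file name is illustrative):

  #include "llvm/MC/MCContext.h"
  #include "llvm/MC/MCStreamer.h"
  using namespace llvm;

  void emitNullDwarfLines(MCContext &Ctx) {
    MCStreamer *S = createNullStreamer(Ctx);
    // The inherited implementation assigns and returns a real DWARF file
    // number; the removed override always returned 0.
    unsigned FileID = S->EmitDwarfFileDirective(0, "", "input.s");
    // Later location directives can then refer to that file number.
    S->EmitDwarfLocDirective(FileID, /*Line=*/1, /*Column=*/0, /*Flags=*/0,
                             /*Isa=*/0, /*Discriminator=*/0, "input.s");
    delete S;
  }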
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211282 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCNullStreamer.cpp | 10 +--------- test/DebugInfo/global.ll | 3 +++ test/MC/AsmParser/directive_file.s | 1 + test/MC/AsmParser/directive_line.s | 1 + test/MC/AsmParser/directive_loc.s | 1 + 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 4f2740ed3ae9..5ab2829a6ba3 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -81,15 +81,7 @@ namespace { unsigned char Value = 0) override { return false; } void EmitFileDirective(StringRef Filename) override {} - unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename, - unsigned CUID = 0) override { - return 0; - } - void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, - unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator, - StringRef FileName) override {} + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo&) override {} void EmitBundleAlignMode(unsigned AlignPow2) override {} diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll index c515114fd50b..3c97f0cb2279 100644 --- a/test/DebugInfo/global.ll +++ b/test/DebugInfo/global.ll @@ -3,6 +3,9 @@ ; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t ; RUN: llvm-dwarfdump %t | FileCheck %s +; Also test that the null streamer doesn't crash with debug info. +; RUN: %llc_dwarf -O0 -filetype=null < %s + ; generated from the following source compiled to bitcode with clang -g -O1 ; static int i; ; int main() { diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s index 9b99e0f24e99..d7290ebe1dbe 100644 --- a/test/MC/AsmParser/directive_file.s +++ b/test/MC/AsmParser/directive_file.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null .file "hello" .file 1 "worl\144" # "\144" is "d" diff --git a/test/MC/AsmParser/directive_line.s b/test/MC/AsmParser/directive_line.s index 94ce44602998..110b68a46216 100644 --- a/test/MC/AsmParser/directive_line.s +++ b/test/MC/AsmParser/directive_line.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null # FIXME: Actually test the output. .line diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s index cda9579fb272..404ebcecdd0a 100644 --- a/test/MC/AsmParser/directive_loc.s +++ b/test/MC/AsmParser/directive_loc.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null .file 1 "hello" # CHECK: .file 1 "hello" From 6072096fd63c1996cfe731ecddcb6b3232e37b70 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 17:59:14 +0000 Subject: [PATCH 114/157] DebugInfo: Fission: Ensure the address pool entries for location lists are emitted. The address pool was being emitted before location lists. The latter could add more entries to the pool which would be lost/never emitted. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211284 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- test/DebugInfo/X86/fission-ranges.ll | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3847eb124f3d..f78ca2c03b4e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1038,9 +1038,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); + emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - emitDebugLocDWO(); } else // Emit info into a debug loc section. emitDebugLoc(); diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll index 057039c3c5cc..135837582fcc 100644 --- a/test/DebugInfo/X86/fission-ranges.ll +++ b/test/DebugInfo/X86/fission-ranges.ll @@ -44,6 +44,13 @@ ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo) ; HDR-NOT: .rela.{{.*}}.dwo +; Make sure we have enough stuff in the debug_addr to cover the address indexes +; (6 is the last index in debug_loc.dwo, making 7 entries of 8 bytes each, 7 * 8 +; == 56 base 10 == 38 base 16) + +; HDR: .debug_addr 00000038 +; HDR-NOT: .rela.{{.*}}.dwo + ; From the code: ; extern int c; From a4d0ff9cd1fb3fe4c3a58d0747dc523f7d9e9e3f Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 18:18:23 +0000 Subject: [PATCH 115/157] Remove support for LLVM runtime multi-threading. After a number of previous small iterations, the functions llvm_start_multithreaded() and llvm_stop_multithreaded() have been reduced essentially to no-ops. This change removes them entirely. Reviewed by: rnk, dblaikie Differential Revision: http://reviews.llvm.org/D4216 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211287 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ProgrammersManual.rst | 45 +++------------------------- include/llvm-c/Core.h | 13 ++++---- include/llvm/Support/ManagedStatic.h | 3 -- include/llvm/Support/Threading.h | 23 ++++---------- lib/IR/Core.cpp | 3 +- lib/Support/Threading.cpp | 29 ++---------------- unittests/Support/ManagedStatic.cpp | 2 -- 7 files changed, 19 insertions(+), 99 deletions(-) diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index df3795a34d33..a7b28b36ca1d 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -2170,46 +2170,13 @@ compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and using the resultant compiler to build a copy of LLVM with multithreading support. -.. _startmultithreaded: - -Entering and Exiting Multithreaded Mode ---------------------------------------- - -In order to properly protect its internal data structures while avoiding -excessive locking overhead in the single-threaded case, the LLVM must intialize -certain data structures necessary to provide guards around its internals. To do -so, the client program must invoke ``llvm_start_multithreaded()`` before making -any concurrent LLVM API calls. To subsequently tear down these structures, use -the ``llvm_stop_multithreaded()`` call. You can also use the -``llvm_is_multithreaded()`` call to check the status of multithreaded mode. - -Note that both of these calls must be made *in isolation*. 
That is to say that -no other LLVM API calls may be executing at any time during the execution of -``llvm_start_multithreaded()`` or ``llvm_stop_multithreaded``. It is the -client's responsibility to enforce this isolation. - -The return value of ``llvm_start_multithreaded()`` indicates the success or -failure of the initialization. Failure typically indicates that your copy of -LLVM was built without multithreading support, typically because GCC atomic -intrinsics were not found in your system compiler. In this case, the LLVM API -will not be safe for concurrent calls. However, it *will* be safe for hosting -threaded applications in the JIT, though :ref:`care must be taken -` to ensure that side exits and the like do not accidentally -result in concurrent LLVM API calls. - .. _shutdown: Ending Execution with ``llvm_shutdown()`` ----------------------------------------- When you are done using the LLVM APIs, you should call ``llvm_shutdown()`` to -deallocate memory used for internal structures. This will also invoke -``llvm_stop_multithreaded()`` if LLVM is operating in multithreaded mode. As -such, ``llvm_shutdown()`` requires the same isolation guarantees as -``llvm_stop_multithreaded()``. - -Note that, if you use scope-based shutdown, you can use the -``llvm_shutdown_obj`` class, which calls ``llvm_shutdown()`` in its destructor. +deallocate memory used for internal structures. .. _managedstatic: @@ -2217,15 +2184,11 @@ Lazy Initialization with ``ManagedStatic`` ------------------------------------------ ``ManagedStatic`` is a utility class in LLVM used to implement static -initialization of static resources, such as the global type tables. Before the -invocation of ``llvm_shutdown()``, it implements a simple lazy initialization -scheme. Once ``llvm_start_multithreaded()`` returns, however, it uses +initialization of static resources, such as the global type tables. In a +single-threaded environment, it implements a simple lazy initialization scheme. +When LLVM is compiled with support for multi-threading, however, it uses double-checked locking to implement thread-safe lazy initialization. -Note that, because no other threads are allowed to issue LLVM API calls before -``llvm_start_multithreaded()`` returns, it is possible to have -``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s. - .. _llvmcontext: Achieving Isolation with ``LLVMContext`` diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 0e78ed71fa9a..8693a3020aba 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -2848,16 +2848,13 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM); * @{ */ -/** Allocate and initialize structures needed to make LLVM safe for - multithreading. The return value indicates whether multithreaded - initialization succeeded. Must be executed in isolation from all - other LLVM api calls. - @see llvm::llvm_start_multithreaded */ +/** Deprecated: Multi-threading can only be enabled/disabled with the compile + time define LLVM_ENABLE_THREADS. This function always returns + LLVMIsMultithreaded(). */ LLVMBool LLVMStartMultithreaded(void); -/** Deallocate structures necessary to make LLVM safe for multithreading. - Must be executed in isolation from all other LLVM api calls. - @see llvm::llvm_stop_multithreaded */ +/** Deprecated: Multi-threading can only be enabled/disabled with the compile + time define LLVM_ENABLE_THREADS. */ void LLVMStopMultithreaded(void); /** Check whether LLVM is executing in thread-safe mode or not. 
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h index 1bb8cea092f9..d8fbfeb8e20c 100644 --- a/include/llvm/Support/ManagedStatic.h +++ b/include/llvm/Support/ManagedStatic.h @@ -103,9 +103,6 @@ void llvm_shutdown(); /// llvm_shutdown() when it is destroyed. struct llvm_shutdown_obj { llvm_shutdown_obj() { } - explicit llvm_shutdown_obj(bool multithreaded) { - if (multithreaded) llvm_start_multithreaded(); - } ~llvm_shutdown_obj() { llvm_shutdown(); } }; diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index d84d11867958..981971bd1e86 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -7,30 +7,19 @@ // //===----------------------------------------------------------------------===// // -// TThis file defines llvm_start_multithreaded() and friends. +// This file declares helper functions for running LLVM in a multi-threaded +// environment. // //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_THREADING_H #define LLVM_SUPPORT_THREADING_H -namespace llvm { - /// llvm_start_multithreaded - Allocate and initialize structures needed to - /// make LLVM safe for multithreading. The return value indicates whether - /// multithreaded initialization succeeded. LLVM will still be operational - /// on "failed" return, and will still be safe for hosting threading - /// applications in the JIT, but will not be safe for concurrent calls to the - /// LLVM APIs. - /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS. - bool llvm_start_multithreaded(); - - /// llvm_stop_multithreaded - Deallocate structures necessary to make LLVM - /// safe for multithreading. - /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS. - void llvm_stop_multithreaded(); +#include "llvm/Support/Mutex.h" - /// llvm_is_multithreaded - Check whether LLVM is executing in thread-safe - /// mode or not. +namespace llvm { + /// Returns true if LLVM is compiled with support for multi-threading, and + /// false otherwise. bool llvm_is_multithreaded(); /// llvm_execute_on_thread - Execute the given \p UserFn on a separate diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 197b6cb9054e..68b9baa2baa2 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2702,11 +2702,10 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM) { /*===-- Threading ------------------------------------------------------===*/ LLVMBool LLVMStartMultithreaded() { - return llvm_start_multithreaded(); + return LLVMIsMultithreaded(); } void LLVMStopMultithreaded() { - llvm_stop_multithreaded(); } LLVMBool LLVMIsMultithreaded() { diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 2358dde0e6c5..ca7f3f64aa37 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements llvm_start_multithreaded() and friends. +// This file defines helper functions for running LLVM in a multi-threaded +// environment. 
// //===----------------------------------------------------------------------===// @@ -19,38 +20,14 @@ using namespace llvm; -static bool multithreaded_mode = false; - -bool llvm::llvm_start_multithreaded() { +bool llvm::llvm_is_multithreaded() { #if LLVM_ENABLE_THREADS != 0 - assert(!multithreaded_mode && "Already multithreaded!"); - multithreaded_mode = true; - - // We fence here to ensure that all initialization is complete BEFORE we - // return from llvm_start_multithreaded(). - sys::MemoryFence(); return true; #else return false; #endif } -void llvm::llvm_stop_multithreaded() { -#if LLVM_ENABLE_THREADS != 0 - assert(multithreaded_mode && "Not currently multithreaded!"); - - // We fence here to insure that all threaded operations are complete BEFORE we - // return from llvm_stop_multithreaded(). - sys::MemoryFence(); - - multithreaded_mode = false; -#endif -} - -bool llvm::llvm_is_multithreaded() { - return multithreaded_mode; -} - #if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp index ad2fc977e6d7..153884ba4298 100644 --- a/unittests/Support/ManagedStatic.cpp +++ b/unittests/Support/ManagedStatic.cpp @@ -47,7 +47,6 @@ TEST(Initialize, MultipleThreads) { void *p1 = test1::allocate_stack(a1); void *p2 = test1::allocate_stack(a2); - llvm_start_multithreaded(); pthread_t t1, t2; pthread_create(&t1, &a1, test1::helper, nullptr); pthread_create(&t2, &a2, test1::helper, nullptr); @@ -55,7 +54,6 @@ TEST(Initialize, MultipleThreads) { pthread_join(t2, nullptr); free(p1); free(p2); - llvm_stop_multithreaded(); } #endif From ab19aae90fc10bab660ceb3d426f31a3cb66d99e Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 18:25:06 +0000 Subject: [PATCH 116/157] Fix build on non-Windows platforms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211288 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Mutex.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h index 496a4381f3fc..f340051361b0 100644 --- a/include/llvm/Support/Mutex.h +++ b/include/llvm/Support/Mutex.h @@ -15,11 +15,13 @@ #define LLVM_SUPPORT_MUTEX_H #include "llvm/Support/Compiler.h" -#include "llvm/Support/Threading.h" #include namespace llvm { + // Forward declare this function. + bool llvm_is_multithreaded(); + namespace sys { /// @brief Platform agnostic Mutex class. From 9022b9a501c2c213deb2d062282658f367758dc7 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 18:26:26 +0000 Subject: [PATCH 117/157] Remove circular header reference in Threading.h/Mutex.h git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211290 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Threading.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index 981971bd1e86..7e8758407c7c 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -15,8 +15,6 @@ #ifndef LLVM_SUPPORT_THREADING_H #define LLVM_SUPPORT_THREADING_H -#include "llvm/Support/Mutex.h" - namespace llvm { /// Returns true if LLVM is compiled with support for multi-threading, and /// false otherwise. 
From f25c2015b33b5414598b4528763bdc5079385156 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 18:26:28 +0000 Subject: [PATCH 118/157] Remove use of removed function, llvm_stop_multithreading git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211291 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/ManagedStatic.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 9d7e99f97fb5..da01195a9df6 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -94,6 +94,4 @@ void llvm::llvm_shutdown() { while (StaticList) StaticList->destroy(); - - if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } From cadf0f68e4266d4ecef23360cd62be12057991c9 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 18:36:15 +0000 Subject: [PATCH 119/157] Relax this test a bit, we don't need the full contents of the frame section to match, just the version for this test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211293 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ELF/cfi-version.ll | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll index a06638c04e0e..dfe2c36d550f 100644 --- a/test/MC/ELF/cfi-version.ll +++ b/test/MC/ELF/cfi-version.ll @@ -2,9 +2,6 @@ ; RUN: llc %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 ; RUN: llc %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" -target triple = "armv8-arm-none-eabi" - ; Function Attrs: nounwind define i32 @foo() #0 { entry: @@ -37,11 +34,9 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !12 = metadata !{i32 2, i32 0, metadata !4, null} ; DWARF2: .debug_frame contents: -; DWARF2: 00000000 0000000c ffffffff CIE -; DWARF2-NEXT: Version: 1 +; DWARF2: Version: 1 ; DWARF2-NEXT: Augmentation: ; DWARF34: .debug_frame contents: -; DWARF34: 00000000 0000000c ffffffff CIE -; DWARF34-NEXT: Version: 3 +; DWARF34: Version: 3 ; DWARF34-NEXT: Augmentation: From 01ccb9c9b8544aaf8076672abde4984e6ea56036 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 19:11:22 +0000 Subject: [PATCH 120/157] Use the c++ APIs. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211294 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/lto/lto.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 64abf5cd0582..86e1e810e8d6 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/CommandFlags.h" #include "llvm/LTO/LTOCodeGenerator.h" #include "llvm/LTO/LTOModule.h" +#include "llvm/Support/TargetSelect.h" // extra command-line flags needed for LTOCodeGenerator static cl::opt @@ -46,12 +47,12 @@ static bool parsedOptions = false; // Initialize the configured targets if they have not been initialized. 
static void lto_initialize() { if (!initialized) { - LLVMInitializeAllTargetInfos(); - LLVMInitializeAllTargets(); - LLVMInitializeAllTargetMCs(); - LLVMInitializeAllAsmParsers(); - LLVMInitializeAllAsmPrinters(); - LLVMInitializeAllDisassemblers(); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); initialized = true; } } From 580a14ff48136183f7904f3a951c37c81409a73e Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 19:25:49 +0000 Subject: [PATCH 121/157] Remove unused includes following r211294 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211297 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/lto/lto.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 86e1e810e8d6..d53d72a52293 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -13,8 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm-c/lto.h" -#include "llvm-c/Core.h" -#include "llvm-c/Target.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/LTO/LTOCodeGenerator.h" #include "llvm/LTO/LTOModule.h" From 74fc19ebf43051fa3da353c8f043d0b651f964c0 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 19:26:42 +0000 Subject: [PATCH 122/157] Fix this test a little harder - use llc_dwarf to make sure we don't try to execute it on windows. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211298 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ELF/cfi-version.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll index dfe2c36d550f..0588fcd45da6 100644 --- a/test/MC/ELF/cfi-version.ll +++ b/test/MC/ELF/cfi-version.ll @@ -1,6 +1,6 @@ -; RUN: llc %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2 -; RUN: llc %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 -; RUN: llc %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 +; RUN: %llc_dwarf %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2 +; RUN: %llc_dwarf %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 +; RUN: %llc_dwarf %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 ; Function Attrs: nounwind define i32 @foo() #0 { From 5b45c5d7ec51ef019a8133c7e27251ce3c9f0bbd Mon Sep 17 00:00:00 2001 From: David Greene Date: Thu, 19 Jun 2014 19:31:05 +0000 Subject: [PATCH 123/157] Turn of -Werror by default Don't build with -Werror unless asked to. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211299 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/llvm-compilers-check | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check index 3173027759b7..d745b3a7c506 100755 --- a/utils/llvm-compilers-check +++ b/utils/llvm-compilers-check @@ -149,6 +149,8 @@ def add_options(parser): help=("Do not do installs")) parser.add_option("--keep-going", default=False, action="store_true", help=("Keep going after failures")) + parser.add_option("--enable-werror", default=False, action="store_true", + help=("Build with -Werror")) return def check_options(parser, options, valid_builds): @@ -361,16 +363,13 @@ class Builder(threading.Thread): configure_flags = dict( llvm=dict(debug=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-assertions", "--disable-optimized", "--with-gcc-toolchain=" + cxxroot], release=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-optimized", "--with-gcc-toolchain=" + cxxroot], paranoid=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-assertions", "--enable-expensive-checks", "--disable-optimized", @@ -379,6 +378,11 @@ class Builder(threading.Thread): release=[], paranoid=[])) + if (self.options.enable_werror): + configure_flags["llvm"]["debug"].append("--enable-werror") + configure_flags["llvm"]["release"].append("--enable-werror") + configure_flags["llvm"]["paranoid"].append("--enable-werror") + configure_env = dict( llvm=dict(debug=dict(CC=self.cc, CXX=self.cxx), From 30db0c28dee5c8dc8b4bfeb2a94a22ceaba7aec9 Mon Sep 17 00:00:00 2001 From: David Greene Date: Thu, 19 Jun 2014 19:31:09 +0000 Subject: [PATCH 124/157] Add option to keep flavor out of the install directory Sometimes we want to install things in "standard" locations and the flavor directories interfere with that. Add an option to keep them out of the install path. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211300 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/llvm-compilers-check | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check index d745b3a7c506..c8bd905ff9ce 100755 --- a/utils/llvm-compilers-check +++ b/utils/llvm-compilers-check @@ -149,6 +149,8 @@ def add_options(parser): help=("Do not do installs")) parser.add_option("--keep-going", default=False, action="store_true", help=("Keep going after failures")) + parser.add_option("--no-flavor-prefix", default=False, action="store_true", + help=("Do not append the build flavor to the install path")) parser.add_option("--enable-werror", default=False, action="store_true", help=("Build with -Werror")) return @@ -348,7 +350,9 @@ class Builder(threading.Thread): ssabbrev = get_short_abbrevs([ab for ab in self.source_abbrev.values()]) prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + self.build_abbrev[build] + "]" - self.install_prefix += "/" + self.source_abbrev[source] + "/" + build + if (not self.options.no_flavor_prefix): + self.install_prefix += "/" + self.source_abbrev[source] + "/" + build + build_suffix += "/" + self.source_abbrev[source] + "/" + build self.logger = logging.getLogger(prefix) From 581d5d5df8a7811e1dd5c0221dd2e212ef8dd2ac Mon Sep 17 00:00:00 2001 From: David Greene Date: Thu, 19 Jun 2014 19:31:11 +0000 Subject: [PATCH 125/157] Remove bogus configure check Configure creates makefiles, so it doesn't make sense to check for them to see if we can configure. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211301 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/llvm-compilers-check | 4 ---- 1 file changed, 4 deletions(-) diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check index c8bd905ff9ce..4db8426ace88 100755 --- a/utils/llvm-compilers-check +++ b/utils/llvm-compilers-check @@ -538,10 +538,6 @@ class Builder(threading.Thread): self.logger.info("[" + prefix + "] Configure failed, no configure script " + conf) return -1 - if not os.path.exists(mf): - self.logger.info("[" + prefix + "] Configure failed, no makefile " + mf) - return -1 - if os.path.exists(conf) and os.path.exists(mf): confstat = os.stat(conf) makestat = os.stat(mf) From f21de942f3785d86d1f82cd19072ca26de9167da Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Thu, 19 Jun 2014 19:35:39 +0000 Subject: [PATCH 126/157] Support: Add llvm::sys::fs::copy_file A function to copy one file's contents to another. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211302 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/FileSystem.h | 6 ++++++ lib/Support/Path.cpp | 34 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 6abe90446b47..e56e2b718a75 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -335,6 +335,12 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting = true); /// @param to The path to rename to. This is created. std::error_code rename(const Twine &from, const Twine &to); +/// @brief Copy the contents of \a From to \a To. +/// +/// @param From The path to copy from. +/// @param To The path to copy to. This is created. +std::error_code copy_file(const Twine &From, const Twine &To); + /// @brief Resize path to size. File is resized as if by POSIX truncate(). /// /// @param path Input path. 
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 15edf0ddbbd8..535394c5b3ba 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -846,6 +846,40 @@ std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { return create_directory(P, IgnoreExisting); } +std::error_code copy_file(const Twine &From, const Twine &To) { + int ReadFD, WriteFD; + if (std::error_code EC = openFileForRead(From, ReadFD)) + return EC; + if (std::error_code EC = openFileForWrite(To, WriteFD, F_None)) { + close(ReadFD); + return EC; + } + + const size_t BufSize = 4096; + char *Buf = new char[BufSize]; + int BytesRead = 0, BytesWritten = 0; + for (;;) { + BytesRead = read(ReadFD, Buf, BufSize); + if (BytesRead <= 0) + break; + while (BytesRead) { + BytesWritten = write(WriteFD, Buf, BytesRead); + if (BytesWritten < 0) + break; + BytesRead -= BytesWritten; + } + if (BytesWritten < 0) + break; + } + close(ReadFD); + close(WriteFD); + delete[] Buf; + + if (BytesRead < 0 || BytesWritten < 0) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + bool exists(file_status status) { return status_known(status) && status.type() != file_type::file_not_found; } From d06976aba71501f4970bc5bf28d3a9a98c1fb3ad Mon Sep 17 00:00:00 2001 From: Alp Toker Date: Thu, 19 Jun 2014 19:41:26 +0000 Subject: [PATCH 127/157] Fix typos git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211304 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 2 +- lib/Target/R600/AMDGPUIntrinsics.td | 2 +- lib/Target/X86/X86FastISel.cpp | 2 +- test/CodeGen/PowerPC/vec_cmp.ll | 8 ++++---- test/DebugInfo/2009-11-05-DeadGlobalVariable.ll | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4a762ce6d3bd..cf849951088a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7147,7 +7147,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, // thumb-2 environment, so there is no interworking required. As a result, we // do not expect a veneer to be emitted by the linker, clobbering IP. // - // Each module recieves its own copy of __chkstk, so no import thunk is + // Each module receives its own copy of __chkstk, so no import thunk is // required, again, ensuring that IP is not clobbered. // // Finally, although some linkers may theoretically provide a trampoline for diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 538b4cd8af28..27c0dbea09f4 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -67,7 +67,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } -// Legacy names for compatability. +// Legacy names for compatibility. let TargetPrefix = "AMDIL", isTarget = 1 in { def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9557d96d5309..6bccd1290fec 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1315,7 +1315,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); // Emits an unconditional branch to the FalseBB, obtains the branch - // weight, andd adds it to the successor list. 
+ // weight, and adds it to the successor list. FastEmitBranch(FalseMBB, DbgLoc); return true; diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll index 4bce8c80fc6a..2733089fcb10 100644 --- a/test/CodeGen/PowerPC/vec_cmp.ll +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -36,7 +36,7 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone { ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v16i8 since it is a altivec native type +; Additional tests for v16i8 since it is a altivec native type define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone { %cmp = icmp eq <16 x i8> %x, %y @@ -165,7 +165,7 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v8i16 since it is an altivec native type +; Additional tests for v8i16 since it is an altivec native type define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone { entry: @@ -298,7 +298,7 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v4si32 since it is an altivec native type +; Additional tests for v4si32 since it is an altivec native type define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone { entry: @@ -449,7 +449,7 @@ entry: ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v4f32 since it is a altivec native type +; Additional tests for v4f32 since it is a altivec native type define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone { entry: diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll index c78b8b8f628f..65907d679780 100644 --- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll +++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll @@ -1,5 +1,5 @@ ; RUN: llc %s -o /dev/null -; Here variable bar is optimzied away. Do not trip over while trying to generate debug info. +; Here variable bar is optimized away. Do not trip over while trying to generate debug info. define i32 @foo() nounwind uwtable readnone ssp { From d308c35617935c5826972c3ddaff89f1367d9769 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 19:45:25 +0000 Subject: [PATCH 128/157] Remove an incorrect fixme. dynamic-no-pic is just another output type. If gnu ld gets support for MachO, it should also add something like LDPO_DYN_NO_PIC to the plugin interface. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211305 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 371574f26156..a056c61817cf 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -169,8 +169,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { tv->tv_u.tv_val); return LDPS_ERR; } - // TODO: add an option to disable PIC. - //output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC; break; case LDPT_OPTION: options::process_plugin_option(tv->tv_u.tv_string); From edd372aba2c1422ccb1e1f5ca48fb38eda658914 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 20:00:09 +0000 Subject: [PATCH 129/157] Fix up a few formatting issues. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211307 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 995bee121324..3bcff86b1c65 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -610,8 +610,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // Now emit the table of pairs of PointerSize'ed values for the section // addresses and sizes. for (const auto &sec : Sections) { - MCSymbol* StartSymbol = sec.second.first; - MCSymbol* EndSymbol = sec.second.second; + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; assert(StartSymbol && "StartSymbol must not be NULL"); assert(EndSymbol && "EndSymbol must not be NULL"); @@ -697,8 +697,8 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const auto TextSection = Sections.begin(); assert(TextSection != Sections.end() && "No text section found"); - MCSymbol* StartSymbol = TextSection->second.first; - MCSymbol* EndSymbol = TextSection->second.second; + MCSymbol *StartSymbol = TextSection->second.first; + MCSymbol *EndSymbol = TextSection->second.second; assert(StartSymbol && "StartSymbol must not be NULL"); assert(EndSymbol && "EndSymbol must not be NULL"); @@ -809,8 +809,8 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) { for (const auto sec : Sections) { - MCSymbol* StartSymbol = sec.second.first; - MCSymbol* EndSymbol = sec.second.second; + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; assert(StartSymbol && "StartSymbol must not be NULL"); assert(EndSymbol && "EndSymbol must not be NULL"); @@ -1539,13 +1539,12 @@ namespace { return CIEKey(nullptr, -1, 0, false, false); } - CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_, - unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) : - Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), - LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), - IsSimple(IsSimple_) { - } - const MCSymbol* Personality; + CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_, + unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) + : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), + LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), + IsSimple(IsSimple_) {} + const MCSymbol *Personality; unsigned PersonalityEncoding; unsigned LsdaEncoding; bool IsSignalFrame; @@ -1620,7 +1619,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, Emitter.setSectionStart(SectionStart); MCSymbol *FDEEnd = nullptr; - DenseMap CIEStarts; + DenseMap CIEStarts; const MCSymbol *DummyDebugKey = nullptr; NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame(); From 594f05c8dae6ec0b08694a1d439aee8bca18a47d Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 20:00:13 +0000 Subject: [PATCH 130/157] Since we're using DW_AT_string rather than DW_AT_strp for debug_info for assembly files we can't depend on the offset within the section after a string since it could be different between producers etc. Relax these tests accordingly. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211308 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/ARM/dwarf-asm-multiple-sections.s | 4 ++-- test/MC/ARM/dwarf-asm-nonstandard-section.s | 6 +++--- test/MC/ARM/dwarf-asm-single-section.s | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s index 5e64fcc023ff..ed1b89eff3cd 100644 --- a/test/MC/ARM/dwarf-asm-multiple-sections.s +++ b/test/MC/ARM/dwarf-asm-multiple-sections.s @@ -60,8 +60,8 @@ b: // RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev // RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line // RELOC-NEXT: 00000010 R_ARM_ABS32 .debug_ranges -// RELOC-NEXT: 0000004f R_ARM_ABS32 .text -// RELOC-NEXT: 00000061 R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 foo // RELOC: RELOCATION RECORDS FOR [.rel.debug_ranges]: // RELOC-NEXT: 00000004 R_ARM_ABS32 .text diff --git a/test/MC/ARM/dwarf-asm-nonstandard-section.s b/test/MC/ARM/dwarf-asm-nonstandard-section.s index adf92bab18d2..497a39ad1162 100644 --- a/test/MC/ARM/dwarf-asm-nonstandard-section.s +++ b/test/MC/ARM/dwarf-asm-nonstandard-section.s @@ -46,9 +46,9 @@ b: // RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: // RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev // RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line -// RELOC-NEXT: 00000010 R_ARM_ABS32 foo -// RELOC-NEXT: 00000014 R_ARM_ABS32 foo -// RELOC-NEXT: 00000053 R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo // RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: diff --git a/test/MC/ARM/dwarf-asm-single-section.s b/test/MC/ARM/dwarf-asm-single-section.s index d69dfab5dee5..c57e6498a38a 100644 --- a/test/MC/ARM/dwarf-asm-single-section.s +++ b/test/MC/ARM/dwarf-asm-single-section.s @@ -45,9 +45,9 @@ a: // RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: // RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev // RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line -// RELOC-NEXT: 00000010 R_ARM_ABS32 .text -// RELOC-NEXT: 00000014 R_ARM_ABS32 .text -// RELOC-NEXT: 00000053 R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text // RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: From 4110a7ac7ac9f5814b6828e498efdb3e75110f66 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 19 Jun 2014 20:08:56 +0000 Subject: [PATCH 131/157] Add StringMap::insert(pair) consistent with the standard associative container concept. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by Agustín Bergé. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211309 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/StringMap.h | 43 ++++++++++++++++++--------------- lib/Support/StringMap.cpp | 10 ++++++-- unittests/ADT/StringMapTest.cpp | 38 +++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 21 deletions(-) diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index 5b1868187986..c40e5e2b3d87 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -64,7 +64,7 @@ class StringMapImpl { } StringMapImpl(unsigned InitSize, unsigned ItemSize); - void RehashTable(); + unsigned RehashTable(unsigned BucketNo = 0); /// LookupBucketFor - Look up the bucket that the specified string should end /// up in. 
If it already exists as a key in the map, the Item pointer for the @@ -323,6 +323,28 @@ class StringMap : public StringMapImpl { return true; } + /// insert - Inserts the specified key/value pair into the map if the key + /// isn't already in the map. The bool component of the returned pair is true + /// if and only if the insertion takes place, and the iterator component of + /// the pair points to the element with key equivalent to the key of the pair. + std::pair insert(std::pair KV) { + unsigned BucketNo = LookupBucketFor(KV.first); + StringMapEntryBase *&Bucket = TheTable[BucketNo]; + if (Bucket && Bucket != getTombstoneVal()) + return std::make_pair(iterator(TheTable + BucketNo, false), + false); // Already exists in map. + + if (Bucket == getTombstoneVal()) + --NumTombstones; + Bucket = + MapEntryTy::Create(KV.first, Allocator, std::move(KV.second)); + ++NumItems; + assert(NumItems + NumTombstones <= NumBuckets); + + BucketNo = RehashTable(BucketNo); + return std::make_pair(iterator(TheTable + BucketNo, false), true); + } + // clear - Empties out the StringMap void clear() { if (empty()) return; @@ -346,24 +368,7 @@ class StringMap : public StringMapImpl { /// return. template MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) { - unsigned BucketNo = LookupBucketFor(Key); - StringMapEntryBase *&Bucket = TheTable[BucketNo]; - if (Bucket && Bucket != getTombstoneVal()) - return *static_cast(Bucket); - - MapEntryTy *NewItem = MapEntryTy::Create(Key, Allocator, std::move(Val)); - - if (Bucket == getTombstoneVal()) - --NumTombstones; - ++NumItems; - assert(NumItems + NumTombstones <= NumBuckets); - - // Fill in the bucket for the hash table. The FullHashValue was already - // filled in by LookupBucketFor. - Bucket = NewItem; - - RehashTable(); - return *NewItem; + return *insert(std::make_pair(Key, std::move(Val))).first; } MapEntryTy &GetOrCreateValue(StringRef Key) { diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 72a6d822d2b6..ddb73494ff5d 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -181,7 +181,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// RehashTable - Grow the table, redistributing values into the buckets with /// the appropriate mod-of-hashtable-size. -void StringMapImpl::RehashTable() { +unsigned StringMapImpl::RehashTable(unsigned BucketNo) { unsigned NewSize; unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); @@ -193,9 +193,10 @@ void StringMapImpl::RehashTable() { } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) { NewSize = NumBuckets; } else { - return; + return BucketNo; } + unsigned NewBucketNo = BucketNo; // Allocate one extra bucket which will always be non-empty. This allows the // iterators to stop at end. StringMapEntryBase **NewTableArray = @@ -215,6 +216,8 @@ void StringMapImpl::RehashTable() { if (!NewTableArray[NewBucket]) { NewTableArray[FullHash & (NewSize-1)] = Bucket; NewHashArray[FullHash & (NewSize-1)] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; continue; } @@ -227,6 +230,8 @@ void StringMapImpl::RehashTable() { // Finally found a slot. Fill it in. 
NewTableArray[NewBucket] = Bucket; NewHashArray[NewBucket] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; } } @@ -235,4 +240,5 @@ void StringMapImpl::RehashTable() { TheTable = NewTableArray; NumBuckets = NewSize; NumTombstones = 0; + return NewBucketNo; } diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp index 70eec873ed23..215d3dfa02e5 100644 --- a/unittests/ADT/StringMapTest.cpp +++ b/unittests/ADT/StringMapTest.cpp @@ -10,6 +10,7 @@ #include "gtest/gtest.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/DataTypes.h" +#include using namespace llvm; namespace { @@ -203,6 +204,43 @@ TEST_F(StringMapTest, InsertTest) { assertSingleItemMap(); } +// Test insert(pair) method +TEST_F(StringMapTest, InsertPairTest) { + bool Inserted; + StringMap::iterator NewIt; + std::tie(NewIt, Inserted) = + testMap.insert(std::make_pair(testKeyFirst, testValue)); + EXPECT_EQ(1u, testMap.size()); + EXPECT_EQ(testValue, testMap[testKeyFirst]); + EXPECT_EQ(testKeyFirst, NewIt->first()); + EXPECT_EQ(testValue, NewIt->second); + EXPECT_TRUE(Inserted); + + StringMap::iterator ExistingIt; + std::tie(ExistingIt, Inserted) = + testMap.insert(std::make_pair(testKeyFirst, testValue + 1)); + EXPECT_EQ(1u, testMap.size()); + EXPECT_EQ(testValue, testMap[testKeyFirst]); + EXPECT_FALSE(Inserted); + EXPECT_EQ(NewIt, ExistingIt); +} + +// Test insert(pair) method when rehashing occurs +TEST_F(StringMapTest, InsertRehashingPairTest) { + // Check that the correct iterator is returned when the inserted element is + // moved to a different bucket during internal rehashing. This depends on + // the particular key, and the implementation of StringMap and HashString. + // Changes to those might result in this test not actually checking that. + StringMap t(1); + EXPECT_EQ(1u, t.getNumBuckets()); + + StringMap::iterator It = + t.insert(std::make_pair("abcdef", 42)).first; + EXPECT_EQ(2u, t.getNumBuckets()); + EXPECT_EQ("abcdef", It->first()); + EXPECT_EQ(42u, It->second); +} + // Create a non-default constructable value struct StringMapTestStruct { StringMapTestStruct(int i) : i(i) {} From e0d89ff76414a1369c54fbd261308e1c313548d7 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 19 Jun 2014 20:20:03 +0000 Subject: [PATCH 132/157] Include Threading.h instead of forward declaring a function. Previously this led to a circular header dependency, but a recent change has since removed this dependency, so the correct fix is to simply include the header rather than forward declare. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211311 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Mutex.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h index f340051361b0..496a4381f3fc 100644 --- a/include/llvm/Support/Mutex.h +++ b/include/llvm/Support/Mutex.h @@ -15,13 +15,11 @@ #define LLVM_SUPPORT_MUTEX_H #include "llvm/Support/Compiler.h" +#include "llvm/Support/Threading.h" #include namespace llvm { - // Forward declare this function. - bool llvm_is_multithreaded(); - namespace sys { /// @brief Platform agnostic Mutex class. From d2f9358649478df80d688e77858dee14e37aebed Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 19 Jun 2014 21:03:04 +0000 Subject: [PATCH 133/157] Add a new subtarget hook for whether or not we'd like to enable the atomic load linked expander pass to run for a particular subtarget. 
This requires a check of the subtarget and so save the TargetMachine rather than only TargetLoweringInfo and update all callers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211314 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetSubtargetInfo.h | 3 ++ lib/CodeGen/AtomicExpandLoadLinkedPass.cpp | 33 +++++++++++++--------- lib/Target/ARM/ARMSubtarget.cpp | 4 +++ lib/Target/ARM/ARMSubtarget.h | 3 ++ lib/Target/ARM/ARMTargetMachine.cpp | 13 ++++----- lib/Target/TargetSubtargetInfo.cpp | 4 +++ 6 files changed, 39 insertions(+), 21 deletions(-) diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index bb164288b013..83f1997bf205 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -73,6 +73,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// MISchedPostRA, is set. virtual bool enablePostMachineScheduler() const; + /// \brief True if the subtarget should run the atomic expansion pass. + virtual bool enableAtomicExpandLoadLinked() const; + /// \brief Override generic scheduling policy within a region. /// /// This is a convenient way for targets that don't provide any custom diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp index 4c4150bfec90..c7cc4bcb1cb6 100644 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -21,17 +21,19 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "arm-atomic-expand" namespace { class AtomicExpandLoadLinked : public FunctionPass { - const TargetLowering *TLI; + const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) { + : FunctionPass(ID), TM(TM) { initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); } @@ -59,7 +61,7 @@ FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { } bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TLI) + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) return false; SmallVector AtomicInsts; @@ -76,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) { bool MadeChange = false; for (Instruction *Inst : AtomicInsts) { - if (!TLI->shouldExpandAtomicInIR(Inst)) + if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) continue; if (AtomicRMWInst *AI = dyn_cast(Inst)) @@ -98,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { // Load instructions don't actually need a leading fence, even in the // SequentiallyConsistent case. AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); + TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic + : LI->getOrdering(); // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is // an ldrexd (A3.5.3). 
IRBuilder<> Builder(LI); - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder); + Value *Val = TM->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); insertTrailingFence(Builder, LI->getOrdering()); @@ -165,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *NewVal; switch (AI->getOperation()) { @@ -211,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { llvm_unreachable("Unknown atomic op"); } - Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, NewVal, Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); @@ -278,7 +282,8 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *ShouldStore = Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); @@ -287,7 +292,7 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); @@ -352,7 +357,7 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return Ord; if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) @@ -365,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return; if (Ord == Acquire || Ord == AcquireRelease) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 875f1e83f875..e7d699c75614 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -417,6 +417,10 @@ bool ARMSubtarget::enablePostMachineScheduler() const { return PostRAScheduler; } +bool ARMSubtarget::enableAtomicExpandLoadLinked() const { + return hasAnyDataBarrier() && !isThumb1Only(); +} + bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ae62a6f876fd..c13ef865389e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -425,6 +425,9 @@ class ARMSubtarget : public 
ARMGenSubtargetInfo { TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const override; + // enableAtomicExpandLoadLinked - True if we need to expand our atomics. + bool enableAtomicExpandLoadLinked() const override; + /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7a3836ae62b4..434d3b03ae48 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -171,16 +171,15 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } void ARMPassConfig::addIRPasses() { - const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { - addPass(createAtomicExpandLoadLinkedPass(TM)); + addPass(createAtomicExpandLoadLinkedPass(TM)); - // Cmpxchg instructions are often used with a subsequent comparison to - // determine whether it succeeded. We can exploit existing control-flow in - // ldrex/strex loops to simplify this, but it needs tidying up. + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. + const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass()); - } TargetPassConfig::addIRPasses(); } diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 0c388f8fb26c..ce00fcc62737 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -39,6 +39,10 @@ bool TargetSubtargetInfo::useMachineScheduler() const { return enableMachineScheduler(); } +bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const { + return true; +} + bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } From 0b5745abd8e46ac60cd16f949a2fd5a581fc8fd4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 21:14:13 +0000 Subject: [PATCH 134/157] Use lib/LTO directly in the gold plugin. The tools/lto API is not the best choice for implementing a gold plugin. Among other issues: * It is an stable ABI. Old errors stay and we have to be really careful before adding new features. * It has to support two fairly different linkers: gold and ld64. * We end up with a plugin that depends on a shared lib, something quiet unusual in LLVM land. * It hides LLVM. For some features in the gold plugin it would be really nice to be able to just get a Module or a GlobalValue. This change is intended to be a very direct translation from the C API. It will just enable other fixes and cleanups. Tested with a LTO bootstrap on linux. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211315 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/CMakeLists.txt | 7 +-- tools/gold/gold-plugin.cpp | 93 +++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/tools/gold/CMakeLists.txt b/tools/gold/CMakeLists.txt index 07a1e2849b95..3864e154548c 100644 --- a/tools/gold/CMakeLists.txt +++ b/tools/gold/CMakeLists.txt @@ -14,13 +14,14 @@ else() # ABI compatibility. 
add_definitions( -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 ) - set(LLVM_LINK_COMPONENTS support) + set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + LTO + ) add_llvm_loadable_module(LLVMgold gold-plugin.cpp ) - target_link_libraries(LLVMgold ${cmake_2_8_12_PRIVATE} LTO) - endif() diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index a056c61817cf..f4b5caeeff19 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -15,11 +15,15 @@ #include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H #include "llvm-c/lto.h" #include "llvm/ADT/StringSet.h" +#include "llvm/CodeGen/CommandFlags.h" +#include "llvm/LTO/LTOCodeGenerator.h" +#include "llvm/LTO/LTOModule.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include #include @@ -72,9 +76,10 @@ namespace { std::string output_name = ""; std::list Modules; std::vector Cleanup; - lto_code_gen_t code_gen = NULL; + LTOCodeGenerator *CodeGen = nullptr; StringSet<> CannotBeHidden; } +static llvm::TargetOptions TargetOpts; namespace options { enum generate_bc { BC_NO, BC_ALSO, BC_ONLY }; @@ -142,6 +147,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { // for services. bool registeredClaimFile = false; + bool RegisteredAllSymbolsRead = false; for (; tv->tv_tag != LDPT_NULL; ++tv) { switch (tv->tv_tag) { @@ -189,7 +195,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { if ((*callback)(all_symbols_read_hook) != LDPS_OK) return LDPS_ERR; - code_gen = lto_codegen_create(); + RegisteredAllSymbolsRead = true; } break; case LDPT_REGISTER_CLEANUP_HOOK: { ld_plugin_register_cleanup callback; @@ -233,6 +239,28 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } + if (RegisteredAllSymbolsRead) { + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); + TargetOpts = InitTargetOptionsFromCodeGenFlags(); + CodeGen = new LTOCodeGenerator(); + CodeGen->setTargetOptions(TargetOpts); + if (MAttrs.size()) { + std::string Attrs; + for (unsigned I = 0; I < MAttrs.size(); ++I) { + if (I > 0) + Attrs.append(","); + Attrs.append(MAttrs[I]); + } + + CodeGen->setAttr(Attrs.c_str()); + } + } + return LDPS_OK; } @@ -241,7 +269,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { /// with add_symbol if possible. 
static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, int *claimed) { - lto_module_t M; + LTOModule *M; const void *view; std::unique_ptr buffer; if (get_view) { @@ -264,17 +292,15 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, view = buffer->getBufferStart(); } - if (!lto_module_is_object_file_in_memory(view, file->filesize)) + if (!LTOModule::isBitcodeFile(view, file->filesize)) return LDPS_OK; - M = lto_module_create_from_memory(view, file->filesize); + std::string Error; + M = LTOModule::makeLTOModule(view, file->filesize, TargetOpts, Error); if (!M) { - if (const char* msg = lto_get_error_message()) { - (*message)(LDPL_ERROR, - "LLVM gold plugin has failed to create LTO module: %s", - msg); - return LDPS_ERR; - } + (*message)(LDPL_ERROR, + "LLVM gold plugin has failed to create LTO module: %s", + Error.c_str()); return LDPS_OK; } @@ -283,20 +309,20 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, claimed_file &cf = Modules.back(); if (!options::triple.empty()) - lto_module_set_target_triple(M, options::triple.c_str()); + M->setTargetTriple(options::triple.c_str()); cf.handle = file->handle; - unsigned sym_count = lto_module_get_num_symbols(M); + unsigned sym_count = M->getSymbolCount(); cf.syms.reserve(sym_count); for (unsigned i = 0; i != sym_count; ++i) { - lto_symbol_attributes attrs = lto_module_get_symbol_attribute(M, i); + lto_symbol_attributes attrs = M->getSymbolAttributes(i); if ((attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) continue; cf.syms.push_back(ld_plugin_symbol()); ld_plugin_symbol &sym = cf.syms.back(); - sym.name = const_cast(lto_module_get_symbol_name(M, i)); + sym.name = const_cast(M->getSymbolName(i)); sym.name = strdup(sym.name); sym.version = NULL; @@ -359,15 +385,15 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, } } - if (code_gen) { - if (lto_codegen_add_module(code_gen, M)) { - (*message)(LDPL_ERROR, "Error linking module: %s", - lto_get_error_message()); + if (CodeGen) { + std::string Error; + if (!CodeGen->addModule(M, Error)) { + (*message)(LDPL_ERROR, "Error linking module: %s", Error.c_str()); return LDPS_ERR; } } - lto_module_dispose(M); + delete M; return LDPS_OK; } @@ -386,7 +412,7 @@ static bool mustPreserve(const claimed_file &F, int i) { /// codegen. static ld_plugin_status all_symbols_read_hook(void) { std::ofstream api_file; - assert(code_gen); + assert(CodeGen); if (options::generate_api_file) { api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc); @@ -403,7 +429,7 @@ static ld_plugin_status all_symbols_read_hook(void) { (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]); for (unsigned i = 0, e = I->syms.size(); i != e; i++) { if (mustPreserve(*I, i)) { - lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name); + CodeGen->addMustPreserveSymbol(I->syms[i].name); if (options::generate_api_file) api_file << I->syms[i].name << "\n"; @@ -414,16 +440,16 @@ static ld_plugin_status all_symbols_read_hook(void) { if (options::generate_api_file) api_file.close(); - lto_codegen_set_pic_model(code_gen, output_type); - lto_codegen_set_debug_model(code_gen, LTO_DEBUG_MODEL_DWARF); + CodeGen->setCodePICModel(output_type); + CodeGen->setDebugInfo(LTO_DEBUG_MODEL_DWARF); if (!options::mcpu.empty()) - lto_codegen_set_cpu(code_gen, options::mcpu.c_str()); + CodeGen->setCpu(options::mcpu.c_str()); // Pass through extra options to the code generator. 
if (!options::extra.empty()) { for (std::vector::iterator it = options::extra.begin(); it != options::extra.end(); ++it) { - lto_codegen_debug_options(code_gen, (*it).c_str()); + CodeGen->setCodeGenDebugOptions((*it).c_str()); } } @@ -435,11 +461,12 @@ static ld_plugin_status all_symbols_read_hook(void) { path = options::bc_path; else path = output_name + ".bc"; - bool err = lto_codegen_write_merged_modules(code_gen, path.c_str()); - if (err) + CodeGen->parseCodeGenDebugOptions(); + std::string Error; + if (!CodeGen->writeMergedModules(path.c_str(), Error)) (*message)(LDPL_FATAL, "Failed to write the output file."); if (options::generate_bc_file == options::BC_ONLY) { - lto_codegen_dispose(code_gen); + delete CodeGen; exit(0); } } @@ -447,13 +474,15 @@ static ld_plugin_status all_symbols_read_hook(void) { std::string ObjPath; { const char *Temp; - if (lto_codegen_compile_to_file(code_gen, &Temp)) { + CodeGen->parseCodeGenDebugOptions(); + std::string Error; + if (!CodeGen->compile_to_file(&Temp, /*DisableOpt*/ false, /*DisableInline*/ + false, /*DisableGVNLoadPRE*/ false, Error)) (*message)(LDPL_ERROR, "Could not produce a combined object file\n"); - } ObjPath = Temp; } - lto_codegen_dispose(code_gen); + delete CodeGen; for (std::list::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { for (unsigned i = 0; i != I->syms.size(); ++i) { From c28016e4131bdd50c23510adb3a0254b7fb5adc6 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 19 Jun 2014 22:03:18 +0000 Subject: [PATCH 135/157] =?UTF-8?q?Change=20the=20output=20of=20llvm-nm=20?= =?UTF-8?q?and=20llvm-size=20for=20Mach-O=20universal=20files=20(aka=20fat?= =?UTF-8?q?=20files)=20to=20print=20=E2=80=9C=20(for=20architecture=20XYZ)?= =?UTF-8?q?=E2=80=9D=20for=20fat=20files=20with=20more=20than=20one=20arch?= =?UTF-8?q?itecture=20to=20be=20like=20what=20the=20darwin=20tools=20do=20?= =?UTF-8?q?for=20fat=20files.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also clean up the Mach-O printing of archive membernames in llvm-nm to use the darwin form of "libx.a(foo.o)". 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211316 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Object/MachOUniversal.h | 4 ++++ lib/Object/MachOUniversal.cpp | 8 ++------ test/Object/nm-universal-binary.test | 8 ++++---- test/Object/size-trivial-macho.test | 8 ++++---- tools/llvm-nm/llvm-nm.cpp | 22 ++++++++++++++++++---- tools/llvm-size/llvm-size.cpp | 19 +++++++++++++++---- 6 files changed, 47 insertions(+), 22 deletions(-) diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h index 47e93c26b46a..74448f973b2e 100644 --- a/include/llvm/Object/MachOUniversal.h +++ b/include/llvm/Object/MachOUniversal.h @@ -18,6 +18,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/MachO.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MachO.h" @@ -52,6 +53,9 @@ class MachOUniversalBinary : public Binary { ObjectForArch getNext() const { return ObjectForArch(Parent, Index + 1); } uint32_t getCPUType() const { return Header.cputype; } + std::string getArchTypeName() const { + return Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + } std::error_code getAsObjectFile(std::unique_ptr &Result) const; diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index 05729ef7467e..887e2bd0a34b 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -72,9 +72,7 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile( if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + std::string ObjectName = Parent->getFileName().str(); MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( ObjectData, ObjectName, false); ErrorOr Obj = ObjectFile::createMachOObjectFile(ObjBuffer); @@ -91,9 +89,7 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsArchive( if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + std::string ObjectName = Parent->getFileName().str(); MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( ObjectData, ObjectName, false); ErrorOr Obj = Archive::create(ObjBuffer); diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index c20c733dcd8b..52781267c5cb 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -3,17 +3,17 @@ RUN: | FileCheck %s -check-prefix CHECK-OBJ RUN: llvm-nm %p/Inputs/macho-universal-archive.x86_64.i386 \ RUN: | FileCheck %s -check-prefix CHECK-AR -CHECK-OBJ: macho-universal.x86_64.i386:x86_64 +CHECK-OBJ: macho-universal.x86_64.i386 (for architecture x86_64): CHECK-OBJ: 0000000100000f60 T _main -CHECK-OBJ: macho-universal.x86_64.i386:i386 +CHECK-OBJ: macho-universal.x86_64.i386 (for architecture i386): CHECK-OBJ: 00001fa0 T _main -CHECK-AR: macho-universal-archive.x86_64.i386:x86_64:hello.o: +CHECK-AR: macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64): CHECK-AR: 0000000000000068 s EH_frame0 CHECK-AR: 000000000000003b s L_.str CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh CHECK-AR: U _printf -CHECK-AR: 
macho-universal-archive.x86_64.i386:i386:foo.o: +CHECK-AR: macho-universal-archive.x86_64.i386(foo.o) (for architecture i386): CHECK-AR: 00000008 S _bar CHECK-AR: 00000000 T _foo diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test index 1ed611b0de22..6602d5651860 100644 --- a/test/Object/size-trivial-macho.test +++ b/test/Object/size-trivial-macho.test @@ -71,9 +71,9 @@ mxl: Segment __LINKEDIT: 0x1000 (vmaddr 0x100002000 fileoff 8192) mxl: total 0x100003000 u: __TEXT __DATA __OBJC others dec hex -u: 4096 0 0 4294971392 4294975488 100002000 {{.*}}/macho-universal.x86_64.i386:x86_64 -u: 4096 0 0 8192 12288 3000 {{.*}}/macho-universal.x86_64.i386:i386 +u: 4096 0 0 4294971392 4294975488 100002000 {{.*}}/macho-universal.x86_64.i386 (for architecture x86_64) +u: 4096 0 0 8192 12288 3000 {{.*}}/macho-universal.x86_64.i386 (for architecture i386) uAR: __TEXT __DATA __OBJC others dec hex -uAR: 136 0 0 32 168 a8 {{.*}}/macho-universal-archive.x86_64.i386:x86_64(hello.o) -uAR: 5 4 0 0 9 9 {{.*}}/macho-universal-archive.x86_64.i386:i386(foo.o) +uAR: 136 0 0 32 168 a8 {{.*}}/macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64) +uAR: 5 4 0 0 9 9 {{.*}}/macho-universal-archive.x86_64.i386(foo.o) (for architecture i386) diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index e605e6e430a7..c6b80d1dd965 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -752,20 +752,28 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - outs() << O->getFileName() << ":\n"; + if (isa(O)) { + outs() << Filename << "(" << O->getFileName() << ")"; + } else + outs() << O->getFileName(); + outs() << ":\n"; dumpSymbolNamesFromObject(O); } } return; } if (MachOUniversalBinary *UB = dyn_cast(Bin.get())) { + bool moreThanOneArch = UB->getNumberOfObjects() > 1; for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), E = UB->end_objects(); I != E; ++I) { std::unique_ptr Obj; std::unique_ptr A; if (!I->getAsObjectFile(Obj)) { - outs() << Obj->getFileName() << ":\n"; + outs() << Obj->getFileName(); + if (isa(Obj.get()) && moreThanOneArch) + outs() << " (for architecture " << I->getArchTypeName() << ")"; + outs() << ":\n"; dumpSymbolNamesFromObject(Obj.get()); } else if (!I->getAsArchive(A)) { @@ -776,8 +784,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - outs() << A->getFileName() << ":"; - outs() << O->getFileName() << ":\n"; + outs() << A->getFileName(); + if (isa(O)) { + outs() << "(" << O->getFileName() << ")"; + if (moreThanOneArch) + outs() << " (for architecture " << I->getArchTypeName() << ")"; + } else + outs() << ":" << O->getFileName(); + outs() << ":\n"; dumpSymbolNamesFromObject(O); } } diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp index b3aaac8d27dd..408bb4a18804 100644 --- a/tools/llvm-size/llvm-size.cpp +++ b/tools/llvm-size/llvm-size.cpp @@ -469,13 +469,20 @@ static void PrintFileSectionSizes(StringRef file) { std::unique_ptr UA; if (!I->getAsObjectFile(UO)) { if (ObjectFile *o = dyn_cast(&*UO.get())) { + MachOObjectFile *MachO = dyn_cast(o); if (OutputFormat == sysv) outs() << o->getFileName() << " :\n"; + else if(MachO && OutputFormat == darwin) { + if (moreThanOneFile || moreThanOneArch) + outs() << o->getFileName() << " (for architecture " + << I->getArchTypeName() << 
"):"; + outs() << "\n"; + } PrintObjectSectionSizes(o); if (OutputFormat == berkeley) { - MachOObjectFile *MachO = dyn_cast(o); if (!MachO || moreThanOneFile || moreThanOneArch) - outs() << o->getFileName(); + outs() << o->getFileName() << " (for architecture " + << I->getArchTypeName() << ")"; outs() << "\n"; } } @@ -495,11 +502,15 @@ static void PrintFileSectionSizes(StringRef file) { outs() << o->getFileName() << " (ex " << UA->getFileName() << "):\n"; else if(MachO && OutputFormat == darwin) - outs() << UA->getFileName() << "(" << o->getFileName() << "):\n"; + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchTypeName() + << "):\n"; PrintObjectSectionSizes(o); if (OutputFormat == berkeley) { if (MachO) - outs() << UA->getFileName() << "(" << o->getFileName() << ")\n"; + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchTypeName() + << ")\n"; else outs() << o->getFileName() << " (ex " << UA->getFileName() << ")\n"; From 9eb38b233e5353a80c7d99676a08fe87d87debdb Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:14:12 +0000 Subject: [PATCH 136/157] Set missing options in LTOCodeGenerator::setTargetOptions. Patch by Tom Roeder, I just added the test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211317 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/LTO/LTOCodeGenerator.cpp | 5 +++++ test/LTO/jump-table-type.ll | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 test/LTO/jump-table-type.ll diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 9009958613ed..2772676c8e8c 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -141,6 +141,11 @@ void LTOCodeGenerator::setTargetOptions(TargetOptions options) { Options.TrapFuncName = options.TrapFuncName; Options.PositionIndependentExecutable = options.PositionIndependentExecutable; Options.UseInitArray = options.UseInitArray; + Options.DataSections = options.DataSections; + Options.FunctionSections = options.FunctionSections; + + Options.MCOptions = options.MCOptions; + Options.JTType = options.JTType; } void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { diff --git a/test/LTO/jump-table-type.ll b/test/LTO/jump-table-type.ll new file mode 100644 index 000000000000..a39d3e959830 --- /dev/null +++ b/test/LTO/jump-table-type.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as <%s >%t1 +; RUN: llvm-lto -o %t2 %t1 -jump-table-type=arity +; RUN: llvm-nm %t2 | FileCheck %s + +; CHECK: T __llvm_jump_instr_table_0_1 +; CHECK: T __llvm_jump_instr_table_1_1 + +target triple = "x86_64-unknown-linux-gnu" + +define i32 @g(i32 %a) unnamed_addr jumptable { + ret i32 %a +} + +define i32 @f() unnamed_addr jumptable { + ret i32 0 +} + +define i32 @main() { + ret i32 0 +} + +@llvm.used = appending global [2 x i8*] [i8* bitcast (i32(i32)* @g to i8*), + i8* bitcast (i32()* @f to i8*)] From 474f2238604c3e1c81e2094acf53726e43c96fd3 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:20:07 +0000 Subject: [PATCH 137/157] Reduce indentation. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211318 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index f4b5caeeff19..e21a82373f7d 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -239,26 +239,26 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } - if (RegisteredAllSymbolsRead) { - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - InitializeAllAsmPrinters(); - InitializeAllDisassemblers(); - TargetOpts = InitTargetOptionsFromCodeGenFlags(); - CodeGen = new LTOCodeGenerator(); - CodeGen->setTargetOptions(TargetOpts); - if (MAttrs.size()) { - std::string Attrs; - for (unsigned I = 0; I < MAttrs.size(); ++I) { - if (I > 0) - Attrs.append(","); - Attrs.append(MAttrs[I]); - } + if (!RegisteredAllSymbolsRead) + return LDPS_OK; - CodeGen->setAttr(Attrs.c_str()); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); + TargetOpts = InitTargetOptionsFromCodeGenFlags(); + CodeGen = new LTOCodeGenerator(); + CodeGen->setTargetOptions(TargetOpts); + if (MAttrs.size()) { + std::string Attrs; + for (unsigned I = 0; I < MAttrs.size(); ++I) { + if (I > 0) + Attrs.append(","); + Attrs.append(MAttrs[I]); } + CodeGen->setAttr(Attrs.c_str()); } return LDPS_OK; From 5c2871c78139ed8bee12552f8c8c3ff9afe6f60e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:27:46 +0000 Subject: [PATCH 138/157] Use the assignment operator. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211319 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/LTO/LTOCodeGenerator.cpp | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 2772676c8e8c..a1709f60fff2 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -124,28 +124,7 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { } void LTOCodeGenerator::setTargetOptions(TargetOptions options) { - Options.LessPreciseFPMADOption = options.LessPreciseFPMADOption; - Options.NoFramePointerElim = options.NoFramePointerElim; - Options.AllowFPOpFusion = options.AllowFPOpFusion; - Options.UnsafeFPMath = options.UnsafeFPMath; - Options.NoInfsFPMath = options.NoInfsFPMath; - Options.NoNaNsFPMath = options.NoNaNsFPMath; - Options.HonorSignDependentRoundingFPMathOption = - options.HonorSignDependentRoundingFPMathOption; - Options.UseSoftFloat = options.UseSoftFloat; - Options.FloatABIType = options.FloatABIType; - Options.NoZerosInBSS = options.NoZerosInBSS; - Options.GuaranteedTailCallOpt = options.GuaranteedTailCallOpt; - Options.DisableTailCalls = options.DisableTailCalls; - Options.StackAlignmentOverride = options.StackAlignmentOverride; - Options.TrapFuncName = options.TrapFuncName; - Options.PositionIndependentExecutable = options.PositionIndependentExecutable; - Options.UseInitArray = options.UseInitArray; - Options.DataSections = options.DataSections; - Options.FunctionSections = options.FunctionSections; - - Options.MCOptions = options.MCOptions; - Options.JTType = options.JTType; + Options = options; } void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { From 7c9dcfb29e5095d560206973c40e6a33ba108c21 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:33:23 +0000 Subject: [PATCH 139/157] Simplify. No functionality change. Thanks to Alp Toker for noticing it. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211320 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index e21a82373f7d..2cde5d1c579c 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -322,8 +322,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, cf.syms.push_back(ld_plugin_symbol()); ld_plugin_symbol &sym = cf.syms.back(); - sym.name = const_cast(M->getSymbolName(i)); - sym.name = strdup(sym.name); + sym.name = strdup(M->getSymbolName(i)); sym.version = NULL; int scope = attrs & LTO_SYMBOL_SCOPE_MASK; From f7b992bdcd05fd6844f96e0431505d0b9c9956f9 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 19 Jun 2014 22:49:21 +0000 Subject: [PATCH 140/157] =?UTF-8?q?Fix=20the=20output=20of=20llvm-nm=20for?= =?UTF-8?q?=20Mach-O=20files=20to=20use=20the=20characters=20=E2=80=98d?= =?UTF-8?q?=E2=80=99=20and=20=E2=80=98b=E2=80=99=20for=20data=20and=20bss?= =?UTF-8?q?=20symbols=20instead=20of=20the=20generic=20=E2=80=99s=E2=80=99?= =?UTF-8?q?=20for=20a=20symbol=20in=20a=20section.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211321 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/nm-trivial-object.test | 7 +++++++ test/Object/nm-universal-binary.test | 2 +- tools/llvm-nm/llvm-nm.cpp | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index 20ac6621e728..c53dc91ecd4c 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -14,6 +14,8 @@ RUN: llvm-nm %p/Inputs/trivial-object-test.macho-i386 \ RUN: | FileCheck %s -check-prefix macho RUN: llvm-nm %p/Inputs/trivial-object-test.macho-x86-64 \ RUN: | FileCheck %s -check-prefix macho64 +RUN: llvm-nm %p/Inputs/macho-text-data-bss.macho-x86_64 \ +RUN: | FileCheck %s -check-prefix macho-tdb RUN: llvm-nm %p/Inputs/common.coff-i386 \ RUN: | FileCheck %s -check-prefix COFF-COMMON RUN: llvm-nm %p/Inputs/relocatable-with-section-address.elf-x86-64 \ @@ -64,6 +66,11 @@ macho64: U _SomeOtherFunction macho64: 0000000000000000 T _main macho64: U _puts +macho-tdb: 0000000000000030 s EH_frame0 +macho-tdb: 0000000000000070 b _b +macho-tdb: 000000000000000c D _d +macho-tdb: 0000000000000000 T _t +macho-tdb: 0000000000000048 S _t.eh Test that nm uses addresses even with ELF .o files. 
ELF-SEC-ADDR64: 0000000000000058 D a diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index 52781267c5cb..8992359a79b6 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -15,5 +15,5 @@ CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh CHECK-AR: U _printf CHECK-AR: macho-universal-archive.x86_64.i386(foo.o) (for architecture i386): -CHECK-AR: 00000008 S _bar +CHECK-AR: 00000008 D _bar CHECK-AR: 00000000 T _foo diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index c6b80d1dd965..5062435d8971 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -577,6 +577,10 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) { StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref); if (SegmentName == "__TEXT" && SectionName == "__text") return 't'; + else if (SegmentName == "__DATA" && SectionName == "__data") + return 'd'; + else if (SegmentName == "__DATA" && SectionName == "__bss") + return 'b'; else return 's'; } From f38fdf1b612bf9e0a19d8d08d4283baa05334bcb Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 22:54:47 +0000 Subject: [PATCH 141/157] Set gold plugin options in a sane order. This fixes the processing of --plugin-opt=-jump-table-type=arity. Nice properties: * We call InitTargetOptionsFromCodeGenFlags once. * We call parseCodeGenDebugOptions once. * It works :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211322 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 2cde5d1c579c..bb8b32b814f9 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -248,9 +248,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { InitializeAllAsmParsers(); InitializeAllAsmPrinters(); InitializeAllDisassemblers(); - TargetOpts = InitTargetOptionsFromCodeGenFlags(); CodeGen = new LTOCodeGenerator(); - CodeGen->setTargetOptions(TargetOpts); if (MAttrs.size()) { std::string Attrs; for (unsigned I = 0; I < MAttrs.size(); ++I) { @@ -261,6 +259,18 @@ ld_plugin_status onload(ld_plugin_tv *tv) { CodeGen->setAttr(Attrs.c_str()); } + // Pass through extra options to the code generator. + if (!options::extra.empty()) { + for (std::vector::iterator it = options::extra.begin(); + it != options::extra.end(); ++it) { + CodeGen->setCodeGenDebugOptions((*it).c_str()); + } + } + + CodeGen->parseCodeGenDebugOptions(); + TargetOpts = InitTargetOptionsFromCodeGenFlags(); + CodeGen->setTargetOptions(TargetOpts); + return LDPS_OK; } @@ -444,14 +454,6 @@ static ld_plugin_status all_symbols_read_hook(void) { if (!options::mcpu.empty()) CodeGen->setCpu(options::mcpu.c_str()); - // Pass through extra options to the code generator. 
- if (!options::extra.empty()) { - for (std::vector::iterator it = options::extra.begin(); - it != options::extra.end(); ++it) { - CodeGen->setCodeGenDebugOptions((*it).c_str()); - } - } - if (options::generate_bc_file != options::BC_NO) { std::string path; if (options::generate_bc_file == options::BC_ONLY) @@ -460,7 +462,6 @@ static ld_plugin_status all_symbols_read_hook(void) { path = options::bc_path; else path = output_name + ".bc"; - CodeGen->parseCodeGenDebugOptions(); std::string Error; if (!CodeGen->writeMergedModules(path.c_str(), Error)) (*message)(LDPL_FATAL, "Failed to write the output file."); @@ -473,7 +474,6 @@ static ld_plugin_status all_symbols_read_hook(void) { std::string ObjPath; { const char *Temp; - CodeGen->parseCodeGenDebugOptions(); std::string Error; if (!CodeGen->compile_to_file(&Temp, /*DisableOpt*/ false, /*DisableInline*/ false, /*DisableGVNLoadPRE*/ false, Error)) From 2ae686a2ab90def4f9fa38c1407bbe7f062068f3 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 19 Jun 2014 23:06:53 +0000 Subject: [PATCH 142/157] The gold plugin doesn't need disassemblers. Back in r128440 tools/LTO started exporting the disassembler interface. It was never clear why, but whatever the reason I am pretty sure it doesn't hold for tools/gold. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211325 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index bb8b32b814f9..4e273de3404a 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -247,7 +247,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { InitializeAllTargetMCs(); InitializeAllAsmParsers(); InitializeAllAsmPrinters(); - InitializeAllDisassemblers(); CodeGen = new LTOCodeGenerator(); if (MAttrs.size()) { std::string Attrs; From d1e64617ee1b76c28fa512d622f0b977144b300f Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Fri, 20 Jun 2014 00:04:16 +0000 Subject: [PATCH 143/157] Added the -m option as an alias for -format=darwin to llvm-nm and llvm-size which is what the darwin tools use for the Mach-O format output. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211326 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Object/nm-darwin-m.test | 2 +- test/Object/size-trivial-macho.test | 2 +- tools/llvm-nm/llvm-nm.cpp | 3 +++ tools/llvm-size/llvm-size.cpp | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/test/Object/nm-darwin-m.test b/test/Object/nm-darwin-m.test index 6c718128aa61..5bb19dcacd37 100644 --- a/test/Object/nm-darwin-m.test +++ b/test/Object/nm-darwin-m.test @@ -2,7 +2,7 @@ RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test1.mach0-armv7 \ RUN: | FileCheck %s -check-prefix test1 RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test2.macho-i386 \ RUN: | FileCheck %s -check-prefix test2 -RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test3.macho-x86-64 \ +RUN: llvm-nm -m %p/Inputs/darwin-m-test3.macho-x86-64 \ RUN: | FileCheck %s -check-prefix test3 # This is testing that the various bits in the n_desc feild are correct diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test index 6602d5651860..1642790c2c7b 100644 --- a/test/Object/size-trivial-macho.test +++ b/test/Object/size-trivial-macho.test @@ -8,7 +8,7 @@ RUN: llvm-size %p/Inputs/macho-archive-x86_64.a \ RUN: | FileCheck %s -check-prefix AR RUN: llvm-size -format darwin %p/Inputs/macho-archive-x86_64.a \ RUN: | FileCheck %s -check-prefix mAR -RUN: llvm-size -format darwin -x -l %p/Inputs/hello-world.macho-x86_64 \ +RUN: llvm-size -m -x -l %p/Inputs/hello-world.macho-x86_64 \ RUN: | FileCheck %s -check-prefix mxl RUN: llvm-size %p/Inputs/macho-universal.x86_64.i386 \ RUN: | FileCheck %s -check-prefix u diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index 5062435d8971..cccddb0736d1 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -81,6 +81,7 @@ cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"), cl::opt BSDFormat("B", cl::desc("Alias for --format=bsd")); cl::opt POSIXFormat("P", cl::desc("Alias for --format=posix")); +cl::opt DarwinFormat("m", cl::desc("Alias for --format=darwin")); cl::opt PrintFileName( "print-file-name", @@ -828,6 +829,8 @@ int main(int argc, char **argv) { OutputFormat = bsd; if (POSIXFormat) OutputFormat = posix; + if (DarwinFormat) + OutputFormat = darwin; // The relative order of these is important. If you pass --size-sort it should // only print out the size. However, if you pass -S --size-sort, it should diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp index 408bb4a18804..b71380dcaa4a 100644 --- a/tools/llvm-size/llvm-size.cpp +++ b/tools/llvm-size/llvm-size.cpp @@ -46,6 +46,7 @@ static cl::opt OutputFormatShort(cl::desc("Specify output format"), cl::values(clEnumValN(sysv, "A", "System V format"), clEnumValN(berkeley, "B", "Berkeley format"), + clEnumValN(darwin, "m", "Darwin -m format"), clEnumValEnd), cl::init(berkeley)); From 6fda71e05edc6447f780dee7fff8bd4bf543f39e Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 00:23:03 +0000 Subject: [PATCH 144/157] Revert "Add StringMap::insert(pair) consistent with the standard associative container concept." This reverts commit r211309. 
It looks like it broke some bots: http://lab.llvm.org:8011/builders/clang-x86_64-ubuntu-gdb-75/builds/15563/steps/compile/logs/stdio git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211328 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/StringMap.h | 43 +++++++++++++++------------------ lib/Support/StringMap.cpp | 10 ++------ unittests/ADT/StringMapTest.cpp | 38 ----------------------------- 3 files changed, 21 insertions(+), 70 deletions(-) diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index c40e5e2b3d87..5b1868187986 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -64,7 +64,7 @@ class StringMapImpl { } StringMapImpl(unsigned InitSize, unsigned ItemSize); - unsigned RehashTable(unsigned BucketNo = 0); + void RehashTable(); /// LookupBucketFor - Look up the bucket that the specified string should end /// up in. If it already exists as a key in the map, the Item pointer for the @@ -323,28 +323,6 @@ class StringMap : public StringMapImpl { return true; } - /// insert - Inserts the specified key/value pair into the map if the key - /// isn't already in the map. The bool component of the returned pair is true - /// if and only if the insertion takes place, and the iterator component of - /// the pair points to the element with key equivalent to the key of the pair. - std::pair insert(std::pair KV) { - unsigned BucketNo = LookupBucketFor(KV.first); - StringMapEntryBase *&Bucket = TheTable[BucketNo]; - if (Bucket && Bucket != getTombstoneVal()) - return std::make_pair(iterator(TheTable + BucketNo, false), - false); // Already exists in map. - - if (Bucket == getTombstoneVal()) - --NumTombstones; - Bucket = - MapEntryTy::Create(KV.first, Allocator, std::move(KV.second)); - ++NumItems; - assert(NumItems + NumTombstones <= NumBuckets); - - BucketNo = RehashTable(BucketNo); - return std::make_pair(iterator(TheTable + BucketNo, false), true); - } - // clear - Empties out the StringMap void clear() { if (empty()) return; @@ -368,7 +346,24 @@ class StringMap : public StringMapImpl { /// return. template MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) { - return *insert(std::make_pair(Key, std::move(Val))).first; + unsigned BucketNo = LookupBucketFor(Key); + StringMapEntryBase *&Bucket = TheTable[BucketNo]; + if (Bucket && Bucket != getTombstoneVal()) + return *static_cast(Bucket); + + MapEntryTy *NewItem = MapEntryTy::Create(Key, Allocator, std::move(Val)); + + if (Bucket == getTombstoneVal()) + --NumTombstones; + ++NumItems; + assert(NumItems + NumTombstones <= NumBuckets); + + // Fill in the bucket for the hash table. The FullHashValue was already + // filled in by LookupBucketFor. + Bucket = NewItem; + + RehashTable(); + return *NewItem; } MapEntryTy &GetOrCreateValue(StringRef Key) { diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index ddb73494ff5d..72a6d822d2b6 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -181,7 +181,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// RehashTable - Grow the table, redistributing values into the buckets with /// the appropriate mod-of-hashtable-size. 
-unsigned StringMapImpl::RehashTable(unsigned BucketNo) { +void StringMapImpl::RehashTable() { unsigned NewSize; unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); @@ -193,10 +193,9 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) { NewSize = NumBuckets; } else { - return BucketNo; + return; } - unsigned NewBucketNo = BucketNo; // Allocate one extra bucket which will always be non-empty. This allows the // iterators to stop at end. StringMapEntryBase **NewTableArray = @@ -216,8 +215,6 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { if (!NewTableArray[NewBucket]) { NewTableArray[FullHash & (NewSize-1)] = Bucket; NewHashArray[FullHash & (NewSize-1)] = FullHash; - if (I == BucketNo) - NewBucketNo = NewBucket; continue; } @@ -230,8 +227,6 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { // Finally found a slot. Fill it in. NewTableArray[NewBucket] = Bucket; NewHashArray[NewBucket] = FullHash; - if (I == BucketNo) - NewBucketNo = NewBucket; } } @@ -240,5 +235,4 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) { TheTable = NewTableArray; NumBuckets = NewSize; NumTombstones = 0; - return NewBucketNo; } diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp index 215d3dfa02e5..70eec873ed23 100644 --- a/unittests/ADT/StringMapTest.cpp +++ b/unittests/ADT/StringMapTest.cpp @@ -10,7 +10,6 @@ #include "gtest/gtest.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/DataTypes.h" -#include using namespace llvm; namespace { @@ -204,43 +203,6 @@ TEST_F(StringMapTest, InsertTest) { assertSingleItemMap(); } -// Test insert(pair) method -TEST_F(StringMapTest, InsertPairTest) { - bool Inserted; - StringMap::iterator NewIt; - std::tie(NewIt, Inserted) = - testMap.insert(std::make_pair(testKeyFirst, testValue)); - EXPECT_EQ(1u, testMap.size()); - EXPECT_EQ(testValue, testMap[testKeyFirst]); - EXPECT_EQ(testKeyFirst, NewIt->first()); - EXPECT_EQ(testValue, NewIt->second); - EXPECT_TRUE(Inserted); - - StringMap::iterator ExistingIt; - std::tie(ExistingIt, Inserted) = - testMap.insert(std::make_pair(testKeyFirst, testValue + 1)); - EXPECT_EQ(1u, testMap.size()); - EXPECT_EQ(testValue, testMap[testKeyFirst]); - EXPECT_FALSE(Inserted); - EXPECT_EQ(NewIt, ExistingIt); -} - -// Test insert(pair) method when rehashing occurs -TEST_F(StringMapTest, InsertRehashingPairTest) { - // Check that the correct iterator is returned when the inserted element is - // moved to a different bucket during internal rehashing. This depends on - // the particular key, and the implementation of StringMap and HashString. - // Changes to those might result in this test not actually checking that. - StringMap t(1); - EXPECT_EQ(1u, t.getNumBuckets()); - - StringMap::iterator It = - t.insert(std::make_pair("abcdef", 42)).first; - EXPECT_EQ(2u, t.getNumBuckets()); - EXPECT_EQ("abcdef", It->first()); - EXPECT_EQ(42u, It->second); -} - // Create a non-default constructable value struct StringMapTestStruct { StringMapTestStruct(int i) : i(i) {} From 160dcf5b61b8d328cd1705a90c1e0ae27dcabd41 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 20 Jun 2014 00:38:12 +0000 Subject: [PATCH 145/157] Don't build switch lookup tables for dllimport or TLS variables We would previously put dllimport variables in switch lookup tables, which doesn't work because the address cannot be used in a constant initializer. This is basically the same problem that we have in PR19955. 
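A minimal C++ sketch of the pattern in question (the variable and function names are hypothetical, and the dllimport syntax assumes a Windows target): SimplifyCFG would like to replace the switch below with a constant @switch.table array of addresses, but the address of a dllimport global is only resolved at load time and therefore cannot appear in a constant initializer.

  __declspec(dllimport) extern int ext_a, ext_b, ext_c; // imported from a DLL

  int *pick(int x) {
    switch (x) {              // must stay a switch; no lookup table is built
    case 0:  return &ext_a;
    case 1:  return &ext_b;
    default: return &ext_c;
    }
  }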
Putting TLS variables in switch tables also desn't work, because the address of such a variable is not constant. Differential Revision: http://reviews.llvm.org/D4220 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211331 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/Constant.h | 3 ++ lib/IR/Constants.cpp | 53 ++++++++++++------- lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++ .../SimplifyCFG/X86/switch_to_lookup_table.ll | 52 ++++++++++++++++++ 4 files changed, 92 insertions(+), 20 deletions(-) diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 39c7c37dafd5..257bb80f2119 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -71,6 +71,9 @@ class Constant : public User { /// isThreadDependent - Return true if the value can vary between threads. bool isThreadDependent() const; + /// Return true if the value is dependent on a dllimport variable. + bool isDLLImportDependent() const; + /// isConstantUsed - Return true if the constant has users other than constant /// exprs and other dangling things. bool isConstantUsed() const; diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index aa26cff6a7b6..5851625383b9 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -278,35 +278,48 @@ bool Constant::canTrap() const { return canTrapImpl(this, NonTrappingOps); } -/// isThreadDependent - Return true if the value can vary between threads. -bool Constant::isThreadDependent() const { - SmallPtrSet Visited; - SmallVector WorkList; - WorkList.push_back(this); - Visited.insert(this); +/// Check if C contains a GlobalValue for which Predicate is true. +static bool +ConstHasGlobalValuePredicate(const Constant *C, + bool (*Predicate)(const GlobalValue *)) { + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(C); + Visited.insert(C); while (!WorkList.empty()) { - const Constant *C = WorkList.pop_back_val(); - - if (const GlobalVariable *GV = dyn_cast(C)) { - if (GV->isThreadLocal()) + const Constant *WorkItem = WorkList.pop_back_val(); + if (const auto *GV = dyn_cast(WorkItem)) + if (Predicate(GV)) return true; - } - - for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) { - const Constant *D = dyn_cast(C->getOperand(I)); - if (!D) + for (const Value *Op : WorkItem->operands()) { + const Constant *ConstOp = dyn_cast(Op); + if (!ConstOp) continue; - if (Visited.insert(D)) - WorkList.push_back(D); + if (Visited.insert(ConstOp)) + WorkList.push_back(ConstOp); } } - return false; } -/// isConstantUsed - Return true if the constant has users other than constant -/// exprs and other dangling things. +/// Return true if the value can vary between threads. +bool Constant::isThreadDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->isThreadLocal(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +bool Constant::isDLLImportDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->hasDLLImportStorageClass(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +/// Return true if the constant has users other than constant exprs and other +/// dangling things. 
bool Constant::isConstantUsed() const { for (const User *U : users()) { const Constant *UC = dyn_cast(U); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e155daf6fcce..ff2f2a036225 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3313,6 +3313,10 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { static bool ValidLookupTableConstant(Constant *C) { if (ConstantExpr *CE = dyn_cast(C)) return CE->isGEPWithNoNotionalOverIndexing(); + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; return isa(C) || isa(C) || diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index 81079b1aa5ab..ee63d2c0c0f6 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -918,3 +918,55 @@ return: ; CHECK: switch i32 ; CHECK-NOT: @switch.table } + +; Don't build tables for switches with TLS variables. +@tls_a = thread_local global i32 0 +@tls_b = thread_local global i32 0 +@tls_c = thread_local global i32 0 +@tls_d = thread_local global i32 0 +define i32* @tls(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return +return: + %retval.0 = phi i32* [ @tls_d, %sw.default ], [ @tls_c, %sw.bb2 ], [ @tls_b, %sw.bb1 ], [ @tls_a, %entry ] + ret i32* %retval.0 +; CHECK-LABEL: @tls( +; CHECK: switch i32 +; CHECK-NOT: @switch.table +} + +; Don't build tables for switches with dllimport variables. +@dllimport_a = external dllimport global i32 +@dllimport_b = external dllimport global i32 +@dllimport_c = external dllimport global i32 +@dllimport_d = external dllimport global i32 +define i32* @dllimport(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return +return: + %retval.0 = phi i32* [ @dllimport_d, %sw.default ], [ @dllimport_c, %sw.bb2 ], [ @dllimport_b, %sw.bb1 ], [ @dllimport_a, %entry ] + ret i32* %retval.0 +; CHECK-LABEL: @dllimport( +; CHECK: switch i32 +; CHECK-NOT: @switch.table +} From c577e71bf580d344970406915e050a301ae8a28d Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 20 Jun 2014 01:05:28 +0000 Subject: [PATCH 146/157] [x86] Make the x86 PACKSSWB, PACKSSDW, PACKUSWB, and PACKUSDW instructions available as synthetic SDNodes PACKSS and PACKUS that will select to the correct instruction variants based on the return type. This allows us to use these rather important instructions when lowering vector shuffles. Also moves the relevant instruction definitions to be split out from the fully generic multiclasses to allow them to match these new SDNodes in the same way that the UNPCK instructions do. No functionality should actually be changed here. 
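As a reminder of the operation these nodes model, here is a scalar sketch of the 128-bit forms (illustrative helper names, not LLVM code): both inputs are narrowed element by element with signed (PACKSS) or unsigned (PACKUS) saturation and the results are concatenated, with the first operand filling the low half of the destination.

  #include <algorithm>
  #include <cstdint>

  static int8_t sat_s8(int16_t v) {   // signed saturation, as in PACKSSWB
    return (int8_t)std::min<int16_t>(127, std::max<int16_t>(-128, v));
  }
  static uint8_t sat_u8(int16_t v) {  // unsigned saturation, as in PACKUSWB
    return (uint8_t)std::min<int16_t>(255, std::max<int16_t>(0, v));
  }

  // v8i16 x v8i16 -> v16i8
  void packsswb(const int16_t a[8], const int16_t b[8], int8_t out[16]) {
    for (int i = 0; i != 8; ++i) out[i]     = sat_s8(a[i]);
    for (int i = 0; i != 8; ++i) out[8 + i] = sat_s8(b[i]);
  }
  void packuswb(const int16_t a[8], const int16_t b[8], uint8_t out[16]) {
    for (int i = 0; i != 8; ++i) out[i]     = sat_u8(a[i]);
    for (int i = 0; i != 8; ++i) out[8 + i] = sat_u8(b[i]);
  }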
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211332 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 +++ lib/Target/X86/X86ISelLowering.h | 2 + lib/Target/X86/X86InstrFragmentsSIMD.td | 4 + lib/Target/X86/X86InstrSSE.td | 151 ++++++++++++++++++++---- 4 files changed, 152 insertions(+), 21 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a7b6e707816f..09018a5e1187 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12513,6 +12513,20 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::x86_sse2_packssdw_128: + case Intrinsic::x86_sse2_packsswb_128: + case Intrinsic::x86_avx2_packssdw: + case Intrinsic::x86_avx2_packsswb: + return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_packuswb_128: + case Intrinsic::x86_sse41_packusdw: + case Intrinsic::x86_avx2_packuswb: + case Intrinsic::x86_avx2_packusdw: + return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), @@ -15286,6 +15300,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TESTM: return "X86ISD::TESTM"; case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; + case X86ISD::PACKSS: return "X86ISD::PACKSS"; + case X86ISD::PACKUS: return "X86ISD::PACKUS"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index af2e4344e5b6..df9ab3aa0681 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -315,6 +315,8 @@ namespace llvm { KORTEST, // Several flavors of instructions with vector shuffle behaviors. 
+ PACKSS, + PACKUS, PALIGNR, PSHUFD, PSHUFHW, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 1582f4388192..6f0fa9462770 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -224,6 +224,10 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; +def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; +def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; + def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 043b2f32c6ff..11c3f11f2cdc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4336,20 +4336,6 @@ defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, SSE_INTALU_ITINS_P, 0>; -//===---------------------------------------------------------------------===// -// SSE2 - Packed Integer Pack Instructions -//===---------------------------------------------------------------------===// - -defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, - int_x86_avx2_packsswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - int_x86_avx2_packssdw, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - int_x86_avx2_packuswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions //===---------------------------------------------------------------------===// @@ -4431,6 +4417,136 @@ let Predicates = [UseSSE2] in { (PSHUFDri VR128:$src1, imm:$imm)>; } +//===---------------------------------------------------------------------===// +// Packed Integer Pack Instructions (SSE & AVX) +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { +multiclass sse2_pack opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : PDI, + Sched<[WriteShuffle]>; + def rm : PDI, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse2_pack_y opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { + def Yrr : PDI, + Sched<[WriteShuffle]>; + def Yrm : PDI, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse4_pack opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : SS48I, + Sched<[WriteShuffle]>; + def rm : SS48I, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse4_pack_y opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { + def Yrr : SS48I, + Sched<[WriteShuffle]>; + def Yrm : SS48I, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +let Predicates = [HasAVX] in { + defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, + bc_v8i16, 0>, VEX_4V; + defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, + bc_v4i32, 0>, VEX_4V; + + defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, + bc_v8i16, 0>, VEX_4V; 
+ defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, + bc_v4i32, 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { + defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss, + bc_v16i16>, VEX_4V, VEX_L; + defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, + bc_v8i32>, VEX_4V, VEX_L; + + defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus, + bc_v16i16>, VEX_4V, VEX_L; + defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, + bc_v8i32>, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, + bc_v8i16>; + defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, + bc_v4i32>; + + defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, + bc_v8i16>; + + let Predicates = [HasSSE41] in + defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, + bc_v4i32>; +} +} // ExeDomain = SSEPackedInt + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Unpack Instructions //===---------------------------------------------------------------------===// @@ -7053,8 +7169,6 @@ multiclass SS48I_binop_rm2 opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX] in { let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, - 0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V; defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; @@ -7086,9 +7200,6 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", - int_x86_avx2_packusdw, WriteShuffle>, - VEX_4V, VEX_L; defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; @@ -7120,8 +7231,6 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { let isCommutable = 0 in - defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw, - 1, DEFAULT_ITINS_SHUFFLESCHED>; defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, From 16e592a6fe237900a7fa347c732f29395013fc34 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 01:30:43 +0000 Subject: [PATCH 147/157] Support: Write ScaledNumbers::getRounded() Start extracting helper functions out of -block-freq's `UnsignedFloat` into `Support/ScaledNumber.h` with the eventual goal of moving and renaming the class to `ScaledNumber`. The bike shed about names is still being painted, but I'm going with this for now. 
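A small usage sketch of the extracted helper (the surrounding function name is hypothetical; the expected values are the ones checked by the unit test added in this patch): rounding only happens when the flag is set, and an overflow of the digits rolls over into the scale.

  #include "llvm/Support/ScaledNumber.h"
  #include <cstdint>

  void roundedExamples() {
    using namespace llvm::ScaledNumbers;
    auto A = getRounded<uint32_t>(20, 21, /*ShouldRound=*/true);
    // A == {21, 21}: the digits are simply incremented, the scale is unchanged.
    auto B = getRounded<uint32_t>(UINT32_MAX, 0, /*ShouldRound=*/true);
    // B == {1u << 31, 1}: the digits wrapped to zero, so the carry goes into the scale.
    (void)A;
    (void)B;
  }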
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211333 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 10 ++-- include/llvm/Support/ScaledNumber.h | 57 ++++++++++++++++++ unittests/Support/CMakeLists.txt | 1 + unittests/Support/ScaledNumberTest.cpp | 60 +++++++++++++++++++ 4 files changed, 123 insertions(+), 5 deletions(-) create mode 100644 include/llvm/Support/ScaledNumber.h create mode 100644 unittests/Support/ScaledNumberTest.cpp diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index bd72d3ed6d4d..7ad76e5350e2 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -22,6 +22,7 @@ #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ScaledNumber.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -343,12 +344,11 @@ template class UnsignedFloat : UnsignedFloatBase { } static UnsignedFloat getRounded(UnsignedFloat P, bool Round) { - if (!Round) + // Saturate. + if (P.isLargest()) return P; - if (P.Digits == DigitsLimits::max()) - // Careful of overflow in the exponent. - return UnsignedFloat(1, P.Exponent) <<= Width; - return UnsignedFloat(P.Digits + 1, P.Exponent); + + return ScaledNumbers::getRounded(P.Digits, P.Exponent, Round); } }; diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h new file mode 100644 index 000000000000..afdc7dc1888a --- /dev/null +++ b/include/llvm/Support/ScaledNumber.h @@ -0,0 +1,57 @@ +//===- llvm/Support/ScaledNumber.h - Support for scaled numbers -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions (and a class) useful for working with scaled +// numbers -- in particular, pairs of integers where one represents digits and +// another represents a scale. The functions are helpers and live in the +// namespace ScaledNumbers. The class ScaledNumber is useful for modelling +// certain cost metrics that need simple, integer-like semantics that are easy +// to reason about. +// +// These might remind you of soft-floats. If you want one of those, you're in +// the wrong place. Look at include/llvm/ADT/APFloat.h instead. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_SCALEDNUMBER_H +#define LLVM_SUPPORT_SCALEDNUMBER_H + +#include +#include +#include + +namespace llvm { +namespace ScaledNumbers { + +/// \brief Get the width of a number. +template inline int getWidth() { return sizeof(DigitsT) * 8; } + +/// \brief Conditionally round up a scaled number. +/// +/// Given \c Digits and \c Scale, round up iff \c ShouldRound is \c true. +/// Always returns \c Scale unless there's an overflow, in which case it +/// returns \c 1+Scale. +/// +/// \pre adding 1 to \c Scale will not overflow INT16_MAX. +template +inline std::pair getRounded(DigitsT Digits, int16_t Scale, + bool ShouldRound) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + if (ShouldRound) + if (!++Digits) + // Overflow. 
+ return std::make_pair(DigitsT(1) << (getWidth() - 1), Scale + 1); + return std::make_pair(Digits, Scale); +} +} +} + +#endif + diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index c50acdfb8e64..08096a4a179a 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_unittest(SupportTests ProcessTest.cpp ProgramTest.cpp RegexTest.cpp + ScaledNumberTest.cpp SourceMgrTest.cpp StringPool.cpp SwapByteOrderTest.cpp diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp new file mode 100644 index 000000000000..1fa54ff81d75 --- /dev/null +++ b/unittests/Support/ScaledNumberTest.cpp @@ -0,0 +1,60 @@ +//===- llvm/unittest/Support/ScaledNumberTest.cpp - ScaledPair tests -----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ScaledNumber.h" + +#include "llvm/Support/DataTypes.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::ScaledNumbers; + +namespace { + +template struct ScaledPair { + UIntT D; + int S; + ScaledPair(const std::pair &F) : D(F.first), S(F.second) {} + ScaledPair(UIntT D, int S) : D(D), S(S) {} + + bool operator==(const ScaledPair &X) const { + return D == X.D && S == X.S; + } +}; +template +bool operator==(const std::pair &L, + const ScaledPair &R) { + return ScaledPair(L) == R; +} +template +void PrintTo(const ScaledPair &F, ::std::ostream *os) { + *os << F.D << "*2^" << F.S; +} + +typedef ScaledPair SP32; +typedef ScaledPair SP64; + +TEST(ScaledNumberHelpersTest, getRounded) { + EXPECT_EQ(getRounded(0, 0, false), SP32(0, 0)); + EXPECT_EQ(getRounded(0, 0, true), SP32(1, 0)); + EXPECT_EQ(getRounded(20, 21, true), SP32(21, 21)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, true), SP32(1 << 31, 1)); + + EXPECT_EQ(getRounded(0, 0, false), SP64(0, 0)); + EXPECT_EQ(getRounded(0, 0, true), SP64(1, 0)); + EXPECT_EQ(getRounded(20, 21, true), SP64(21, 21)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getRounded(UINT32_MAX, 0, true), + SP64(UINT64_C(1) << 32, 0)); + EXPECT_EQ(getRounded(UINT64_MAX, 0, false), SP64(UINT64_MAX, 0)); + EXPECT_EQ(getRounded(UINT64_MAX, 0, true), + SP64(UINT64_C(1) << 63, 1)); +} +} From caa2bd600c65e9c04d24fe0199d6a61e215d4682 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 20 Jun 2014 01:36:00 +0000 Subject: [PATCH 148/157] Fix .cpp files claiming to be header files git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211334 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Atomic.cpp | 2 +- lib/Support/DynamicLibrary.cpp | 2 +- lib/Support/Host.cpp | 2 +- lib/Support/Process.cpp | 2 +- lib/Support/Program.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 2ef32b08ef28..ac4ff3eb5c66 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements atomic operations. +// This file implements atomic operations. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 82d7c0cc6d19..d2b551e8a0a6 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system DynamicLibrary concept. +// This file implements the operating system DynamicLibrary concept. // // FIXME: This file leaks ExplicitSymbols and OpenedHandles! // diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index ce0a3b6bed77..03187e97eef7 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Host concept. +// This file implements the operating system Host concept. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index 0380ed955dd5..087c459e187e 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Process concept. +// This file implements the operating system Process concept. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index eb700e3a8591..b84b82b1f10b 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Program concept. +// This file implements the operating system Program concept. // //===----------------------------------------------------------------------===// From 8483dacfd804c71adc73764cee7f1304d1c8977c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 20 Jun 2014 01:37:35 +0000 Subject: [PATCH 149/157] Small clanups: Use static instead of anonymous namespace. Delete write only variables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211335 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/gold/gold-plugin.cpp | 58 +++++++++++++++----------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 4e273de3404a..3c2da94af532 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -50,35 +50,30 @@ using namespace llvm; namespace { - ld_plugin_status discard_message(int level, const char *format, ...) { - // Die loudly. Recent versions of Gold pass ld_plugin_message as the first - // callback in the transfer vector. This should never be called. 
- abort(); - } +struct claimed_file { + void *handle; + std::vector syms; +}; +} - ld_plugin_add_symbols add_symbols = NULL; - ld_plugin_get_symbols get_symbols = NULL; - ld_plugin_add_input_file add_input_file = NULL; - ld_plugin_add_input_library add_input_library = NULL; - ld_plugin_set_extra_library_path set_extra_library_path = NULL; - ld_plugin_get_view get_view = NULL; - ld_plugin_message message = discard_message; - - int api_version = 0; - int gold_version = 0; - - struct claimed_file { - void *handle; - std::vector syms; - }; - - lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; - std::string output_name = ""; - std::list Modules; - std::vector Cleanup; - LTOCodeGenerator *CodeGen = nullptr; - StringSet<> CannotBeHidden; +static ld_plugin_status discard_message(int level, const char *format, ...) { + // Die loudly. Recent versions of Gold pass ld_plugin_message as the first + // callback in the transfer vector. This should never be called. + abort(); } + +static ld_plugin_add_symbols add_symbols = NULL; +static ld_plugin_get_symbols get_symbols = NULL; +static ld_plugin_add_input_file add_input_file = NULL; +static ld_plugin_set_extra_library_path set_extra_library_path = NULL; +static ld_plugin_get_view get_view = NULL; +static ld_plugin_message message = discard_message; +static lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; +static std::string output_name = ""; +static std::list Modules; +static std::vector Cleanup; +static LTOCodeGenerator *CodeGen = nullptr; +static StringSet<> CannotBeHidden; static llvm::TargetOptions TargetOpts; namespace options { @@ -151,12 +146,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { for (; tv->tv_tag != LDPT_NULL; ++tv) { switch (tv->tv_tag) { - case LDPT_API_VERSION: - api_version = tv->tv_u.tv_val; - break; - case LDPT_GOLD_VERSION: // major * 100 + minor - gold_version = tv->tv_u.tv_val; - break; case LDPT_OUTPUT_NAME: output_name = tv->tv_u.tv_string; break; @@ -213,9 +202,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { case LDPT_ADD_INPUT_FILE: add_input_file = tv->tv_u.tv_add_input_file; break; - case LDPT_ADD_INPUT_LIBRARY: - add_input_library = tv->tv_u.tv_add_input_file; - break; case LDPT_SET_EXTRA_LIBRARY_PATH: set_extra_library_path = tv->tv_u.tv_set_extra_library_path; break; From 5cf39383da201784a40eb47cfd289cbe98972145 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 02:31:03 +0000 Subject: [PATCH 150/157] Support: Write ScaledNumbers::getAdjusted() git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211336 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 24 ++++++------- include/llvm/Support/ScaledNumber.h | 35 +++++++++++++++++++ unittests/Support/ScaledNumberTest.cpp | 23 ++++++++++++ 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 7ad76e5350e2..264aff372581 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -327,20 +327,16 @@ template class UnsignedFloat : UnsignedFloatBase { return countLeadingZeros32(Digits) + Width - 32; } - static UnsignedFloat adjustToWidth(uint64_t N, int32_t S) { - assert(S >= MinExponent); - assert(S <= MaxExponent); - if (Width == 64 || N <= DigitsLimits::max()) - return UnsignedFloat(N, S); - - // Shift right. - int Shift = 64 - Width - countLeadingZeros64(N); - DigitsType Shifted = N >> Shift; - - // Round. 
- assert(S + Shift <= MaxExponent); - return getRounded(UnsignedFloat(Shifted, S + Shift), - N & UINT64_C(1) << (Shift - 1)); + /// \brief Adjust a number to width, rounding up if necessary. + /// + /// Should only be called for \c Shift close to zero. + /// + /// \pre Shift >= MinExponent && Shift + 64 <= MaxExponent. + static UnsignedFloat adjustToWidth(uint64_t N, int32_t Shift) { + assert(Shift >= MinExponent && "Shift should be close to 0"); + assert(Shift <= MaxExponent - 64 && "Shift should be close to 0"); + auto Adjusted = ScaledNumbers::getAdjusted(N, Shift); + return Adjusted; } static UnsignedFloat getRounded(UnsignedFloat P, bool Round) { diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index afdc7dc1888a..54b5386c5ac2 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -22,6 +22,8 @@ #ifndef LLVM_SUPPORT_SCALEDNUMBER_H #define LLVM_SUPPORT_SCALEDNUMBER_H +#include "llvm/Support/MathExtras.h" + #include #include #include @@ -50,6 +52,39 @@ inline std::pair getRounded(DigitsT Digits, int16_t Scale, return std::make_pair(DigitsT(1) << (getWidth() - 1), Scale + 1); return std::make_pair(Digits, Scale); } + +/// \brief Adjust a 64-bit scaled number down to the appropriate width. +/// +/// Adjust a soft float with 64-bits of digits down, keeping as much +/// information as possible, and rounding up on half. +/// +/// \pre Adding 1 to \c Scale will not overflow INT16_MAX. +template +inline std::pair getAdjusted(uint64_t Digits, + int16_t Scale = 0) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + const int Width = getWidth(); + if (Width == 64 || Digits <= std::numeric_limits::max()) + return std::make_pair(Digits, Scale); + + // Shift right and round. + int Shift = 64 - Width - countLeadingZeros(Digits); + return getRounded(Digits >> Shift, Scale + Shift, + Digits & (UINT64_C(1) << (Shift - 1))); +} + +/// \brief Convenience helper for adjusting to 32 bits. +inline std::pair getAdjusted32(uint64_t Digits, + int16_t Scale = 0) { + return getAdjusted(Digits, Scale); +} + +/// \brief Convenience helper for adjusting to 64 bits. 
+inline std::pair getAdjusted64(uint64_t Digits, + int16_t Scale = 0) { + return getAdjusted(Digits, Scale); +} } } diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index 1fa54ff81d75..dac24051a62d 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -57,4 +57,27 @@ TEST(ScaledNumberHelpersTest, getRounded) { EXPECT_EQ(getRounded(UINT64_MAX, 0, true), SP64(UINT64_C(1) << 63, 1)); } + +TEST(FloatsTest, getAdjusted) { + const uint64_t Max32In64 = UINT32_MAX; + EXPECT_EQ(getAdjusted32(0), SP32(0, 0)); + EXPECT_EQ(getAdjusted32(0, 5), SP32(0, 5)); + EXPECT_EQ(getAdjusted32(UINT32_MAX), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getAdjusted32(Max32In64 << 1), SP32(UINT32_MAX, 1)); + EXPECT_EQ(getAdjusted32(Max32In64 << 1, 1), SP32(UINT32_MAX, 2)); + EXPECT_EQ(getAdjusted32(Max32In64 << 31), SP32(UINT32_MAX, 31)); + EXPECT_EQ(getAdjusted32(Max32In64 << 32), SP32(UINT32_MAX, 32)); + EXPECT_EQ(getAdjusted32(Max32In64 + 1), SP32(1u << 31, 1)); + EXPECT_EQ(getAdjusted32(UINT64_MAX), SP32(1u << 31, 33)); + + EXPECT_EQ(getAdjusted64(0), SP64(0, 0)); + EXPECT_EQ(getAdjusted64(0, 5), SP64(0, 5)); + EXPECT_EQ(getAdjusted64(UINT32_MAX), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 1), SP64(Max32In64 << 1, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 1, 1), SP64(Max32In64 << 1, 1)); + EXPECT_EQ(getAdjusted64(Max32In64 << 31), SP64(Max32In64 << 31, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 32), SP64(Max32In64 << 32, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 + 1), SP64(Max32In64 + 1, 0)); + EXPECT_EQ(getAdjusted64(UINT64_MAX), SP64(UINT64_MAX, 0)); +} } From af08b8b820acc71b02e7e8e4d0de3679c7971773 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 20 Jun 2014 02:31:07 +0000 Subject: [PATCH 151/157] Support: Clean up getRounded() tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211337 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/ScaledNumber.h | 12 ++++++++++++ unittests/Support/ScaledNumberTest.cpp | 26 ++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index 54b5386c5ac2..f6594ec6aa55 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -53,6 +53,18 @@ inline std::pair getRounded(DigitsT Digits, int16_t Scale, return std::make_pair(Digits, Scale); } +/// \brief Convenience helper for 32-bit rounding. +inline std::pair getRounded32(uint32_t Digits, int16_t Scale, + bool ShouldRound) { + return getRounded(Digits, Scale, ShouldRound); +} + +/// \brief Convenience helper for 64-bit rounding. +inline std::pair getRounded64(uint64_t Digits, int16_t Scale, + bool ShouldRound) { + return getRounded(Digits, Scale, ShouldRound); +} + /// \brief Adjust a 64-bit scaled number down to the appropriate width. 
/// /// Adjust a soft float with 64-bits of digits down, keeping as much diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp index dac24051a62d..d1277d0479a6 100644 --- a/unittests/Support/ScaledNumberTest.cpp +++ b/unittests/Support/ScaledNumberTest.cpp @@ -41,21 +41,19 @@ typedef ScaledPair SP32; typedef ScaledPair SP64; TEST(ScaledNumberHelpersTest, getRounded) { - EXPECT_EQ(getRounded(0, 0, false), SP32(0, 0)); - EXPECT_EQ(getRounded(0, 0, true), SP32(1, 0)); - EXPECT_EQ(getRounded(20, 21, true), SP32(21, 21)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP32(UINT32_MAX, 0)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, true), SP32(1 << 31, 1)); + EXPECT_EQ(getRounded32(0, 0, false), SP32(0, 0)); + EXPECT_EQ(getRounded32(0, 0, true), SP32(1, 0)); + EXPECT_EQ(getRounded32(20, 21, true), SP32(21, 21)); + EXPECT_EQ(getRounded32(UINT32_MAX, 0, false), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getRounded32(UINT32_MAX, 0, true), SP32(1 << 31, 1)); - EXPECT_EQ(getRounded(0, 0, false), SP64(0, 0)); - EXPECT_EQ(getRounded(0, 0, true), SP64(1, 0)); - EXPECT_EQ(getRounded(20, 21, true), SP64(21, 21)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, false), SP64(UINT32_MAX, 0)); - EXPECT_EQ(getRounded(UINT32_MAX, 0, true), - SP64(UINT64_C(1) << 32, 0)); - EXPECT_EQ(getRounded(UINT64_MAX, 0, false), SP64(UINT64_MAX, 0)); - EXPECT_EQ(getRounded(UINT64_MAX, 0, true), - SP64(UINT64_C(1) << 63, 1)); + EXPECT_EQ(getRounded64(0, 0, false), SP64(0, 0)); + EXPECT_EQ(getRounded64(0, 0, true), SP64(1, 0)); + EXPECT_EQ(getRounded64(20, 21, true), SP64(21, 21)); + EXPECT_EQ(getRounded64(UINT32_MAX, 0, false), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getRounded64(UINT32_MAX, 0, true), SP64(UINT64_C(1) << 32, 0)); + EXPECT_EQ(getRounded64(UINT64_MAX, 0, false), SP64(UINT64_MAX, 0)); + EXPECT_EQ(getRounded64(UINT64_MAX, 0, true), SP64(UINT64_C(1) << 63, 1)); } TEST(FloatsTest, getAdjusted) { From d2ce9392dca6349a22c8f3e21e74aa59a82d8900 Mon Sep 17 00:00:00 2001 From: Karthik Bhat Date: Fri, 20 Jun 2014 04:32:48 +0000 Subject: [PATCH 152/157] Add Support to Recognize and Vectorize NON SIMD instructions in SLPVectorizer. This patch adds support to recognize patterns such as fadd,fsub,fadd,fsub.../add,sub,add,sub... and vectorizes them as vector shuffles if they are profitable. These patterns of vector shuffle can later be converted to instructions such as addsubpd etc on X86. Thanks to Arnold and Hal for the reviews. http://reviews.llvm.org/D4015 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211339 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/TargetTransformInfo.h | 1 + lib/CodeGen/BasicTargetTransformInfo.cpp | 23 +++ lib/Target/ARM/ARMTargetTransformInfo.cpp | 67 +++++--- lib/Target/X86/X86TargetTransformInfo.cpp | 46 ++++- lib/Transforms/Vectorize/SLPVectorizer.cpp | 165 ++++++++++++++++-- test/Transforms/SLPVectorizer/X86/addsub.ll | 181 ++++++++++++++++++++ 6 files changed, 441 insertions(+), 42 deletions(-) create mode 100644 test/Transforms/SLPVectorizer/X86/addsub.ll diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 79fe1dcae639..f57f3eb009a1 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -322,6 +322,7 @@ class TargetTransformInfo { enum ShuffleKind { SK_Broadcast, ///< Broadcast element 0 to all other elements. SK_Reverse, ///< Reverse the order of the vector. + SK_Alternate, ///< Choose alternate elements from vector. 
SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset. }; diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 7f31b1a982fc..b2737bf754f9 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + /// Estimate the cost overhead of SK_Alternate shuffle. + unsigned getAltShuffleOverhead(Type *Ty) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } public: @@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return OpCost; } +unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Shuffle cost is equal to the cost of extracting element from its argument + // plus the cost of inserting them onto the result vector. + + // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index + // 0 of first vector, index 1 of second vector,index 2 of first vector and + // finally index 3 of second vector and insert them at index <0,1,2,3> of + // result vector. + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + return Cost; +} + unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { + if (Kind == SK_Alternate) { + return getAltShuffleOverhead(Tp); + } return 1; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 57df7da7f310..a2ace629baa1 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only handle costs of reverse shuffles for now. - if (Kind != SK_Reverse) + // We only handle costs of reverse and alternate shuffles for now. + if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - static const CostTblEntry NEONShuffleTbl[] = { - // Reverse shuffle cost one instruction if we are shuffling within a double - // word (vrev) or two if we shuffle a quad word (vrev, vext). - { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, - - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } - }; + if (Kind == SK_Reverse) { + static const CostTblEntry NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). 
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - std::pair LT = TLI->getTypeLegalizationCost(Tp); + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + std::pair LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * NEONShuffleTbl[Idx].Cost; + return LT.first * NEONShuffleTbl[Idx].Cost; + } + if (Kind == SK_Alternate) { + static const CostTblEntry NEONAltShuffleTbl[] = { + // Alt shuffle cost table for ARM. Cost is the number of instructions + // required to create the shuffled vector. + + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; + + std::pair LT = TLI->getTypeLegalizationCost(Tp); + int Idx = + CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * NEONAltShuffleTbl[Idx].Cost; + } + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 299f9a581b8d..9fa911967e7a 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -402,17 +402,47 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only estimate the cost of reverse shuffles. - if (Kind != SK_Reverse) + // We only estimate the cost of reverse and alternate shuffles. + if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - std::pair LT = TLI->getTypeLegalizationCost(Tp); - unsigned Cost = 1; - if (LT.second.getSizeInBits() > 128) - Cost = 3; // Extract + insert + copy. + if (Kind == SK_Reverse) { + std::pair LT = TLI->getTypeLegalizationCost(Tp); + unsigned Cost = 1; + if (LT.second.getSizeInBits() > 128) + Cost = 3; // Extract + insert + copy. - // Multiple by the number of parts. - return Cost * LT.first; + // Multiple by the number of parts. + return Cost * LT.first; + } + + if (Kind == SK_Alternate) { + static const CostTblEntry X86AltShuffleTbl[] = { + // Alt shuffle cost table for X86. Cost is the number of instructions + // required to create the shuffled vector. 
+ + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 8}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 49}}; + + std::pair LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(X86AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * X86AltShuffleTbl[Idx].Cost; + } + + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index f18202be167e..53a43d9851e9 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -149,6 +149,48 @@ static bool isSplat(ArrayRef VL) { return true; } +///\returns Opcode that can be clubbed with \p Op to create an alternate +/// sequence which can later be merged as a ShuffleVector instruction. +static unsigned getAltOpcode(unsigned Op) { + switch (Op) { + case Instruction::FAdd: + return Instruction::FSub; + case Instruction::FSub: + return Instruction::FAdd; + case Instruction::Add: + return Instruction::Sub; + case Instruction::Sub: + return Instruction::Add; + default: + return 0; + } +} + +///\returns bool representing if Opcode \p Op can be part +/// of an alternate sequence which can later be merged as +/// a ShuffleVector instruction. +static bool canCombineAsAltInst(unsigned Op) { + if (Op == Instruction::FAdd || Op == Instruction::FSub || + Op == Instruction::Sub || Op == Instruction::Add) + return true; + return false; +} + +/// \returns ShuffleVector instruction if intructions in \p VL have +/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence. +/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...) +static unsigned isAltInst(ArrayRef VL) { + Instruction *I0 = dyn_cast(VL[0]); + unsigned Opcode = I0->getOpcode(); + unsigned AltOpcode = getAltOpcode(Opcode); + for (int i = 1, e = VL.size(); i < e; i++) { + Instruction *I = dyn_cast(VL[i]); + if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode)) + return 0; + } + return Instruction::ShuffleVector; +} + /// \returns The opcode if all of the Instructions in \p VL have the same /// opcode, or zero. static unsigned getSameOpcode(ArrayRef VL) { @@ -158,8 +200,11 @@ static unsigned getSameOpcode(ArrayRef VL) { unsigned Opcode = I0->getOpcode(); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast(VL[i]); - if (!I || Opcode != I->getOpcode()) + if (!I || Opcode != I->getOpcode()) { + if (canCombineAsAltInst(Opcode) && i == 1) + return isAltInst(VL); return 0; + } } return Opcode; } @@ -377,6 +422,7 @@ class BoUpSLP { /// \brief Perform LICM and CSE on the newly generated gather sequences. 
void optimizeGatherSequence(); + private: struct TreeEntry; @@ -594,6 +640,7 @@ void BoUpSLP::buildTree(ArrayRef Roots, void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { bool SameTy = getSameType(VL); (void)SameTy; + bool isAltShuffle = false; assert(SameTy && "Invalid types!"); if (Depth == RecursionMaxDepth) { @@ -615,10 +662,19 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { newTreeEntry(VL, false); return; } + unsigned Opcode = getSameOpcode(VL); + + // Check that this shuffle vector refers to the alternate + // sequence of opcodes. + if (Opcode == Instruction::ShuffleVector) { + Instruction *I0 = dyn_cast(VL[0]); + unsigned Op = I0->getOpcode(); + if (Op != Instruction::ShuffleVector) + isAltShuffle = true; + } // If all of the operands are identical or constant we have a simple solution. - if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || - !getSameOpcode(VL)) { + if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) { DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); newTreeEntry(VL, false); return; @@ -754,8 +810,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); - unsigned Opcode = getSameOpcode(VL); - // Check if it is safe to sink the loads or the stores. if (Opcode == Instruction::Load || Opcode == Instruction::Store) { Instruction *Last = getLastInstruction(VL); @@ -1057,6 +1111,26 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } return; } + case Instruction::ShuffleVector: { + // If this is not an alternate sequence of opcode like add-sub + // then do not vectorize this instruction. + if (!isAltShuffle) { + newTreeEntry(VL, false); + DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); + return; + } + newTreeEntry(VL, true); + DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast(VL[j])->getOperand(i)); + + buildTree_rec(Operands, Depth + 1); + } + return; + } default: newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); @@ -1080,11 +1154,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } return getGatherCost(E->Scalars); } - - assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) && - "Invalid VL"); + unsigned Opcode = getSameOpcode(VL); + assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL"); Instruction *VL0 = cast(VL[0]); - unsigned Opcode = VL0->getOpcode(); switch (Opcode) { case Instruction::PHI: { return 0; @@ -1242,6 +1314,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return VecCallCost - ScalarCallCost; } + case Instruction::ShuffleVector: { + TargetTransformInfo::OperandValueKind Op1VK = + TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Op2VK = + TargetTransformInfo::OK_AnyValue; + int ScalarCost = 0; + int VecCost = 0; + for (unsigned i = 0; i < VL.size(); ++i) { + Instruction *I = cast(VL[i]); + if (!I) + break; + ScalarCost += + TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); + } + // VecCost is equal to sum of the cost of creating 2 vectors + // and the cost of creating shuffle. 
+ Instruction *I0 = cast(VL[0]); + VecCost = + TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); + Instruction *I1 = cast(VL[1]); + VecCost += + TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); + VecCost += + TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); + return VecCost - ScalarCost; + } default: llvm_unreachable("Unknown instruction"); } @@ -1522,9 +1620,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); return Gather(E->Scalars, VecTy); } - - unsigned Opcode = VL0->getOpcode(); - assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode"); + unsigned Opcode = getSameOpcode(E->Scalars); switch (Opcode) { case Instruction::PHI: { @@ -1797,6 +1893,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = V; return V; } + case Instruction::ShuffleVector: { + ValueList LHSVL, RHSVL; + for (int i = 0, e = E->Scalars.size(); i < e; ++i) { + LHSVL.push_back(cast(E->Scalars[i])->getOperand(0)); + RHSVL.push_back(cast(E->Scalars[i])->getOperand(1)); + } + setInsertPointAfterBundle(E->Scalars); + + Value *LHS = vectorizeTree(LHSVL); + Value *RHS = vectorizeTree(RHSVL); + + if (Value *V = alreadyVectorized(E->Scalars)) + return V; + + // Create a vector of LHS op1 RHS + BinaryOperator *BinOp0 = cast(VL0); + Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS); + + // Create a vector of LHS op2 RHS + Instruction *VL1 = cast(E->Scalars[1]); + BinaryOperator *BinOp1 = cast(VL1); + Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); + + // Create appropriate shuffle to take alternative operations from + // the vector. + std::vector Mask(E->Scalars.size()); + unsigned e = E->Scalars.size(); + for (unsigned i = 0; i < e; ++i) { + if (i & 1) + Mask[i] = Builder.getInt32(e + i); + else + Mask[i] = Builder.getInt32(i); + } + + Value *ShuffleMask = ConstantVector::get(Mask); + + Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); + E->VectorizedValue = V; + if (Instruction *I = dyn_cast(V)) + return propagateMetadata(I, E->Scalars); + + return V; + } default: llvm_unreachable("unknown inst"); } @@ -1865,7 +2004,6 @@ Value *BoUpSLP::vectorizeTree() { // For each lane: for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; - // No need to handle users of gathered values. if (Entry->NeedToGather) continue; @@ -2049,7 +2187,6 @@ struct SLPVectorizer : public FunctionPass { for (po_iterator it = po_begin(&F.getEntryBlock()), e = po_end(&F.getEntryBlock()); it != e; ++it) { BasicBlock *BB = *it; - // Vectorize trees that end at stores. 
if (unsigned count = collectStores(BB, R)) { (void)count; diff --git a/test/Transforms/SLPVectorizer/X86/addsub.ll b/test/Transforms/SLPVectorizer/X86/addsub.ll new file mode 100644 index 000000000000..8303bc8181ef --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/addsub.ll @@ -0,0 +1,181 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@b = common global [4 x i32] zeroinitializer, align 16 +@c = common global [4 x i32] zeroinitializer, align 16 +@d = common global [4 x i32] zeroinitializer, align 16 +@e = common global [4 x i32] zeroinitializer, align 16 +@a = common global [4 x i32] zeroinitializer, align 16 +@fb = common global [4 x float] zeroinitializer, align 16 +@fc = common global [4 x float] zeroinitializer, align 16 +@fa = common global [4 x float] zeroinitializer, align 16 + +; CHECK-LABEL: @addsub +; CHECK: %5 = add <4 x i32> %3, %4 +; CHECK: %6 = add <4 x i32> %2, %5 +; CHECK: %7 = sub <4 x i32> %2, %5 +; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> + +; Function Attrs: nounwind uwtable +define void @addsub() #0 { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 + %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %add1 = add nsw i32 %2, %3 + %add2 = add nsw i32 %add, %add1 + store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %add3 = add nsw i32 %4, %5 + %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %add4 = add nsw i32 %6, %7 + %sub = sub nsw i32 %add3, %add4 + store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 + %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %add5 = add nsw i32 %8, %9 + %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %add6 = add nsw i32 %10, %11 + %add7 = add nsw i32 %add5, %add6 + store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 + %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %add8 = add nsw i32 %12, %13 + %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 + %add9 = add nsw i32 %14, %15 + %sub10 = sub nsw i32 %add8, %add9 + store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @subadd +; CHECK: %5 = add <4 x i32> %3, %4 +; CHECK: %6 = sub <4 x i32> %2, %5 +; CHECK: %7 = add <4 x i32> %2, %5 +; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> + +; Function Attrs: nounwind uwtable +define void @subadd() #0 { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 
+ %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %add1 = add nsw i32 %2, %3 + %sub = sub nsw i32 %add, %add1 + store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %add2 = add nsw i32 %4, %5 + %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %add3 = add nsw i32 %6, %7 + %add4 = add nsw i32 %add2, %add3 + store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 + %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %add5 = add nsw i32 %8, %9 + %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %add6 = add nsw i32 %10, %11 + %sub7 = sub nsw i32 %add5, %add6 + store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 + %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %add8 = add nsw i32 %12, %13 + %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 + %add9 = add nsw i32 %14, %15 + %add10 = add nsw i32 %add8, %add9 + store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @faddfsub +; CHECK: %2 = fadd <4 x float> %0, %1 +; CHECK: %3 = fsub <4 x float> %0, %1 +; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> +; Function Attrs: nounwind uwtable +define void @faddfsub() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %add = fadd float %0, %1 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %sub = fsub float %2, %3 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %add1 = fadd float %4, %5 + store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %sub2 = fsub float %6, %7 + store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @fsubfadd +; CHECK: %2 = fsub <4 x float> %0, %1 +; CHECK: %3 = fadd <4 x float> %0, %1 +; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> +; Function Attrs: nounwind uwtable 
+define void @fsubfadd() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %sub = fsub float %0, %1 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %add = fadd float %2, %3 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %sub1 = fsub float %4, %5 + store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %add2 = fadd float %6, %7 + store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @No_faddfsub +; CHECK-NOT: fadd <4 x float> +; CHECK-NOT: fsub <4 x float> +; CHECK-NOT: shufflevector +; Function Attrs: nounwind uwtable +define void @No_faddfsub() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %add = fadd float %0, %1 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %add1 = fadd float %2, %3 + store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %add2 = fadd float %4, %5 + store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %sub = fsub float %6, %7 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +attributes #0 = { nounwind } + From 14d8c9e5502b58aead5d390416c3c4eea31841d1 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 20 Jun 2014 04:36:29 +0000 Subject: [PATCH 153/157] ArgList: use MakeArgList overloads in subclasses and clean up some calls. 
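This change depends on a C++ name-lookup rule: declaring an override of one
MakeArgString overload in InputArgList/DerivedArgList hides every inherited
ArgList::MakeArgString overload, which is presumably why the subclasses also
gain a using-declaration. A minimal sketch of that pattern, using hypothetical
stand-in types rather than the real llvm::opt classes:

    #include <iostream>
    #include <string>

    // Hypothetical stand-ins; only the overload-hiding behaviour is meant
    // to match the real ArgList hierarchy.
    struct Twineish { std::string Str; };  // does not convert to std::string

    struct BaseList {
      virtual ~BaseList() = default;
      virtual const char *MakeArgString(const std::string &S) const = 0;
      // Convenience overload implemented in terms of the virtual one.
      const char *MakeArgString(const Twineish &T) const {
        return MakeArgString(T.Str);
      }
    };

    struct DerivedList : BaseList {
      // Without this using-declaration the override below hides *all*
      // BaseList::MakeArgString overloads and the call in main() does not
      // compile; this mirrors the "using ArgList::MakeArgString;" lines in
      // the diff below.
      using BaseList::MakeArgString;
      const char *MakeArgString(const std::string &S) const override {
        Storage = S;
        return Storage.c_str();
      }
      mutable std::string Storage;
    };

    int main() {
      DerivedList L;
      std::cout << L.MakeArgString(Twineish{"-foo"}) << "\n";
      return 0;
    }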
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211340 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Option/ArgList.h | 2 ++ lib/Option/ArgList.cpp | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index ab40a1a0d40a..af8cae15fc2d 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -328,6 +328,7 @@ class InputArgList : public ArgList { unsigned MakeIndex(StringRef String0) const; unsigned MakeIndex(StringRef String0, StringRef String1) const; + using ArgList::MakeArgString; const char *MakeArgString(StringRef Str) const override; /// @} @@ -365,6 +366,7 @@ class DerivedArgList : public ArgList { /// (to be freed). void AddSynthesizedArg(Arg *A); + using ArgList::MakeArgString; const char *MakeArgString(StringRef Str) const override; /// AddFlagArg - Construct a new FlagArg for the given option \p Id and diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index a5ab8d747d87..28b42a8f8290 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -350,30 +350,27 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) { } Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const { - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - BaseArgs.MakeIndex(Opt.getName()), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + BaseArgs.MakeIndex(Opt.getName()), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Value); - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index + 1), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index + 1), BaseArg)); return SynthesizedArgs.back().get(); } @@ -381,8 +378,7 @@ Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str()); SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); + Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, + BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); return SynthesizedArgs.back().get(); } From 7d1f7798dbb2242bceb4b8578d4b0de0788088c4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 20 Jun 2014 06:50:05 +0000 Subject: [PATCH 154/157] R600: Trivial subtarget feature cleanups. Remove an unused AMDIL leftover, correct extra periods appearing in the help menu. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211341 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.td | 15 +++++---------- lib/Target/R600/AMDGPUSubtarget.h | 1 - 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index d3dff531a7f6..6ff9ab7ab7d2 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -32,30 +32,25 @@ def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", "false", "Disable the if conversion pass">; -def FeatureFP64 : SubtargetFeature<"fp64", +def FeatureFP64 : SubtargetFeature<"fp64", "FP64", "true", - "Enable 64bit double precision operations">; + "Enable double precision operations">; def Feature64BitPtr : SubtargetFeature<"64BitPtr", "Is64bit", "true", - "Specify if 64bit addressing should be used.">; - -def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", - "Is32on64bit", - "false", - "Specify if 64bit sized pointers with 32bit addressing should be used.">; + "Specify if 64-bit addressing should be used">; def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", "R600ALUInst", "false", - "Older version of ALU instructions encoding.">; + "Older version of ALU instructions encoding">; def FeatureVertexCache : SubtargetFeature<"HasVertexCache", "HasVertexCache", "true", - "Specify use of dedicated vertex cache.">; + "Specify use of dedicated vertex cache">; def FeatureCaymanISA : SubtargetFeature<"caymanISA", "CaymanISA", diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 9c78f35df3e2..bbbd997f6af8 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -44,7 +44,6 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { private: std::string DevName; bool Is64bit; - bool Is32on64bit; bool DumpCode; bool R600ALUInst; bool HasVertexCache; From a5efeb6b39304f5d34d74a5e32e5142d84eb2a43 Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Fri, 20 Jun 2014 09:28:09 +0000 Subject: [PATCH 155/157] ps][mips64r6] Added LSA/DLSA instructions Differential Revision: http://reviews.llvm.org/D3897 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211346 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips32r6InstrFormats.td | 19 +++++++++++++++++++ lib/Target/Mips/Mips32r6InstrInfo.td | 14 +++++++++++++- lib/Target/Mips/Mips64r6InstrInfo.td | 4 +++- test/CodeGen/Mips/msa/special.ll | 4 ++++ test/MC/Mips/mips32r6/valid.s | 1 + test/MC/Mips/mips64r6/valid.s | 2 ++ 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td index 051db7525f6d..9b7ada68ee26 100644 --- a/lib/Target/Mips/Mips32r6InstrFormats.td +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -95,6 +95,8 @@ def OPCODE6_CLO : OPCODE6<0b010001>; def OPCODE6_CLZ : OPCODE6<0b010000>; def OPCODE6_DCLO : OPCODE6<0b010011>; def OPCODE6_DCLZ : OPCODE6<0b010010>; +def OPCODE6_LSA : OPCODE6<0b000101>; +def OPCODE6_DLSA : OPCODE6<0b010101>; class FIELD_FMT Val> { bits<5> Value = Val; @@ -453,6 +455,23 @@ class SPECIAL3_LL_SC_FM : MipsR6Inst { string DecoderMethod = "DecodeSpecial3LlSc"; } +class SPECIAL_LSA_FM : MipsR6Inst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<2> imm2; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-8} = 0b000; + let Inst{7-6} = imm2; + let Inst{5-0} = Operation.Value; +} + class REGIMM_FM : MipsR6Inst { bits<5> rs; 
bits<16> imm; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 52e12fcf9462..9ec41a20b394 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -160,6 +160,8 @@ class LWC2_R6_ENC : COP2LDST_FM; class SDC2_R6_ENC : COP2LDST_FM; class SWC2_R6_ENC : COP2LDST_FM; +class LSA_R6_ENC : SPECIAL_LSA_FM; + class LL_R6_ENC : SPECIAL3_LL_SC_FM; class SC_R6_ENC : SPECIAL3_LL_SC_FM; @@ -574,6 +576,16 @@ class COP2ST_DESC_BASE { class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>; class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>; +class LSA_R6_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $imm2"); + list Pattern = []; +} + +class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; + class LL_R6_DESC_BASE { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins mem_simm9:$addr); @@ -667,7 +679,7 @@ def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6; def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6; def LDC2_R6 : LDC2_R6_ENC, LDC2_R6_DESC, ISA_MIPS32R6; def LL_R6 : LL_R6_ENC, LL_R6_DESC, ISA_MIPS32R6; -// def LSA; // See MSA +def LSA_R6 : LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6; def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6; def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6; def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index dbfb9fbbf9bc..96d111f64b26 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -29,6 +29,7 @@ class DCLO_R6_ENC : SPECIAL_2R_FM; class DCLZ_R6_ENC : SPECIAL_2R_FM; class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>; class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>; +class DLSA_R6_ENC : SPECIAL_LSA_FM; class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>; class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>; class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b111000>; @@ -61,6 +62,7 @@ class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>; class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>; class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>; class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>; +class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>; class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>; class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>; class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>; @@ -88,7 +90,7 @@ def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6; def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6; -// def DLSA; // See MSA +def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6; def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6; def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6; def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6; diff --git a/test/CodeGen/Mips/msa/special.ll b/test/CodeGen/Mips/msa/special.ll index f65a14f7bb1f..b9badf5dc582 100644 --- a/test/CodeGen/Mips/msa/special.ll +++ b/test/CodeGen/Mips/msa/special.ll @@ -4,6 +4,10 @@ ; RUN: FileCheck %s --check-prefix=MIPS32 ; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \ ; RUN: FileCheck %s --check-prefix=MIPS64 +; RUN: llc -march=mips -mcpu=mips32r6 -mattr=+msa < %s | \ +; RUN: FileCheck %s --check-prefix=MIPS32 +; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=+msa 
< %s | \ +; RUN: FileCheck %s --check-prefix=MIPS64 define i32 @llvm_mips_lsa_test(i32 %a, i32 %b) nounwind { entry: diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index 0766079fda5b..e6a6a6636158 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -100,6 +100,7 @@ divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b] jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00] jic $5, 256 # CHECK: jic $5, 256 # encoding: [0xd8,0x05,0x01,0x00] + lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5] lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43] lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43] mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda] diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index 9f32562191a4..1148a5246609 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -111,6 +111,8 @@ ddivu $2,$3,$4 # CHECK: ddivu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9f] dmod $2,$3,$4 # CHECK: dmod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xde] dmodu $2,$3,$4 # CHECK: dmodu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdf] + lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5] + dlsa $2, $3, $4, 3 # CHECK: dlsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xd5] ldpc $2,123456 # CHECK: ldpc $2, 123456 # encoding: [0xec,0x58,0x3c,0x48] lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43] lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43] From e5241cc48858af7c6ca58ae677b69323e8f84eb5 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 20 Jun 2014 10:08:11 +0000 Subject: [PATCH 156/157] Emit the ARM build attributes ABI_PCS_wchar_t and ABI_enum_size. Emit the ARM build attributes ABI_PCS_wchar_t and ABI_enum_size based on module flags metadata. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211349 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 36 +++++++++++++++++++++++ include/llvm/Support/ARMBuildAttributes.h | 15 ++++++++++ lib/Target/ARM/ARMAsmPrinter.cpp | 26 ++++++++++++++++ test/CodeGen/ARM/metadata-default.ll | 16 ++++++++++ test/CodeGen/ARM/metadata-short-enums.ll | 16 ++++++++++ test/CodeGen/ARM/metadata-short-wchar.ll | 16 ++++++++++ 6 files changed, 125 insertions(+) create mode 100644 test/CodeGen/ARM/metadata-default.ll create mode 100644 test/CodeGen/ARM/metadata-short-enums.ll create mode 100644 test/CodeGen/ARM/metadata-short-wchar.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index b75058fa9a5f..c535443e2c57 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -3146,6 +3146,42 @@ Each individual option is required to be either a valid option for the target's linker, or an option that is reserved by the target specific assembly writer or object file emitter. No other aspect of these options is defined by the IR. +C type width Module Flags Metadata +---------------------------------- + +The ARM backend emits a section into each generated object file describing the +options that it was compiled with (in a compiler-independent way) to prevent +linking incompatible objects, and to allow automatic library selection. Some +of these options are not visible at the IR level, namely wchar_t width and enum +width. + +To pass this information to the backend, these options are encoded in module +flags metadata, using the following key-value pairs: + +.. 
list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Key + - Value + + * - short_wchar + - * 0 --- sizeof(wchar_t) == 4 + * 1 --- sizeof(wchar_t) == 2 + + * - short_enum + - * 0 --- Enums are at least as large as an ``int``. + * 1 --- Enums are stored in the smallest integer type which can + represent all of its values. + +For example, the following metadata section specifies that the module was +compiled with a ``wchar_t`` width of 4 bytes, and the underlying type of an +enum is the smallest type which can represent all of its values:: + + !llvm.module.flags = !{!0, !1} + !0 = metadata !{i32 1, metadata !"short_wchar", i32 1} + !1 = metadata !{i32 1, metadata !"short_enum", i32 0} + .. _intrinsicglobalvariables: Intrinsic Global Variables diff --git a/include/llvm/Support/ARMBuildAttributes.h b/include/llvm/Support/ARMBuildAttributes.h index 16312004c871..f63e0a61f639 100644 --- a/include/llvm/Support/ARMBuildAttributes.h +++ b/include/llvm/Support/ARMBuildAttributes.h @@ -159,6 +159,11 @@ enum { AddressDirect = 1, // Address imported data directly AddressGOT = 2, // Address imported data indirectly (via GOT) + // Tag_ABI_PCS_wchar_t, (=18), uleb128 + WCharProhibited = 0, // wchar_t is not used + WCharWidth2Bytes = 2, // sizeof(wchar_t) == 2 + WCharWidth4Bytes = 4, // sizeof(wchar_t) == 4 + // Tag_ABI_FP_denormal, (=20), uleb128 PreserveFPSign = 2, // sign when flushed-to-zero is preserved @@ -166,6 +171,16 @@ enum { AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI]) AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings + // Tag_ABI_enum_size, (=26), uleb128 + EnumProhibited = 0, // The user prohibited the use of enums when building + // this entity. + EnumSmallest = 1, // Enum is smallest container big enough to hold all + // values. + Enum32Bit = 2, // Enum is at least 32 bits. + Enum32BitABI = 3, // Every enumeration visible across an ABI-complying + // interface contains a value needing 32 bits to encode + // it; other enums can be containerized. + // Tag_ABI_HardFP_use, (=27), uleb128 HardFPImplied = 0, // FP use should be implied by Tag_FP_arch HardFPSinglePrecision = 1, // Single-precision only diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 55e9fe5f5c57..d2c54f3b71b6 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -730,6 +730,32 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + if (MMI) { + if (const Module *SourceModule = MMI->getModule()) { + // ABI_PCS_wchar_t to indicate wchar_t width + // FIXME: There is no way to emit value 0 (wchar_t prohibited). + if (auto WCharWidthValue = cast_or_null( + SourceModule->getModuleFlag("wchar_size"))) { + int WCharWidth = WCharWidthValue->getZExtValue(); + assert((WCharWidth == 2 || WCharWidth == 4) && + "wchar_t width must be 2 or 4 bytes"); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth); + } + + // ABI_enum_size to indicate enum width + // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 + // (all enums contain a value needing 32 bits to encode). + if (auto EnumWidthValue = cast_or_null( + SourceModule->getModuleFlag("min_enum_size"))) { + int EnumWidth = EnumWidthValue->getZExtValue(); + assert((EnumWidth == 1 || EnumWidth == 4) && + "Minimum enum width must be 1 or 4 bytes"); + int EnumBuildAttr = EnumWidth == 1 ? 
1 : 2; + ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); + } + } + } + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZVirtualization); diff --git a/test/CodeGen/ARM/metadata-default.ll b/test/CodeGen/ARM/metadata-default.ll new file mode 100644 index 000000000000..f6a3fe289cc1 --- /dev/null +++ b/test/CodeGen/ARM/metadata-default.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 4} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4} + +; CHECK: .eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 2 @ Tag_ABI_enum_size diff --git a/test/CodeGen/ARM/metadata-short-enums.ll b/test/CodeGen/ARM/metadata-short-enums.ll new file mode 100644 index 000000000000..bccd3327e5b5 --- /dev/null +++ b/test/CodeGen/ARM/metadata-short-enums.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 4} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 1} + +; CHECK: .eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 1 @ Tag_ABI_enum_size diff --git a/test/CodeGen/ARM/metadata-short-wchar.ll b/test/CodeGen/ARM/metadata-short-wchar.ll new file mode 100644 index 000000000000..6de9bf174317 --- /dev/null +++ b/test/CodeGen/ARM/metadata-short-wchar.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 2} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4} + +; CHECK: .eabi_attribute 18, 2 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 2 @ Tag_ABI_enum_size From c995fb2a6013b511243a4391acf23d7d9d8c8e71 Mon Sep 17 00:00:00 2001 From: Carl-Anton Ingmarsson Date: Tue, 24 Dec 2013 21:49:55 +0100 Subject: [PATCH 157/157] Add getPassInfo() function to llvm::Pass PassRegistry::getPass() is quite heavy since it involves taking a lock and looking up the info in a hashtable. --- include/llvm/Pass.h | 11 ++++++++++- lib/IR/LegacyPassManager.cpp | 21 ++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index c2b9f95956e8..22d57edb74e4 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -29,6 +29,7 @@ #ifndef LLVM_PASS_H #define LLVM_PASS_H +#include "llvm/PassRegistry.h" #include "llvm/Support/Compiler.h" #include @@ -82,13 +83,14 @@ enum PassKind { class Pass { AnalysisResolver *Resolver; // Used to resolve analysis const void *PassID; + mutable const PassInfo *PI; PassKind Kind; void operator=(const Pass&) LLVM_DELETED_FUNCTION; Pass(const Pass &) LLVM_DELETED_FUNCTION; public: explicit Pass(PassKind K, char &pid) - : Resolver(nullptr), PassID(&pid), Kind(K) { } + : Resolver(nullptr), PassID(&pid), PI(nullptr), Kind(K) { } virtual ~Pass(); @@ -105,6 +107,13 @@ class Pass { return PassID; } + /// getPassInfo - Return the PassInfo associated with this pass. 
+ const PassInfo *getPassInfo() const { + if (!PI) + PI = PassRegistry::getPassRegistry()->getPassInfo(PassID); + return PI; + } + /// doInitialization - Virtual method overridden by subclasses to do /// any necessary initialization before any pass is run. /// diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index d3f3482dc024..d14f139db185 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -607,8 +607,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { // If P is an analysis pass and it is available then do not // generate the analysis again. Stale analysis info should not be // available at this point. - const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo(P->getPassID()); + const PassInfo *PI = P->getPassInfo(); if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) { delete P; return; @@ -723,8 +722,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { return *I; // If Pass not found then check the interfaces implemented by Immutable Pass - const PassInfo *PassInf = - PassRegistry::getPassRegistry()->getPassInfo(PI); + const PassInfo *PassInf = (*I)->getPassInfo(); assert(PassInf && "Expected all immutable passes to be initialized"); const std::vector &ImmPI = PassInf->getInterfacesImplemented(); @@ -766,8 +764,7 @@ void PMTopLevelManager::dumpArguments() const { dbgs() << "Pass Arguments: "; for (SmallVectorImpl::const_iterator I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) - if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) { + if (const PassInfo *PI = (*I)->getPassInfo()) { assert(PI && "Expected all immutable passes to be initialized"); if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); @@ -831,8 +828,8 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) { // This pass is the current implementation of all of the interfaces it // implements as well. - const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI); - if (!PInf) return; + const PassInfo *PInf = P->getPassInfo(); + if (PInf == 0) return; const std::vector &II = PInf->getInterfacesImplemented(); for (unsigned i = 0, e = II.size(); i != e; ++i) AvailableAnalysis[II[i]->getTypeInfo()] = P; @@ -963,10 +960,9 @@ void PMDataManager::freePass(Pass *P, StringRef Msg, P->releaseMemory(); } - AnalysisID PI = P->getPassID(); - if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) { + if (const PassInfo *PInf = P->getPassInfo()) { // Remove the pass itself (if it is not already removed). - AvailableAnalysis.erase(PI); + AvailableAnalysis.erase(P->getPassID()); // Remove all interfaces this pass implements, for which it is also // listed as the available implementation. @@ -1148,8 +1144,7 @@ void PMDataManager::dumpPassArguments() const { if (PMDataManager *PMD = (*I)->getAsPMDataManager()) PMD->dumpPassArguments(); else - if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) + if (const PassInfo *PI = (*I)->getPassInfo()) if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); }
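Patch 157 above caches the PassInfo pointer in a mutable member because each
registry lookup takes a lock and probes a hash table. A minimal, self-contained
sketch of that lazy-caching idea, with hypothetical Registry/Info types rather
than the real LLVM classes:

    #include <iostream>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    // Hypothetical stand-in for PassRegistry: every lookup takes a lock and
    // probes a hash table, so callers want to pay that cost only once.
    struct Info { std::string Name; };

    class Registry {
      std::mutex M;
      std::unordered_map<const void *, Info> Table;
    public:
      static Registry &get() { static Registry R; return R; }
      void add(const void *ID, Info I) {
        std::lock_guard<std::mutex> Lock(M);
        Table[ID] = std::move(I);
      }
      const Info *lookup(const void *ID) {
        std::lock_guard<std::mutex> Lock(M);
        auto It = Table.find(ID);
        return It == Table.end() ? nullptr : &It->second;
      }
    };

    class CachingPass {
      const void *ID;
      mutable const Info *Cached = nullptr;  // analogous to Pass::PI
    public:
      explicit CachingPass(const void *ID) : ID(ID) {}
      const Info *getInfo() const {
        if (!Cached)                         // locked lookup happens only once
          Cached = Registry::get().lookup(ID);
        return Cached;
      }
    };

    int main() {
      static char PassID;                    // address used as the identity
      Registry::get().add(&PassID, Info{"example-pass"});
      CachingPass P(&PassID);
      std::cout << P.getInfo()->Name << "\n"; // later calls hit the cache
      return 0;
    }

As in the patch, the cached pointer itself is not synchronized, so this assumes
a given pass is not queried concurrently before the cache is filled.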