From 474e3967d9204e7e48ec59935d6b9b0c931b285d Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 16 Nov 2016 11:34:51 -0800 Subject: [PATCH 1/7] Add an entryBB accessor to SILFunction --- include/swift/SIL/SILFunction.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/swift/SIL/SILFunction.h b/include/swift/SIL/SILFunction.h index 7f3c4f1035ad5..6f9670cb0aedf 100644 --- a/include/swift/SIL/SILFunction.h +++ b/include/swift/SIL/SILFunction.h @@ -618,6 +618,9 @@ class SILFunction SILBasicBlock &front() { return *begin(); } const SILBasicBlock &front() const { return *begin(); } + SILBasicBlock *entryBB() { return &front(); } + const SILBasicBlock *entryBB() const { return &front(); } + SILBasicBlock *createBasicBlock(); SILBasicBlock *createBasicBlock(SILBasicBlock *After); From b8a3c162e9ecc7d4a2ed9acaf54d35eb9d737e0b Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Thu, 15 Dec 2016 14:39:36 -0800 Subject: [PATCH 2/7] Change a cl::opt that conflicts once we link llvm passes and sil passes --- lib/SILOptimizer/Analysis/AliasAnalysis.cpp | 2 +- test/SILOptimizer/basic-aa.sil | 2 +- test/SILOptimizer/mem-behavior.sil | 2 +- test/SILOptimizer/typed-access-tb-aa.sil | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/SILOptimizer/Analysis/AliasAnalysis.cpp b/lib/SILOptimizer/Analysis/AliasAnalysis.cpp index d331089d514a9..48baaae99b1b4 100644 --- a/lib/SILOptimizer/Analysis/AliasAnalysis.cpp +++ b/lib/SILOptimizer/Analysis/AliasAnalysis.cpp @@ -55,7 +55,7 @@ enum class AAKind : unsigned { } // end anonymous namespace static llvm::cl::opt -DebugAAKinds("aa", llvm::cl::desc("Alias Analysis Kinds:"), +DebugAAKinds("aa-kind", llvm::cl::desc("Alias Analysis Kinds:"), llvm::cl::init(AAKind::All), llvm::cl::values(clEnumValN(AAKind::None, "none", diff --git a/test/SILOptimizer/basic-aa.sil b/test/SILOptimizer/basic-aa.sil index d921a5edd74b3..679a3bdbb3158 100644 --- a/test/SILOptimizer/basic-aa.sil +++ 
b/test/SILOptimizer/basic-aa.sil @@ -1,4 +1,4 @@ -// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -module-name Swift %s -aa=basic-aa -aa-dump -o /dev/null | %FileCheck %s +// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -module-name Swift %s -aa-kind=basic-aa -aa-dump -o /dev/null | %FileCheck %s // REQUIRES: asserts diff --git a/test/SILOptimizer/mem-behavior.sil b/test/SILOptimizer/mem-behavior.sil index 79e357c7e9d03..469e3133c9aa0 100644 --- a/test/SILOptimizer/mem-behavior.sil +++ b/test/SILOptimizer/mem-behavior.sil @@ -1,4 +1,4 @@ -// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa=basic-aa -mem-behavior-dump -o /dev/null | %FileCheck %s +// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa-kind=basic-aa -mem-behavior-dump -o /dev/null | %FileCheck %s // REQUIRES: asserts diff --git a/test/SILOptimizer/typed-access-tb-aa.sil b/test/SILOptimizer/typed-access-tb-aa.sil index 6b223c4d9d565..90d7f5b01433f 100644 --- a/test/SILOptimizer/typed-access-tb-aa.sil +++ b/test/SILOptimizer/typed-access-tb-aa.sil @@ -1,4 +1,4 @@ -// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa=typed-access-tb-aa -aa-dump -o /dev/null | %FileCheck %s +// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil %s -aa-kind=typed-access-tb-aa -aa-dump -o /dev/null | %FileCheck %s // REQUIRES: asserts From cd1037b799402f4d68322011ef1b196f94281cea Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 16 Nov 2016 10:51:42 -0800 Subject: [PATCH 3/7] IRGen: Allocate generic/resilient values on the stack instead of on the heap Allocate buffers for local generic/resilient values on the stack. alloc_stack instructions in the entry block are translated using a dynamic alloca instruction with variable size. All other alloc_stack instructions in addition use llvm's stacksave/restore intrinsics to reset the stack (they could be executed multiple times and with varying sizes). 
--- lib/IRGen/Address.h | 27 ++++ lib/IRGen/FixedTypeInfo.h | 8 +- lib/IRGen/GenCall.cpp | 21 +-- lib/IRGen/GenFunc.cpp | 12 +- lib/IRGen/GenInit.cpp | 17 +-- lib/IRGen/GenOpaque.cpp | 38 +++++ lib/IRGen/GenOpaque.h | 15 ++ lib/IRGen/IRGenSIL.cpp | 155 ++++++++------------- lib/IRGen/NonFixedTypeInfo.h | 30 ++-- lib/IRGen/TypeInfo.h | 12 +- test/IRGen/enum_resilience.swift | 14 +- test/IRGen/fixed_size_buffer_peepholes.sil | 74 +--------- test/IRGen/generic_casts.swift | 13 +- test/IRGen/generic_tuples.swift | 35 +++-- test/IRGen/lifetime.sil | 84 +++++------ test/IRGen/struct_resilience.swift | 21 ++- 16 files changed, 280 insertions(+), 296 deletions(-) diff --git a/lib/IRGen/Address.h b/lib/IRGen/Address.h index 4c3c25ccb8cd7..a55e4e224d075 100644 --- a/lib/IRGen/Address.h +++ b/lib/IRGen/Address.h @@ -104,6 +104,33 @@ class ContainedAddress { bool isValid() const { return Addr.isValid(); } }; +/// An address on the stack together with an optional stack pointer reset +/// location. +class StackAddress { + /// The address of an object of type T. + Address Addr; + /// The stack pointer location to reset to when this stack object is + /// deallocated. 
+ llvm::Value *StackPtrResetLocation; + +public: + StackAddress() : StackPtrResetLocation(nullptr) {} + StackAddress(Address address) + : Addr(address), StackPtrResetLocation(nullptr) {} + StackAddress(Address address, llvm::Value *SP) + : Addr(address), StackPtrResetLocation(SP) {} + + llvm::Value *getAddressPointer() const { return Addr.getAddress(); } + Alignment getAlignment() const { return Addr.getAlignment(); } + Address getAddress() const { return Addr; } + bool needsSPRestore() const { return StackPtrResetLocation != nullptr; } + llvm::Value *getSavedSP() const { + assert(StackPtrResetLocation && "Expect a valid stacksave"); + return StackPtrResetLocation; } + + bool isValid() const { return Addr.isValid(); } +}; + } // end namespace irgen } // end namespace swift diff --git a/lib/IRGen/FixedTypeInfo.h b/lib/IRGen/FixedTypeInfo.h index b21ed7e5d13bf..1a10dc63693fa 100644 --- a/lib/IRGen/FixedTypeInfo.h +++ b/lib/IRGen/FixedTypeInfo.h @@ -76,10 +76,10 @@ class FixedTypeInfo : public TypeInfo { return (isFixedSize(expansion) && StorageSize.isZero()); } - ContainedAddress allocateStack(IRGenFunction &IGF, SILType T, - const llvm::Twine &name) const override; - void deallocateStack(IRGenFunction &IGF, Address addr, SILType T) const override; - void destroyStack(IRGenFunction &IGF, Address addr, SILType T) const override; + StackAddress allocateStack(IRGenFunction &IGF, SILType T, bool isEntryBlock, + const llvm::Twine &name) const override; + void deallocateStack(IRGenFunction &IGF, StackAddress addr, SILType T) const override; + void destroyStack(IRGenFunction &IGF, StackAddress addr, SILType T) const override; // We can give these reasonable default implementations. 
diff --git a/lib/IRGen/GenCall.cpp b/lib/IRGen/GenCall.cpp index 2b3320ac32398..f7e0608275c06 100644 --- a/lib/IRGen/GenCall.cpp +++ b/lib/IRGen/GenCall.cpp @@ -1214,15 +1214,15 @@ void CallEmission::emitToExplosion(Explosion &out) { // If the call is naturally to memory, emit it that way and then // explode that temporary. if (LastArgWritten == 1) { - ContainedAddress ctemp = substResultTI.allocateStack(IGF, substResultType, - "call.aggresult"); + StackAddress ctemp = substResultTI.allocateStack(IGF, substResultType, + false, "call.aggresult"); Address temp = ctemp.getAddress(); emitToMemory(temp, substResultTI); // We can use a take. substResultTI.loadAsTake(IGF, temp, out); - substResultTI.deallocateStack(IGF, ctemp.getContainer(), substResultType); + substResultTI.deallocateStack(IGF, ctemp, substResultType); return; } @@ -1381,7 +1381,7 @@ static void emitCoerceAndExpand(IRGenFunction &IGF, // Otherwise, materialize to a temporary. Address temporary = - paramTI.allocateStack(IGF, paramTy, "coerce-and-expand.temp").getAddress(); + paramTI.allocateStack(IGF, paramTy, false, "coerce-and-expand.temp").getAddress(); auto coercionTyLayout = IGF.IGM.DataLayout.getStructLayout(coercionTy); @@ -1440,7 +1440,7 @@ static void emitCoerceAndExpand(IRGenFunction &IGF, paramTI.loadAsTake(IGF, temporary, out); } - paramTI.deallocateStack(IGF, temporary, paramTy); + paramTI.deallocateStack(IGF, StackAddress(temporary), paramTy); } static void emitDirectExternalArgument(IRGenFunction &IGF, @@ -1476,6 +1476,7 @@ static void emitDirectExternalArgument(IRGenFunction &IGF, // Store to a temporary. 
Address temporary = argTI.allocateStack(IGF, argType, + false, "coerced-arg").getAddress(); argTI.initializeFromParams(IGF, in, temporary, argType); @@ -1497,7 +1498,7 @@ static void emitDirectExternalArgument(IRGenFunction &IGF, out.add(IGF.Builder.CreateLoad(coercedAddr)); } - argTI.deallocateStack(IGF, temporary, argType); + argTI.deallocateStack(IGF, StackAddress(temporary), argType); } namespace { @@ -1547,7 +1548,7 @@ static void emitClangExpandedArgument(IRGenFunction &IGF, } // Otherwise, materialize to a temporary. - Address temp = swiftTI.allocateStack(IGF, swiftType, + Address temp = swiftTI.allocateStack(IGF, swiftType, false, "clang-expand-arg.temp").getAddress(); swiftTI.initialize(IGF, in, temp); @@ -1569,7 +1570,7 @@ void irgen::emitClangExpandedParameter(IRGenFunction &IGF, } // Otherwise, materialize to a temporary. - Address temp = swiftTI.allocateStack(IGF, swiftType, + Address temp = swiftTI.allocateStack(IGF, swiftType, false, "clang-expand-param.temp").getAddress(); Address castTemp = IGF.Builder.CreateBitCast(temp, IGF.IGM.Int8PtrTy); ClangExpandStoreEmitter(IGF, in).visit(clangType, castTemp); @@ -1652,7 +1653,7 @@ static void externalizeArguments(IRGenFunction &IGF, const Callee &callee, } case clang::CodeGen::ABIArgInfo::Indirect: { auto &ti = cast(IGF.getTypeInfo(paramType)); - Address addr = ti.allocateStack(IGF, paramType, + Address addr = ti.allocateStack(IGF, paramType, false, "indirect-temporary").getAddress(); ti.initialize(IGF, in, addr); @@ -1779,7 +1780,7 @@ static void emitDirectForeignParameter(IRGenFunction &IGF, // Deallocate the temporary. // `deallocateStack` emits the lifetime.end marker for us. 
- paramTI.deallocateStack(IGF, temporary, paramType); + paramTI.deallocateStack(IGF, StackAddress(temporary), paramType); } void irgen::emitForeignParameter(IRGenFunction &IGF, Explosion ¶ms, diff --git a/lib/IRGen/GenFunc.cpp b/lib/IRGen/GenFunc.cpp index e6626e0c309e5..e531211236d0b 100644 --- a/lib/IRGen/GenFunc.cpp +++ b/lib/IRGen/GenFunc.cpp @@ -778,7 +778,7 @@ static llvm::Function *emitPartialApplicationForwarder(IRGenModule &IGM, struct AddressToDeallocate { SILType Type; const TypeInfo &TI; - Address Addr; + StackAddress Addr; }; SmallVector addressesToDeallocate; @@ -958,14 +958,14 @@ static llvm::Function *emitPartialApplicationForwarder(IRGenModule &IGM, // The +1 argument is passed indirectly, so we need to copy into a // temporary. needsAllocas = true; - auto caddr = fieldTI.allocateStack(subIGF, fieldTy, "arg.temp"); - fieldTI.initializeWithCopy(subIGF, caddr.getAddress(), fieldAddr, - fieldTy); - param.add(caddr.getAddressPointer()); + auto stackAddr = fieldTI.allocateStack(subIGF, fieldTy, false, "arg.temp"); + auto addressPointer = stackAddr.getAddress().getAddress(); + fieldTI.initializeWithCopy(subIGF, stackAddr.getAddress(), fieldAddr, fieldTy); + param.add(addressPointer); // Remember to deallocate later. addressesToDeallocate.push_back( - AddressToDeallocate{fieldTy, fieldTI, caddr.getContainer()}); + AddressToDeallocate{fieldTy, fieldTI, stackAddr}); break; } diff --git a/lib/IRGen/GenInit.cpp b/lib/IRGen/GenInit.cpp index 478edca8d2adf..94c084f8e8dca 100644 --- a/lib/IRGen/GenInit.cpp +++ b/lib/IRGen/GenInit.cpp @@ -63,30 +63,31 @@ Address IRGenModule::emitSILGlobalVariable(SILGlobalVariable *var) { return addr; } -ContainedAddress FixedTypeInfo::allocateStack(IRGenFunction &IGF, SILType T, - const Twine &name) const { +StackAddress FixedTypeInfo::allocateStack(IRGenFunction &IGF, SILType T, + bool isEntryBlock, + const Twine &name) const { // If the type is known to be empty, don't actually allocate anything. 
if (isKnownEmpty(ResilienceExpansion::Maximal)) { auto addr = getUndefAddress(); - return { addr, addr }; + return { addr }; } Address alloca = IGF.createAlloca(getStorageType(), getFixedAlignment(), name); IGF.Builder.CreateLifetimeStart(alloca, getFixedSize()); - return { alloca, alloca }; + return { alloca }; } -void FixedTypeInfo::destroyStack(IRGenFunction &IGF, Address addr, +void FixedTypeInfo::destroyStack(IRGenFunction &IGF, StackAddress addr, SILType T) const { - destroy(IGF, addr, T); + destroy(IGF, addr.getAddress(), T); FixedTypeInfo::deallocateStack(IGF, addr, T); } -void FixedTypeInfo::deallocateStack(IRGenFunction &IGF, Address addr, +void FixedTypeInfo::deallocateStack(IRGenFunction &IGF, StackAddress addr, SILType T) const { if (isKnownEmpty(ResilienceExpansion::Maximal)) return; - IGF.Builder.CreateLifetimeEnd(addr, getFixedSize()); + IGF.Builder.CreateLifetimeEnd(addr.getAddress(), getFixedSize()); } diff --git a/lib/IRGen/GenOpaque.cpp b/lib/IRGen/GenOpaque.cpp index 9e68e9131d07d..d1b0fc8bcab38 100644 --- a/lib/IRGen/GenOpaque.cpp +++ b/lib/IRGen/GenOpaque.cpp @@ -435,6 +435,44 @@ llvm::Value *irgen::emitInitializeBufferWithCopyOfBufferCall(IRGenFunction &IGF, return call; } +/// Emit a dynamic alloca call to allocate enough memory to hold an object of +/// type 'T' and an optional llvm.stackrestore point if 'isInEntryBlock' is +/// false. +DynamicAlloca irgen::emitDynamicAlloca(IRGenFunction &IGF, SILType T, + bool isInEntryBlock) { + llvm::Value *stackRestorePoint = nullptr; + + // Save the stack pointer if we are not in the entry block (we could be + // executed more than once). + if (!isInEntryBlock) { + auto *stackSaveFn = llvm::Intrinsic::getDeclaration( + &IGF.IGM.Module, llvm::Intrinsic::ID::stacksave); + + stackRestorePoint = IGF.Builder.CreateCall(stackSaveFn, {}, "spsave"); + } + + // Emit the dynamic alloca. 
+ llvm::Value *size = emitLoadOfSize(IGF, T); + auto *alloca = IGF.Builder.CreateAlloca(IGF.IGM.Int8Ty, size, "alloca"); + alloca->setAlignment(16); + assert(!isInEntryBlock || + IGF.getActiveDominancePoint().isUniversal() && + "Must be in entry block if we insert dynamic alloca's without " + "stackrestores"); + return {alloca, stackRestorePoint}; +} + +/// Deallocate dynamic alloca's memory if requested by restoring the stack +/// location before the dynamic alloca's call. +void irgen::emitDeallocateDynamicAlloca(IRGenFunction &IGF, + StackAddress address) { + if (!address.needsSPRestore()) + return; + auto *stackRestoreFn = llvm::Intrinsic::getDeclaration( + &IGF.IGM.Module, llvm::Intrinsic::ID::stackrestore); + IGF.Builder.CreateCall(stackRestoreFn, address.getSavedSP()); +} + /// Emit a call to do an 'allocateBuffer' operation. llvm::Value *irgen::emitAllocateBufferCall(IRGenFunction &IGF, SILType T, diff --git a/lib/IRGen/GenOpaque.h b/lib/IRGen/GenOpaque.h index 921f9c17fac72..9815b68c2affa 100644 --- a/lib/IRGen/GenOpaque.h +++ b/lib/IRGen/GenOpaque.h @@ -230,6 +230,21 @@ namespace irgen { /// The type must be dynamically known to have extra inhabitant witnesses. llvm::Value *emitLoadOfExtraInhabitantCount(IRGenFunction &IGF, SILType T); + /// Emit a dynamic alloca call to allocate enough memory to hold an object of + /// type 'T' and an optional llvm.stackrestore point if 'isInEntryBlock' is + /// false. + struct DynamicAlloca { + llvm::Value *Alloca; + llvm::Value *SavedSP; + DynamicAlloca(llvm::Value *A, llvm::Value *SP) : Alloca(A), SavedSP(SP) {} + }; + DynamicAlloca emitDynamicAlloca(IRGenFunction &IGF, SILType T, + bool isInEntryBlock); + + /// Deallocate dynamic alloca's memory if the stack address has an SP restore + /// point associated with it. 
+ void emitDeallocateDynamicAlloca(IRGenFunction &IGF, StackAddress address); + } // end namespace irgen } // end namespace swift diff --git a/lib/IRGen/IRGenSIL.cpp b/lib/IRGen/IRGenSIL.cpp index a01ec0f295d29..7b6a7d9bee973 100644 --- a/lib/IRGen/IRGenSIL.cpp +++ b/lib/IRGen/IRGenSIL.cpp @@ -108,14 +108,24 @@ class StaticFunction { class LoweredValue { public: enum class Kind { - /// This LoweredValue corresponds to a SIL address value. - /// The LoweredValue of an alloc_stack keeps an owning container in - /// addition to the address of the allocated buffer. + /// The first two LoweredValue kinds correspond to a SIL address value. + /// + /// The LoweredValue of an existential alloc_stack keeps an owning container + /// in addition to the address of the allocated buffer. /// Depending on the allocated type, the container may be equal to the /// buffer itself (for types with known sizes) or it may be the address /// of a fixed-size container which points to the heap-allocated buffer. /// In this case the address-part may be null, which means that the buffer /// is not allocated yet. + ContainedAddress, + + /// The LoweredValue of a resilient, generic, or loadable typed alloc_stack + /// keeps an optional stackrestore point in addition to the address of the + /// allocated buffer. For all other address values the stackrestore point is + /// just null. + /// If the stackrestore point is set (currently, this might happen for + /// opaque types: generic and resilient) the deallocation of the stack must + /// reset the stack pointer to this point. Address, /// The following kinds correspond to SIL non-address values. @@ -142,7 +152,8 @@ class LoweredValue { using ExplosionVector = SmallVector; union { - ContainedAddress address; + ContainedAddress containedAddress; + StackAddress address; OwnedAddress boxWithAddress; struct { ExplosionVector values; @@ -153,9 +164,14 @@ class LoweredValue { public: - /// Create an address value without a container (the usual case). 
+ /// Create an address value without a stack restore point. LoweredValue(const Address &address) - : kind(Kind::Address), address(Address(), address) + : kind(Kind::Address), address(address) + {} + + /// Create an address value with an optional stack restore point. + LoweredValue(const StackAddress &address) + : kind(Kind::Address), address(address) {} enum ContainerForUnallocatedAddress_t { ContainerForUnallocatedAddress }; @@ -163,13 +179,13 @@ class LoweredValue { /// Create an address value for an alloc_stack, consisting of a container and /// a not yet allocated buffer. LoweredValue(const Address &container, ContainerForUnallocatedAddress_t) - : kind(Kind::Address), address(container, Address()) + : kind(Kind::ContainedAddress), containedAddress(container, Address()) {} /// Create an address value for an alloc_stack, consisting of a container and /// the address of the allocated buffer. LoweredValue(const ContainedAddress &address) - : kind(Kind::Address), address(address) + : kind(Kind::ContainedAddress), containedAddress(address) {} LoweredValue(StaticFunction &&staticFunction) @@ -194,8 +210,11 @@ class LoweredValue { : kind(lv.kind) { switch (kind) { + case Kind::ContainedAddress: + ::new (&containedAddress) ContainedAddress(std::move(lv.containedAddress)); + break; case Kind::Address: - ::new (&address) ContainedAddress(std::move(lv.address)); + ::new (&address) StackAddress(std::move(lv.address)); break; case Kind::Explosion: ::new (&explosion.values) ExplosionVector(std::move(lv.explosion.values)); @@ -223,7 +242,8 @@ class LoweredValue { return kind == Kind::Address && address.getAddress().isValid(); } bool isUnallocatedAddressInBuffer() const { - return kind == Kind::Address && !address.getAddress().isValid(); + return kind == Kind::ContainedAddress && + !containedAddress.getAddress().isValid(); } bool isValue() const { return kind >= Kind::Value_First && kind <= Kind::Value_Last; @@ -236,11 +256,16 @@ class LoweredValue { assert(isAddress() && "not 
an allocated address"); return address.getAddress(); } + + StackAddress getStackAddress() const { + assert(isAddress() && "not an allocated address"); + return address; + } Address getContainerOfAddress() const { - assert(kind == Kind::Address); - assert(address.getContainer().isValid() && "address has no container"); - return address.getContainer(); + assert(kind == Kind::ContainedAddress); + assert(containedAddress.getContainer().isValid() && "address has no container"); + return containedAddress.getContainer(); } void getExplosion(IRGenFunction &IGF, Explosion &ex) const; @@ -271,7 +296,10 @@ class LoweredValue { ~LoweredValue() { switch (kind) { case Kind::Address: - address.~ContainedAddress(); + address.~StackAddress(); + break; + case Kind::ContainedAddress: + containedAddress.~ContainedAddress(); break; case Kind::Explosion: explosion.values.~ExplosionVector(); @@ -326,28 +354,6 @@ class IRGenSILFunction : unsigned NumAnonVars = 0; unsigned NumCondFails = 0; - /// Notes about instructions for which we're supposed to perform some - /// sort of non-standard emission. This enables some really simply local - /// peepholing in cases where you can't just do that with the lowered value. - /// - /// Since emission notes generally change semantics, we enforce that all - /// notes must be claimed. - /// - /// This uses a set because the current peepholes don't need to record any - /// extra structure; if you need extra structure, feel free to make it a - /// map. This set is generally very small because claiming a note removes - /// it. - llvm::SmallPtrSet EmissionNotes; - - void addEmissionNote(SILInstruction *inst) { - assert(inst); - EmissionNotes.insert(inst); - } - - bool claimEmissionNote(SILInstruction *inst) { - return EmissionNotes.erase(inst); - } - /// Accumulative amount of allocated bytes on the stack. Used to limit the /// size for stack promoted objects. 
/// We calculate it on demand, so that we don't have to do it if the @@ -386,7 +392,7 @@ class IRGenSILFunction : setLoweredValue(v, address); } - void setLoweredContainedAddress(SILValue v, const ContainedAddress &address) { + void setLoweredStackAddress(SILValue v, const StackAddress &address) { assert(v->getType().isAddress() && "address for non-address value?!"); setLoweredValue(v, address); } @@ -508,6 +514,11 @@ class IRGenSILFunction : Address getLoweredAddress(SILValue v) { return getLoweredValue(v).getAddress(); } + + StackAddress getLoweredStackAddress(SILValue v) { + return getLoweredValue(v).getStackAddress(); + } + Address getLoweredContainerOfAddress(SILValue v) { return getLoweredValue(v).getContainerOfAddress(); } @@ -952,6 +963,7 @@ llvm::Value *StaticFunction::getExplosionValue(IRGenFunction &IGF) const { void LoweredValue::getExplosion(IRGenFunction &IGF, Explosion &ex) const { switch (kind) { case Kind::Address: + case Kind::ContainedAddress: llvm_unreachable("not a value"); case Kind::Explosion: @@ -976,6 +988,7 @@ void LoweredValue::getExplosion(IRGenFunction &IGF, Explosion &ex) const { llvm::Value *LoweredValue::getSingletonExplosion(IRGenFunction &IGF) const { switch (kind) { case Kind::Address: + case Kind::ContainedAddress: llvm_unreachable("not a value"); case Kind::Explosion: @@ -1456,8 +1469,6 @@ void IRGenSILFunction::emitSILFunction() { if (!visitedBlocks.count(&bb)) LoweredBBs[&bb].bb->eraseFromParent(); - assert(EmissionNotes.empty() && - "didn't claim emission notes for all instructions!"); } void IRGenSILFunction::estimateStackSize() { @@ -1560,8 +1571,6 @@ void IRGenSILFunction::visitSILBasicBlock(SILBasicBlock *BB) { } visit(&I); - assert(!EmissionNotes.count(&I) && - "didn't claim emission note for instruction!"); } assert(Builder.hasPostTerminatorIP() && "SIL bb did not terminate block?!"); @@ -1943,6 +1952,7 @@ static CallEmission getCallEmissionForLoweredValue(IRGenSILFunction &IGF, case LoweredValue::Kind::BoxWithAddress: 
llvm_unreachable("@box isn't a valid callee"); + case LoweredValue::Kind::ContainedAddress: case LoweredValue::Kind::Address: llvm_unreachable("sil address isn't a valid callee"); } @@ -2095,6 +2105,7 @@ getPartialApplicationFunction(IRGenSILFunction &IGF, SILValue v, auto fnType = v->getType().castTo(); switch (lv.kind) { + case LoweredValue::Kind::ContainedAddress: case LoweredValue::Kind::Address: llvm_unreachable("can't partially apply an address"); case LoweredValue::Kind::BoxWithAddress: @@ -3517,19 +3528,15 @@ void IRGenSILFunction::visitAllocStackInst(swift::AllocStackInst *i) { # endif (void) Decl; - // If a dynamic alloc_stack is immediately initialized by a copy_addr - // operation, we can combine the allocation and initialization using an - // optimized value witness. - if (tryDeferFixedSizeBufferInitialization(*this, i, type, Address(), dbgname)) - return; - auto addr = type.allocateStack(*this, - i->getElementType(), - dbgname); + bool isEntryBlock = + i->getParentBlock() == i->getFunction()->entryBB(); + auto addr = + type.allocateStack(*this, i->getElementType(), isEntryBlock, dbgname); emitDebugInfoForAllocStack(i, type, addr.getAddress().getAddress()); - setLoweredContainedAddress(i, addr); + setLoweredStackAddress(i, addr); } static void @@ -3584,15 +3591,9 @@ void IRGenSILFunction::visitAllocRefDynamicInst(swift::AllocRefDynamicInst *i) { void IRGenSILFunction::visitDeallocStackInst(swift::DeallocStackInst *i) { auto allocatedType = i->getOperand()->getType(); const TypeInfo &allocatedTI = getTypeInfo(allocatedType); - Address container = getLoweredContainerOfAddress(i->getOperand()); + StackAddress stackAddr = getLoweredStackAddress(i->getOperand()); - // If the type isn't fixed-size, check whether we added an emission note. - // If so, we should deallocate and destroy at the same time. 
- if (!isa(allocatedTI) && claimEmissionNote(i)) { - allocatedTI.destroyStack(*this, container, allocatedType); - } else { - allocatedTI.deallocateStack(*this, container, allocatedType); - } + allocatedTI.deallocateStack(*this, stackAddr, allocatedType); } void IRGenSILFunction::visitDeallocRefInst(swift::DeallocRefInst *i) { @@ -4600,48 +4601,10 @@ void IRGenSILFunction::visitCopyAddrInst(swift::CopyAddrInst *i) { // does not produce any values. void IRGenSILFunction::visitBindMemoryInst(swift::BindMemoryInst *) {} -static DeallocStackInst * -findPairedDeallocStackForDestroyAddr(DestroyAddrInst *destroyAddr) { - // This peephole only applies if the address being destroyed is the - // result of an alloc_stack. - auto allocStack = dyn_cast(destroyAddr->getOperand()); - if (!allocStack) return nullptr; - - for (auto inst = &*std::next(destroyAddr->getIterator()); !isa(inst); - inst = &*std::next(inst->getIterator())) { - // If we find a dealloc_stack of the right memory, great. - if (auto deallocStack = dyn_cast(inst)) - if (deallocStack->getOperand() == allocStack) - return deallocStack; - - // Otherwise, if the instruction uses the alloc_stack result, treat it - // as interfering. This assumes that any re-initialization of - // the alloc_stack will be obvious in the function. - for (auto &operand : inst->getAllOperands()) - if (operand.get() == allocStack) - return nullptr; - } - - // If we ran into the terminator, stop; only apply this peephole locally. - // TODO: this could use a fancier dominance analysis, maybe. - return nullptr; -} - void IRGenSILFunction::visitDestroyAddrInst(swift::DestroyAddrInst *i) { SILType addrTy = i->getOperand()->getType(); const TypeInfo &addrTI = getTypeInfo(addrTy); - // Try to fold a destroy_addr of a dynamic alloc_stack into a single - // destroyBuffer operation. - if (!isa(addrTI)) { - // If we can find a matching dealloc stack, just set an emission note - // on it; that will cause it to destroy the current value. 
- if (auto deallocStack = findPairedDeallocStackForDestroyAddr(i)) { - addEmissionNote(deallocStack); - return; - } - } - // Otherwise, do the normal thing. Address base = getLoweredAddress(i->getOperand()); addrTI.destroy(*this, base, addrTy); diff --git a/lib/IRGen/NonFixedTypeInfo.h b/lib/IRGen/NonFixedTypeInfo.h index c86a30c943e2d..e6a38472e0680 100644 --- a/lib/IRGen/NonFixedTypeInfo.h +++ b/lib/IRGen/NonFixedTypeInfo.h @@ -57,28 +57,30 @@ class WitnessSizedTypeInfo : public IndirectTypeInfo { // This is useful for metaprogramming. static bool isFixed() { return false; } - ContainedAddress allocateStack(IRGenFunction &IGF, + StackAddress allocateStack(IRGenFunction &IGF, SILType T, + bool isInEntryBlock, const llvm::Twine &name) const override { - // Make a fixed-size buffer. - Address buffer = IGF.createFixedSizeBufferAlloca(name); - IGF.Builder.CreateLifetimeStart(buffer, getFixedBufferSize(IGF.IGM)); - - // Allocate an object of the appropriate type within it. - llvm::Value *address = emitAllocateBufferCall(IGF, T, buffer); - return { buffer, getAsBitCastAddress(IGF, address) }; + // Allocate memory on the stack. 
+ auto alloca = emitDynamicAlloca(IGF, T, isInEntryBlock); + assert((isInEntryBlock && alloca.SavedSP == nullptr) || + (!isInEntryBlock && alloca.SavedSP != nullptr) && + "stacksave/restore operations can only be skipped in the entry " + "block"); + IGF.Builder.CreateLifetimeStart(alloca.Alloca); + return { getAsBitCastAddress(IGF, alloca.Alloca), alloca.SavedSP }; } - void deallocateStack(IRGenFunction &IGF, Address buffer, + void deallocateStack(IRGenFunction &IGF, StackAddress stackAddress, SILType T) const override { - emitDeallocateBufferCall(IGF, T, buffer); - IGF.Builder.CreateLifetimeEnd(buffer, getFixedBufferSize(IGF.IGM)); + IGF.Builder.CreateLifetimeEnd(stackAddress.getAddress().getAddress()); + emitDeallocateDynamicAlloca(IGF, stackAddress); } - void destroyStack(IRGenFunction &IGF, Address buffer, + void destroyStack(IRGenFunction &IGF, StackAddress stackAddress, SILType T) const override { - emitDestroyBufferCall(IGF, T, buffer); - IGF.Builder.CreateLifetimeEnd(buffer, getFixedBufferSize(IGF.IGM)); + emitDestroyCall(IGF, T, stackAddress.getAddress()); + deallocateStack(IGF, stackAddress, T); } llvm::Value *getValueWitnessTable(IRGenFunction &IGF, SILType T) const { diff --git a/lib/IRGen/TypeInfo.h b/lib/IRGen/TypeInfo.h index b498cfa186c36..f23a8f9c3225b 100644 --- a/lib/IRGen/TypeInfo.h +++ b/lib/IRGen/TypeInfo.h @@ -39,7 +39,7 @@ namespace swift { namespace irgen { class Address; - class ContainedAddress; + class StackAddress; class IRGenFunction; class IRGenModule; class Explosion; @@ -257,17 +257,17 @@ class TypeInfo { ExplosionSchema getSchema() const; /// Allocate a variable of this type on the stack. - virtual ContainedAddress allocateStack(IRGenFunction &IGF, - SILType T, - const llvm::Twine &name) const = 0; + virtual StackAddress allocateStack(IRGenFunction &IGF, SILType T, + bool isInEntryBlock, + const llvm::Twine &name) const = 0; /// Deallocate a variable of this type. 
- virtual void deallocateStack(IRGenFunction &IGF, Address addr, + virtual void deallocateStack(IRGenFunction &IGF, StackAddress addr, SILType T) const = 0; /// Destroy the value of a variable of this type, then deallocate its /// memory. - virtual void destroyStack(IRGenFunction &IGF, Address addr, + virtual void destroyStack(IRGenFunction &IGF, StackAddress addr, SILType T) const = 0; /// Copy or take a value out of one address and into another, destroying diff --git a/test/IRGen/enum_resilience.swift b/test/IRGen/enum_resilience.swift index 7a9e0b36b920f..0af9ce8156098 100644 --- a/test/IRGen/enum_resilience.swift +++ b/test/IRGen/enum_resilience.swift @@ -139,22 +139,26 @@ public func constructResilientEnumPayload(_ s: Size) -> Medium { } // CHECK-LABEL: define{{( protected)?}} {{i32|i64}} @_TF15enum_resilience19resilientSwitchTestFO14resilient_enum6MediumSi(%swift.opaque* noalias nocapture) -// CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[(12|24) x i8\]]] - // CHECK: [[METADATA:%.*]] = call %swift.type* @_TMaO14resilient_enum6Medium() // CHECK: [[METADATA_ADDR:%.*]] = bitcast %swift.type* [[METADATA]] to i8*** // CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[METADATA_ADDR]], [[INT]] -1 // CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] -// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 5 +// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]] +// CHECK: [[WITNESS_FOR_SIZE:%.*]] = ptrtoint i8* [[WITNESS]] +// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16 +// CHECK: [[ENUM_STORAGE:%.*]] = bitcast i8* [[ALLOCA]] to %swift.opaque* + +// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]] // CHECK: [[WITNESS_FN:%.*]] = bitcast i8* [[WITNESS]] -// CHECK: [[ENUM_COPY:%.*]] = call %swift.opaque* 
[[WITNESS_FN]]([[BUFFER_TYPE]]* [[BUFFER]], %swift.opaque* %0, %swift.type* [[METADATA]]) +// CHECK: [[ENUM_COPY:%.*]] = call %swift.opaque* [[WITNESS_FN]](%swift.opaque* [[ENUM_STORAGE]], %swift.opaque* %0, %swift.type* [[METADATA]]) // CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 23 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]] // CHECK: [[WITNESS_FN:%.*]] = bitcast i8* [[WITNESS]] -// CHECK: [[TAG:%.*]] = call i32 %getEnumTag(%swift.opaque* [[ENUM_COPY]], %swift.type* [[METADATA]]) +// CHECK: [[TAG:%.*]] = call i32 %getEnumTag(%swift.opaque* [[ENUM_STORAGE]], %swift.type* [[METADATA]]) // CHECK: switch i32 [[TAG]], label %[[DEFAULT_CASE:.*]] [ // CHECK: i32 -1, label %[[PAMPHLET_CASE:.*]] diff --git a/test/IRGen/fixed_size_buffer_peepholes.sil b/test/IRGen/fixed_size_buffer_peepholes.sil index 837592e034e43..56d41e5264c65 100644 --- a/test/IRGen/fixed_size_buffer_peepholes.sil +++ b/test/IRGen/fixed_size_buffer_peepholes.sil @@ -2,86 +2,14 @@ import Builtin -sil @consume : $@convention(thin) (@in T) -> () -sil @produce : $@convention(thin) () -> @out T - -// CHECK-LABEL: define{{( protected)?}} void @join_alloc_stack_copy_addr -sil @join_alloc_stack_copy_addr : $@convention(thin) (@in T) -> () { -entry(%x : $*T): - // CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[BUFFERLIFE:%.*]] = bitcast [[BUFFER_TYPE]]* [[BUFFER]] - // CHECK: llvm.lifetime.start(i64 [[BUFFER_SIZE:12|24]], i8* [[BUFFERLIFE]]) - %a = alloc_stack $T - // CHECK: [[ADDR:%.*]] = call %swift.opaque* %initializeBufferWithCopy([[BUFFER_TYPE]]* [[BUFFER]], %swift.opaque* %0, %swift.type* %T) - copy_addr %x to [initialization] %a : $*T - // CHECK: call void @consume(%swift.opaque* noalias nocapture [[ADDR]], %swift.type* %T) - %u = function_ref @consume : $@convention(thin) (@in T) -> () - %z = apply %u(%a) : $@convention(thin) (@in T) -> () - // CHECK: [[BUFFERLIFE:%.*]] = bitcast [[BUFFER_TYPE]]* [[BUFFER]] - // CHECK: 
llvm.lifetime.end(i64 [[BUFFER_SIZE]], i8* [[BUFFERLIFE]]) - dealloc_stack %a : $*T - return undef : $() -} - protocol P {} // CHECK-LABEL: define{{( protected)?}} void @join_init_existential_copy_addr(%P27fixed_size_buffer_peepholes1P_* noalias nocapture sret, %swift.opaque* noalias nocapture, %swift.type* %T, i8** %T.P) // CHECK: [[BUFFER:%.*]] = getelementptr inbounds %P27fixed_size_buffer_peepholes1P_, %P27fixed_size_buffer_peepholes1P_* %0, i32 0, i32 0 -// CHECK: call %swift.opaque* %initializeBufferWithTake([[BUFFER_TYPE]]* [[BUFFER]], %swift.opaque* %1 +// CHECK: call %swift.opaque* %initializeBufferWithTake([[BUFFER_TYPE:\[.* x i8\]]]* [[BUFFER]], %swift.opaque* %1 sil @join_init_existential_copy_addr : $@convention(thin) (@in T) -> @out P { entry(%p : $*P, %x: $*T): %y = init_existential_addr %p : $*P, $T copy_addr [take] %x to [initialization] %y : $*T return undef : $() } - -// CHECK-LABEL: define{{( protected)?}} void @dont_join_alloc_stack_copy_addr_if_intervening_use -sil @dont_join_alloc_stack_copy_addr_if_intervening_use : $@convention(thin) (@in T) -> () { -entry(%x : $*T): - // CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[ADDR:%.*]] = call %swift.opaque* %allocateBuffer([[BUFFER_TYPE]]* [[BUFFER]], %swift.type* %T) - %a = alloc_stack $T - %p = function_ref @produce : $@convention(thin) () -> @out T - %y = apply %p(%a) : $@convention(thin) () -> @out T - destroy_addr %a : $*T - - // CHECK: call %swift.opaque* %initializeWithCopy(%swift.opaque* [[ADDR]], - copy_addr %x to [initialization] %a : $*T - %u = function_ref @consume : $@convention(thin) (@in T) -> () - %z = apply %u(%a) : $@convention(thin) (@in T) -> () - dealloc_stack %a : $*T - return undef : $() -} - -// CHECK-LABEL: define{{( protected)?}} void @dont_join_alloc_stack_copy_addr_if_no_copy_addr -sil @dont_join_alloc_stack_copy_addr_if_no_copy_addr : $@convention(thin) (@in T) -> () { -entry(%x : $*T): - // CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[.* x 
i8\]]] - // CHECK: [[ADDR:%.*]] = call %swift.opaque* %allocateBuffer([[BUFFER_TYPE]]* [[BUFFER]], %swift.type* %T) - %a = alloc_stack $T - %p = function_ref @produce : $@convention(thin) () -> @out T - %y = apply %p(%a) : $@convention(thin) () -> @out T - destroy_addr %a : $*T - dealloc_stack %a : $*T - return undef : $() -} - -// CHECK-LABEL: define{{( protected)?}} void @dont_join_alloc_stack_if_copy_addr_in_different_bb -sil @dont_join_alloc_stack_if_copy_addr_in_different_bb : $@convention(thin) (@in T) -> () { -entry(%x : $*T): - // CHECK: [[BUFFER:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[ADDR:%.*]] = call %swift.opaque* %allocateBuffer([[BUFFER_TYPE]]* [[BUFFER]], %swift.type* %T) - %a = alloc_stack $T - br next - -next: - // CHECK: call %swift.opaque* %initializeWithCopy(%swift.opaque* [[ADDR]], - copy_addr %x to [initialization] %a : $*T - // CHECK: call void @consume(%swift.opaque* noalias nocapture [[ADDR]], %swift.type* %T) - %u = function_ref @consume : $@convention(thin) (@in T) -> () - %z = apply %u(%a) : $@convention(thin) (@in T) -> () - dealloc_stack %a : $*T - return undef : $() -} - - diff --git a/test/IRGen/generic_casts.swift b/test/IRGen/generic_casts.swift index f3203d4c3b15e..2d36ff80a8389 100644 --- a/test/IRGen/generic_casts.swift +++ b/test/IRGen/generic_casts.swift @@ -34,11 +34,18 @@ import gizmo // CHECK: define hidden i64 @_TF13generic_casts8allToInt{{.*}}(%swift.opaque* noalias nocapture, %swift.type* %T) func allToInt(_ x: T) -> Int { return x as! 
Int - // CHECK: [[BUF:%.*]] = alloca [[BUFFER:.24 x i8.]], // CHECK: [[INT_TEMP:%.*]] = alloca %Si, - // CHECK: [[TEMP:%.*]] = call %swift.opaque* {{.*}}([[BUFFER]]* [[BUF]], %swift.opaque* %0, %swift.type* %T) + // CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** + // CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], i64 -1 + // CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] + // CHECK: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 + // CHECK: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] + // CHECK: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] + // CHECK: [[T_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 + // CHECK: [[T_TMP:%.*]] = bitcast i8* [[T_ALLOCA]] to %swift.opaque* + // CHECK: [[TEMP:%.*]] = call %swift.opaque* {{.*}}(%swift.opaque* [[T_TMP]], %swift.opaque* %0, %swift.type* %T) // CHECK: [[T0:%.*]] = bitcast %Si* [[INT_TEMP]] to %swift.opaque* - // CHECK: call i1 @swift_rt_swift_dynamicCast(%swift.opaque* [[T0]], %swift.opaque* [[TEMP]], %swift.type* %T, %swift.type* @_TMSi, i64 7) + // CHECK: call i1 @swift_rt_swift_dynamicCast(%swift.opaque* [[T0]], %swift.opaque* [[T_TMP]], %swift.type* %T, %swift.type* @_TMSi, i64 7) // CHECK: [[T0:%.*]] = getelementptr inbounds %Si, %Si* [[INT_TEMP]], i32 0, i32 0 // CHECK: [[INT_RESULT:%.*]] = load i64, i64* [[T0]], // CHECK: ret i64 [[INT_RESULT]] diff --git a/test/IRGen/generic_tuples.swift b/test/IRGen/generic_tuples.swift index 829ad7b958c59..f5d80fa91cc23 100644 --- a/test/IRGen/generic_tuples.swift +++ b/test/IRGen/generic_tuples.swift @@ -14,26 +14,25 @@ func dup(_ x: T) -> (T, T) { var x = x; return (x,x) } // CHECK: define hidden void @_TF14generic_tuples3dup{{.*}}(%swift.opaque* noalias nocapture, %swift.opaque* noalias nocapture, %swift.opaque* noalias nocapture, %swift.type* %T) // CHECK: entry: // Allocate a local variable for 'x'. 
-// CHECK-NEXT: [[XBUF:%.*]] = alloca [[BUFFER:.*]], align 8 -// CHECK-NEXT: [[XBUFLIFE:%.*]] = bitcast {{.*}} [[XBUF]] -// CHECK-NEXT: call void @llvm.lifetime.start({{.*}} [[XBUFLIFE]]) -// CHECK-NEXT: [[T0:%.*]] = bitcast [[TYPE]]* %T to i8*** -// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i8**, i8*** [[T0]], i64 -1 -// CHECK-NEXT: [[T_VALUE:%.*]] = load i8**, i8*** [[T1]], align 8 -// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8*, i8** [[T_VALUE]] -// CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[T0]], align 8 -// CHECK-NEXT: [[INITIALIZE_BUFFER_FN:%.*]] = bitcast i8* [[T1]] to [[OPAQUE]]* ([[BUFFER]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: [[X:%.*]] = call [[OPAQUE]]* [[INITIALIZE_BUFFER_FN]]([[BUFFER]]* [[XBUF]], [[OPAQUE]]* {{.*}}, [[TYPE]]* %T) +// CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** +// CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], i64 -1 +// CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] +// CHECK: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] +// CHECK: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] +// CHECK: [[X_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 +// CHECK: [[X_TMP:%.*]] = bitcast i8* [[X_ALLOCA]] to %swift.opaque* +// CHECK-NEXT: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 +// CHECK-NEXT: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]], align 8 +// CHECK-NEXT: [[INITIALIZE_WITH_COPY:%.*]] = bitcast i8* [[WITNESS]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: [[X:%.*]] = call [[OPAQUE]]* [[INITIALIZE_WITH_COPY]]([[OPAQUE]]* [[X_TMP]], [[OPAQUE]]* {{.*}}, [[TYPE]]* %T) // Copy 'x' into the first result. 
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8*, i8** [[T_VALUE]], i32 6 -// CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[T0]], align 8 -// CHECK-NEXT: [[COPY_FN:%.*]] = bitcast i8* [[T1]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call [[OPAQUE]]* [[COPY_FN]]([[OPAQUE]]* %0, [[OPAQUE]]* [[X]], [[TYPE]]* %T) +// CHECK-NEXT: call [[OPAQUE]]* [[INITIALIZE_WITH_COPY]]([[OPAQUE]]* %0, [[OPAQUE]]* [[X_TMP]], [[TYPE]]* %T) // Copy 'x' into the second element. -// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8*, i8** [[T_VALUE]], i32 9 -// CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[T0]], align 8 -// CHECK-NEXT: [[TAKE_FN:%.*]] = bitcast i8* [[T1]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* %1, [[OPAQUE]]* [[X]], [[TYPE]]* %T) +// CHECK-NEXT: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 9 +// CHECK-NEXT: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]], align 8 +// CHECK-NEXT: [[TAKE_FN:%.*]] = bitcast i8* [[WITNESS]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* %1, [[OPAQUE]]* [[X_TMP]], [[TYPE]]* %T) struct S {} diff --git a/test/IRGen/lifetime.sil b/test/IRGen/lifetime.sil index 9f44d9be3772f..7084b72837c76 100644 --- a/test/IRGen/lifetime.sil +++ b/test/IRGen/lifetime.sil @@ -18,29 +18,29 @@ bb0(%x : $*T): return %0 : $() } // CHECK: define{{( protected)?}} void @generic([[OPAQUE]]* noalias nocapture, [[TYPE]]* %T) {{.*}} { -// The fixed-size buffer. -// CHECK: [[YBUF:%.*]] = alloca [[BUFFER:.*]], align -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start(i64 [[BUFFER_SIZE:12|24]], i8* [[YBUFLIFE]]) // Allocate it. 
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[TYPE]]* %T to i8*** -// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i8**, i8*** [[T0]], {{i32|i64}} -1 -// CHECK-NEXT: [[VWTABLE:%.*]] = load i8**, i8*** [[T1]], align -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 5 -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align -// CHECK-NEXT: [[BUFFER_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[BUFFER]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** +// CHECK-NEXT: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], {{(i32|i64)}} -1 +// CHECK-NEXT: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] +// CHECK-NEXT: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK-NEXT: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] +// CHECK-NEXT: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] +// CHECK-NEXT: [[Y_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 +// CHECK-NEXT: call void @llvm.lifetime.start({{(i32|i64)}} -1, i8* [[Y_ALLOCA]]) +// CHECK-NEXT: [[Y_TMP:%.*]] = bitcast i8* [[Y_ALLOCA]] to %swift.opaque* // Copy 'x' into 'y'. -// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[BUFFER_COPY_FN]]([[BUFFER]]* [[YBUF]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) -// Destroy and deallocate 'y'. -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[VWTABLE]], align -// CHECK-NEXT: [[DESTROY_BUFFER_FN:%.*]] = bitcast i8* [[T4]] to void ([[BUFFER]]*, [[TYPE]]*)* -// CHECK-NEXT: call void [[DESTROY_BUFFER_FN]]([[BUFFER]]* [[YBUF]], [[TYPE]]* %T) -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end(i64 [[BUFFER_SIZE]], i8* [[YBUFLIFE]]) -// Destroy 'x'. 
-// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 4 +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 +// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align +// CHECK-NEXT: [[INIT_WITH_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[INIT_WITH_COPY_FN]]([[OPAQUE]]* [[Y_TMP]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) +// Destroy 'y'. +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 4 // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align // CHECK-NEXT: [[DESTROY_FN:%.*]] = bitcast i8* [[T4]] to void ([[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y_TMP]], [[TYPE]]* %T) +// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[OPAQUE]]* [[Y_TMP]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} -1, i8* [[YBUFLIFE]]) +// Destroy 'x'. // CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[X]], [[TYPE]]* %T) // Return. // CHECK-NEXT: ret void @@ -57,35 +57,35 @@ bb0(%x : $*T): return %0 : $() } // CHECK: define{{( protected)?}} void @generic_with_reuse([[OPAQUE]]* noalias nocapture, [[TYPE]]* %T) {{.*}} { -// The fixed-size buffer. -// CHECK: [[YBUF:%.*]] = alloca [[BUFFER:.*]], align -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start(i64 [[BUFFER_SIZE:12|24]], i8* [[YBUFLIFE]]) // Allocate it. 
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[TYPE]]* %T to i8*** -// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i8**, i8*** [[T0]], {{i32|i64}} -1 -// CHECK-NEXT: [[VWTABLE:%.*]] = load i8**, i8*** [[T1]], align -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 5 -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align -// CHECK-NEXT: [[BUFFER_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[BUFFER]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK: [[TYPE_ADDR:%.*]] = bitcast %swift.type* %T to i8*** +// CHECK-NEXT: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[TYPE_ADDR]], {{(i32|i64)}} -1 +// CHECK-NEXT: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] +// CHECK-NEXT: [[SIZE_WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK-NEXT: [[SIZE_WITNESS:%.*]] = load i8*, i8** [[SIZE_WITNESS_ADDR]] +// CHECK-NEXT: [[SIZE:%.*]] = ptrtoint i8* [[SIZE_WITNESS]] +// CHECK-NEXT: [[Y_ALLOCA:%.*]] = alloca i8, {{.*}} [[SIZE]], align 16 +// CHECK-NEXT: call void @llvm.lifetime.start({{(i32|i64)}} -1, i8* [[Y_ALLOCA]]) +// CHECK-NEXT: [[Y_TMP:%.*]] = bitcast i8* [[Y_ALLOCA]] to %swift.opaque* // Copy 'x' into 'y'. -// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[BUFFER_COPY_FN]]([[BUFFER]]* [[YBUF]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 +// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align +// CHECK-NEXT: [[INIT_WITH_COPY_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* +// CHECK-NEXT: [[Y:%.*]] = call [[OPAQUE]]* [[INIT_WITH_COPY_FN]]([[OPAQUE]]* [[Y_TMP]], [[OPAQUE]]* [[X:%.*]], [[TYPE]]* %T) // Destroy 'y'. 
-// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 4 +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 4 // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align // CHECK-NEXT: [[DESTROY_FN:%.*]] = bitcast i8* [[T4]] to void ([[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y]], [[TYPE]]* %T) +// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y_TMP]], [[TYPE]]* %T) // Copy 'x' into 'y' again, this time as a take. -// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWTABLE]], i32 9 +// CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 9 // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align // CHECK-NEXT: [[TAKE_FN:%.*]] = bitcast i8* [[T4]] to [[OPAQUE]]* ([[OPAQUE]]*, [[OPAQUE]]*, [[TYPE]]*)* -// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* [[Y]], [[OPAQUE]]* [[X]], [[TYPE]]* %T) -// Destroy and deallocate 'y'. -// CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[VWTABLE]], align -// CHECK-NEXT: [[DESTROY_BUFFER_FN:%.*]] = bitcast i8* [[T4]] to void ([[BUFFER]]*, [[TYPE]]*)* -// CHECK-NEXT: call void [[DESTROY_BUFFER_FN]]([[BUFFER]]* [[YBUF]], [[TYPE]]* %T) -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[BUFFER]]* [[YBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end(i64 [[BUFFER_SIZE]], i8* [[YBUFLIFE]]) +// CHECK-NEXT: call [[OPAQUE]]* [[TAKE_FN]]([[OPAQUE]]* [[Y_TMP]], [[OPAQUE]]* [[X]], [[TYPE]]* %T) +// Destroy 'y'. +// CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y_TMP]], [[TYPE]]* %T) +// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[OPAQUE]]* [[Y_TMP]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} -1, i8* [[YBUFLIFE]]) // Return. 
// CHECK-NEXT: ret void @@ -102,9 +102,9 @@ bb0(%x : $*Builtin.Int64): // CHECK-LABEL: define{{( protected)?}} void @fixed_size(i64* noalias nocapture dereferenceable(8)) // CHECK: [[XBUF:%.*]] = alloca i64 // CHECK-NEXT: [[XBUFLIFE:%.*]] = bitcast i64* [[XBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* [[XBUFLIFE]]) +// CHECK-NEXT: call void @llvm.lifetime.start({{(i32|i64)}} 8, i8* [[XBUFLIFE]]) // CHECK-NEXT: load // CHECK-NEXT: store // CHECK-NEXT: [[XBUFLIFE:%.*]] = bitcast i64* [[XBUF]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* [[XBUFLIFE]]) +// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} 8, i8* [[XBUFLIFE]]) diff --git a/test/IRGen/struct_resilience.swift b/test/IRGen/struct_resilience.swift index 9ef001e95236c..81bab4a877bec 100644 --- a/test/IRGen/struct_resilience.swift +++ b/test/IRGen/struct_resilience.swift @@ -15,25 +15,24 @@ import resilient_enum public func functionWithResilientTypes(_ s: Size, f: (Size) -> Size) -> Size { -// CHECK: [[RESULT:%.*]] = alloca [[BUFFER_TYPE:\[.* x i8\]]] - // CHECK: [[METADATA:%.*]] = call %swift.type* @_TMaV16resilient_struct4Size() // CHECK: [[METADATA_ADDR:%.*]] = bitcast %swift.type* [[METADATA]] to i8*** // CHECK: [[VWT_ADDR:%.*]] = getelementptr inbounds i8**, i8*** [[METADATA_ADDR]], [[INT]] -1 // CHECK: [[VWT:%.*]] = load i8**, i8*** [[VWT_ADDR]] -// CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 5 +// CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 17 +// CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]] +// CHECK: [[WITNESS_FOR_SIZE:%.*]] = ptrtoint i8* [[WITNESS]] +// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16 +// CHECK: [[STRUCT_ADDR:%.*]] = bitcast i8* [[ALLOCA]] to %swift.opaque* + +// CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_PTR]] -// CHECK: 
[[initializeBufferWithCopy:%.*]] = bitcast i8* [[WITNESS]] -// CHECK: [[BUFFER:%.*]] = call %swift.opaque* [[initializeBufferWithCopy]]([[BUFFER_TYPE]]* [[RESULT]], %swift.opaque* %1, %swift.type* [[METADATA]]) +// CHECK: [[initializeWithCopy:%.*]] = bitcast i8* [[WITNESS]] +// CHECK: [[STRUCT_LOC:%.*]] = call %swift.opaque* [[initializeWithCopy]](%swift.opaque* [[STRUCT_ADDR]], %swift.opaque* %1, %swift.type* [[METADATA]]) // CHECK: [[FN:%.*]] = bitcast i8* %2 to void (%swift.opaque*, %swift.opaque*, %swift.refcounted*)* -// CHECK: call void [[FN]](%swift.opaque* noalias nocapture sret %0, %swift.opaque* noalias nocapture [[BUFFER]], %swift.refcounted* %3) - -// CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 3 -// CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_PTR]] -// CHECK: [[deallocateBuffer:%.*]] = bitcast i8* [[WITNESS]] -// CHECK: call void [[deallocateBuffer]]([[BUFFER_TYPE]]* [[RESULT]], %swift.type* [[METADATA]]) +// CHECK: call void [[FN]](%swift.opaque* noalias nocapture sret %0, %swift.opaque* noalias nocapture [[STRUCT_ADDR]], %swift.refcounted* %3) // CHECK: [[WITNESS_PTR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 4 // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_PTR]] From f38c912878d11e3e90e1a0d17abb82a139075300 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sat, 17 Dec 2016 08:33:49 -0800 Subject: [PATCH 4/7] Add support for an IRGen lowering SIL pipeline. This pipeline is run as part of IRGen and has access to the IRGenModule. Passes that run as part of this pipeline can query for the IRGenModule. We will use it for the AllocStackHoisting pass. It wants to know if a type is of non-fixed size. To break the cyclic dependency between IRGen -> SILOptimizer -> IRGen that would arise from the SILPassManager having to know about the createIRGENPASS() function, IRGen passes -- instead of exposing this function -- have to dynamically add themselves to the pass manager.
--- include/swift/IRGen/IRGenPublic.h | 37 ++++++++++++++ .../SILOptimizer/PassManager/PassManager.h | 29 +++++++++++ .../SILOptimizer/PassManager/PassPipeline.def | 1 + .../swift/SILOptimizer/PassManager/Passes.def | 9 ++++ .../swift/SILOptimizer/PassManager/Passes.h | 1 + .../SILOptimizer/PassManager/Transforms.h | 6 +++ lib/IRGen/CMakeLists.txt | 1 + lib/IRGen/IRGen.cpp | 51 +++++++++++++++++++ lib/SILOptimizer/PassManager/PassManager.cpp | 18 +++++++ lib/SILOptimizer/PassManager/PassPipeline.cpp | 12 +++++ tools/SourceKit/lib/SwiftLang/CMakeLists.txt | 2 +- tools/sil-opt/CMakeLists.txt | 3 ++ tools/sil-opt/SILOpt.cpp | 24 +++++++-- 13 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 include/swift/IRGen/IRGenPublic.h diff --git a/include/swift/IRGen/IRGenPublic.h b/include/swift/IRGen/IRGenPublic.h new file mode 100644 index 0000000000000..a6bb01ddbdea5 --- /dev/null +++ b/include/swift/IRGen/IRGenPublic.h @@ -0,0 +1,37 @@ +//===---------IRGenPublic.h - Public interface to IRGen ---------*- C++ -*-===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// +#ifndef SWIFT_IRGEN_IRGENPUBLIC_H +#define SWIFT_IRGEN_IRGENPUBLIC_H + +namespace llvm { + class LLVMContext; +} + +namespace swift { +class SILModule; + +namespace irgen { + +class IRGenerator; +class IRGenModule; + +/// Create an IRGen module. +std::pair +createIRGenModule(SILModule *SILMod, llvm::LLVMContext &LLVMContext); + +/// Delete the IRGenModule and IRGenerator obtained by the above call. 
+void deleteIRGenModule(std::pair &Module); + +} // end namespace irgen +} // end namespace swift + +#endif diff --git a/include/swift/SILOptimizer/PassManager/PassManager.h b/include/swift/SILOptimizer/PassManager/PassManager.h index 32416f000e690..339a43d7e6666 100644 --- a/include/swift/SILOptimizer/PassManager/PassManager.h +++ b/include/swift/SILOptimizer/PassManager/PassManager.h @@ -32,11 +32,18 @@ class SILModuleTransform; class SILOptions; class SILTransform; +namespace irgen { +class IRGenModule; +} + /// \brief The SIL pass manager. class SILPassManager { /// The module that the pass manager will transform. SILModule *Mod; + /// An optional IRGenModule associated with this PassManager. + irgen::IRGenModule *IRMod; + /// The list of transformations to run. llvm::SmallVector Transformations; @@ -90,11 +97,20 @@ class SILPassManager { /// same function. bool RestartPipeline = false; + + /// The IRGen SIL passes. These have to be dynamically added by IRGen. + llvm::DenseMap IRGenPasses; + public: /// C'tor. It creates and registers all analysis passes, which are defined /// in Analysis.def. SILPassManager(SILModule *M, llvm::StringRef Stage = ""); + /// C'tor. It creates an IRGen pass manager. Passes can query for the + /// IRGenModule. + SILPassManager(SILModule *M, irgen::IRGenModule *IRMod, + llvm::StringRef Stage = ""); + const SILOptions &getOptions() const; /// \brief Searches for an analysis of type T in the list of registered @@ -111,6 +127,10 @@ class SILPassManager { /// \returns the module that the pass manager owns. SILModule *getModule() { return Mod; } + /// \returns the associated IRGenModule or null if this is not an IRGen + /// pass manager. + irgen::IRGenModule *getIRGenModule() { return IRMod; } + /// \brief Run one iteration of the optimization pipeline.
void runOneIteration(); @@ -218,6 +238,15 @@ class SILPassManager { } } + void registerIRGenPass(PassKind Kind, SILFunctionTransform *Transform) { + assert(IRGenPasses.find(unsigned(Kind)) == IRGenPasses.end() && + "Pass already registered"); + assert( + IRMod && + "Attempting to register an IRGen pass with a non-IRGen pass manager"); + IRGenPasses[unsigned(Kind)] = Transform; + } + private: void execute() { runOneIteration(); diff --git a/include/swift/SILOptimizer/PassManager/PassPipeline.def b/include/swift/SILOptimizer/PassManager/PassPipeline.def index 8b0ddcd583e77..71a78e017c5b5 100644 --- a/include/swift/SILOptimizer/PassManager/PassPipeline.def +++ b/include/swift/SILOptimizer/PassManager/PassPipeline.def @@ -35,6 +35,7 @@ PASSPIPELINE(OwnershipEliminator, "Utility pass to just run the ownership elimin PASSPIPELINE_WITH_OPTIONS(Performance, "Passes run at -O") PASSPIPELINE(Onone, "Passes run at -Onone") PASSPIPELINE(InstCount, "Utility pipeline to just run the inst count pass") +PASSPIPELINE(IRGenPrepare, "Pipeline to run during IRGen") #undef PASSPIPELINE_WITH_OPTIONS #undef PASSPIPELINE diff --git a/include/swift/SILOptimizer/PassManager/Passes.def b/include/swift/SILOptimizer/PassManager/Passes.def index 00f198aa9d78c..6bc6f2571baad 100644 --- a/include/swift/SILOptimizer/PassManager/Passes.def +++ b/include/swift/SILOptimizer/PassManager/Passes.def @@ -23,6 +23,14 @@ #error "Macro must be defined by includer" #endif +/// IRGEN_PASS(Id, Name, Description) +/// The pass is identified by PassKind::Id. +/// An IRGen pass is created by IRGen and needs to be registered with the pass +/// manager dynamically. +#ifndef IRGEN_PASS +#define IRGEN_PASS(Id, Name, Description) PASS(Id, Name, Description) +#endif + /// PASS_RANGE(RANGE_ID, START, END) /// Pass IDs between PassKind::START and PassKind::END, inclusive, /// fall within the set known as @@ -243,5 +251,6 @@ PASS(BugReducerTester, "bug-reducer-tester", "Utility pass for testing sil-bug-reducer.
Asserts when visits an apply that calls a specific function") PASS_RANGE(AllPasses, AADumper, BugReducerTester) +#undef IRGEN_PASS #undef PASS #undef PASS_RANGE diff --git a/include/swift/SILOptimizer/PassManager/Passes.h b/include/swift/SILOptimizer/PassManager/Passes.h index 1c2e580223e8c..b988a9c65b650 100644 --- a/include/swift/SILOptimizer/PassManager/Passes.h +++ b/include/swift/SILOptimizer/PassManager/Passes.h @@ -74,6 +74,7 @@ namespace swift { StringRef PassKindID(PassKind Kind); #define PASS(ID, NAME, DESCRIPTION) SILTransform *create##ID(); +#define IRGEN_PASS(ID, NAME, DESCRIPTION) #include "Passes.def" } // end namespace swift diff --git a/include/swift/SILOptimizer/PassManager/Transforms.h b/include/swift/SILOptimizer/PassManager/Transforms.h index 954a7fe1e28d3..3391b0f09b187 100644 --- a/include/swift/SILOptimizer/PassManager/Transforms.h +++ b/include/swift/SILOptimizer/PassManager/Transforms.h @@ -116,6 +116,12 @@ namespace swift { protected: SILFunction *getFunction() { return F; } + irgen::IRGenModule *getIRGenModule() { + auto *Mod = PM->getIRGenModule(); + assert(Mod && "Expecting a valid module"); + return Mod; + } + void invalidateAnalysis(SILAnalysis::InvalidationKind K) { PM->invalidateAnalysis(F, K); } diff --git a/lib/IRGen/CMakeLists.txt b/lib/IRGen/CMakeLists.txt index 22723d0cd5ea1..6481781132a04 100644 --- a/lib/IRGen/CMakeLists.txt +++ b/lib/IRGen/CMakeLists.txt @@ -45,6 +45,7 @@ add_swift_library(swiftIRGen STATIC swiftLLVMPasses swiftSIL swiftSILGen + swiftSILOptimizer # Clang dependencies. # FIXME: Clang should really export these in some reasonable manner. 
diff --git a/lib/IRGen/IRGen.cpp b/lib/IRGen/IRGen.cpp index d7c94b43f7405..77ca7d783841d 100644 --- a/lib/IRGen/IRGen.cpp +++ b/lib/IRGen/IRGen.cpp @@ -27,8 +27,12 @@ #include "swift/Basic/Timer.h" #include "swift/Basic/Version.h" #include "swift/ClangImporter/ClangImporter.h" +#include "swift/IRGen/IRGenPublic.h" #include "swift/LLVMPasses/PassesFwd.h" #include "swift/LLVMPasses/Passes.h" +#include "swift/SILOptimizer/PassManager/Passes.h" +#include "swift/SILOptimizer/PassManager/PassManager.h" +#include "swift/SILOptimizer/PassManager/PassPipeline.h" #include "clang/Basic/TargetInfo.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/Bitcode/ReaderWriter.h" @@ -596,6 +600,43 @@ static void initLLVMModule(const IRGenModule &IGM) { Module->setDataLayout(IGM.DataLayout.getStringRepresentation()); } +std::pair +swift::irgen::createIRGenModule(SILModule *SILMod, + llvm::LLVMContext &LLVMContext) { + + IRGenOptions Opts; + IRGenerator *irgen = new IRGenerator(Opts, *SILMod); + auto targetMachine = irgen->createTargetMachine(); + if (!targetMachine) + return std::make_pair(nullptr, nullptr); + + // Create the IR emitter. + IRGenModule *IGM = + new IRGenModule(*irgen, std::move(targetMachine), nullptr, LLVMContext, + "", Opts.getSingleOutputFilename()); + + initLLVMModule(*IGM); + + return std::pair(irgen, IGM); +} + +void swift::irgen::deleteIRGenModule( + std::pair &IRGenPair) { + delete IRGenPair.second; + delete IRGenPair.first; +} + +/// \brief Run the IRGen preparation SIL pipeline. Passes have access to the +/// IRGenModule. +static void runIRGenPreparePasses(SILModule &Module, + irgen::IRGenModule &IRModule) { + SILPassManager PM(&Module, &IRModule); + //PM.registerIRGenPass(swift::PassKind::YOURPASS, + // createYOURPASS()); + PM.executePassPipelinePlan( + SILPassPipelinePlan::getIRGenPreparePassPipeline()); +} + /// Generates LLVM IR, runs the LLVM passes and produces the output file. /// All this is done in a single thread. 
static std::unique_ptr performIRGeneration(IRGenOptions &Opts, @@ -619,6 +660,9 @@ static std::unique_ptr performIRGeneration(IRGenOptions &Opts, LLVMContext, ModuleName, Opts.getSingleOutputFilename()); initLLVMModule(IGM); + + // Run SIL level IRGen preparation passes. + runIRGenPreparePasses(*SILMod, IGM); { SharedTimer timer("IRGen"); @@ -769,6 +813,7 @@ static void performParallelIRGeneration(IRGenOptions &Opts, auto &Ctx = M->getASTContext(); // Create an IRGenModule for each source file. + bool DidRunSILCodeGenPreparePasses = false; for (auto *File : M->getFiles()) { auto nextSF = dyn_cast(File); if (!nextSF || nextSF->ASTStage < SourceFile::TypeChecked) @@ -796,6 +841,12 @@ static void performParallelIRGeneration(IRGenOptions &Opts, IGMcreated = true; initLLVMModule(*IGM); + if (!DidRunSILCodeGenPreparePasses) { + // Run SIL level IRGen preparation passes on the module the first time + // around. + runIRGenPreparePasses(*SILMod, *IGM); + DidRunSILCodeGenPreparePasses = true; + } } if (!IGMcreated) { diff --git a/lib/SILOptimizer/PassManager/PassManager.cpp b/lib/SILOptimizer/PassManager/PassManager.cpp index 420fbdbb2b283..dfc5b0b31f3f1 100644 --- a/lib/SILOptimizer/PassManager/PassManager.cpp +++ b/lib/SILOptimizer/PassManager/PassManager.cpp @@ -241,6 +241,12 @@ SILPassManager::SILPassManager(SILModule *M, llvm::StringRef Stage) : } } +SILPassManager::SILPassManager(SILModule *M, irgen::IRGenModule *IRMod, + llvm::StringRef Stage) + : SILPassManager(M, Stage) { + this->IRMod = IRMod; +} + bool SILPassManager::continueTransforming() { return Mod->getStage() == SILStage::Raw || NumPassesRun < SILNumOptPassesToRun; @@ -513,6 +519,9 @@ void SILPassManager::runOneIteration() { /// D'tor. SILPassManager::~SILPassManager() { + assert(IRGenPasses.empty() && "Must add IRGen SIL passes that were " + "registered to the list of transformations"); + // Free all transformations. 
for (auto *T : Transformations) delete T; @@ -607,6 +616,15 @@ void SILPassManager::addPass(PassKind Kind) { Transformations.push_back(T); \ break; \ } +#define IRGEN_PASS(ID, NAME, DESCRIPTION) \ + case PassKind::ID: { \ + SILTransform *T = IRGenPasses[unsigned(Kind)]; \ + assert(T && "Missing IRGen pass?"); \ + T->setPassKind(PassKind::ID); \ + Transformations.push_back(T); \ + IRGenPasses.erase(unsigned(Kind)); \ + break; \ + } #include "swift/SILOptimizer/PassManager/Passes.def" case PassKind::invalidPassKind: llvm_unreachable("invalid pass kind"); diff --git a/lib/SILOptimizer/PassManager/PassPipeline.cpp b/lib/SILOptimizer/PassManager/PassPipeline.cpp index d0fa3aba744d5..8eb94d3baf0e3 100644 --- a/lib/SILOptimizer/PassManager/PassPipeline.cpp +++ b/lib/SILOptimizer/PassManager/PassPipeline.cpp @@ -401,6 +401,18 @@ static void addSILDebugInfoGeneratorPipeline(SILPassPipelinePlan &P) { P.addSILDebugInfoGenerator(); } +/// Non-mandatory passes that should run as preparation for IRGen. +static void addIRGenPreparePipeline(SILPassPipelinePlan &P) { + P.startPipeline("IRGen Preparation"); + // Insert SIL passes to run during IRGen. 
+} + +SILPassPipelinePlan SILPassPipelinePlan::getIRGenPreparePassPipeline() { + SILPassPipelinePlan P; + addIRGenPreparePipeline(P); + return P; +} + SILPassPipelinePlan SILPassPipelinePlan::getPerformancePassPipeline(SILOptions Options) { SILPassPipelinePlan P; diff --git a/tools/SourceKit/lib/SwiftLang/CMakeLists.txt b/tools/SourceKit/lib/SwiftLang/CMakeLists.txt index d62ed29f61d37..799d396d5ab7c 100644 --- a/tools/SourceKit/lib/SwiftLang/CMakeLists.txt +++ b/tools/SourceKit/lib/SwiftLang/CMakeLists.txt @@ -10,7 +10,7 @@ add_sourcekit_library(SourceKitSwiftLang SwiftSourceDocInfo.cpp DEPENDS SourceKitCore swiftDriver swiftFrontend swiftClangImporter swiftIndex swiftIDE swiftAST swiftMarkup swiftParse swiftSIL swiftSILGen swiftSILOptimizer - swiftSema swiftBasic swiftSerialization + swiftIRGen swiftSema swiftBasic swiftSerialization swiftOption cmark # Clang dependencies. clangIndex diff --git a/tools/sil-opt/CMakeLists.txt b/tools/sil-opt/CMakeLists.txt index f2ccbb8fbf0b8..45f2cf68853c8 100644 --- a/tools/sil-opt/CMakeLists.txt +++ b/tools/sil-opt/CMakeLists.txt @@ -5,6 +5,9 @@ add_swift_host_tool(sil-opt swiftIRGen swiftSILGen swiftSILOptimizer + # Clang libraries included to appease the linker on linux. 
+ clangBasic + clangCodeGen LLVM_COMPONENT_DEPENDS DebugInfoCodeView SWIFT_COMPONENT tools diff --git a/tools/sil-opt/SILOpt.cpp b/tools/sil-opt/SILOpt.cpp index 45e2fc14588db..1c5343c517c13 100644 --- a/tools/sil-opt/SILOpt.cpp +++ b/tools/sil-opt/SILOpt.cpp @@ -20,6 +20,7 @@ #include "swift/AST/DiagnosticsFrontend.h" #include "swift/AST/SILOptions.h" #include "swift/Basic/LLVMInitialize.h" +#include "swift/Basic/LLVMContext.h" #include "swift/Frontend/DiagnosticVerifier.h" #include "swift/Frontend/Frontend.h" #include "swift/Frontend/PrintingDiagnosticConsumer.h" @@ -29,6 +30,8 @@ #include "swift/Serialization/SerializedModuleLoader.h" #include "swift/Serialization/SerializedSILLoader.h" #include "swift/Serialization/SerializationOptions.h" +#include "swift/IRGen/IRGenPublic.h" +#include "swift/IRGen/IRGenSILPasses.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -169,10 +172,20 @@ AssumeUnqualifiedOwnershipWhenParsing( "assume-parsing-unqualified-ownership-sil", llvm::cl::Hidden, llvm::cl::init(false), llvm::cl::desc("Assume all parsed functions have unqualified ownership")); -static void runCommandLineSelectedPasses(SILModule *Module) { - SILPassManager PM(Module); +static void runCommandLineSelectedPasses(SILModule *Module, + irgen::IRGenModule *IRGenMod) { + SILPassManager PM(Module, IRGenMod); + for (auto P : Passes) { +#define PASS(ID, Name, Description) +#define IRGEN_PASS(ID, Name, Description) \ + if (P == PassKind::ID) \ + PM.registerIRGenPass(swift::PassKind::ID, irgen::create##ID()); +#include "swift/SILOptimizer/PassManager/Passes.def" + } + PM.executePassPipelinePlan( SILPassPipelinePlan::getPassPipelineForKinds(Passes)); + if (Module->getOptions().VerifyAll) Module->verify(); } @@ -318,7 +331,12 @@ int main(int argc, char **argv) { } else if (OptimizationGroup == OptGroup::Performance) { runSILOptimizationPasses(*CI.getSILModule()); } else { - 
runCommandLineSelectedPasses(CI.getSILModule()); + auto *SILMod = CI.getSILModule(); + { + auto T = irgen::createIRGenModule(SILMod, getGlobalLLVMContext()); + runCommandLineSelectedPasses(SILMod, T.second); + irgen::deleteIRGenModule(T); + } } if (EmitSIB) { From f1c2dcf1fa3803a4e16de194b3a5db30d1ec687a Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 14 Dec 2016 07:56:26 -0800 Subject: [PATCH 5/7] Add an alloc_stack hoisting pass. Hoist alloc_stack instructions of 'generic' or resilient type to the entry block. At the same time also perform a very simple stack coloring analysis. This does not use a true liveness-analysis yet but rather employs some simple conservative checks to see whether the live ranges of two alloc_stacks might interfere. AllocStackHoisting is an IRGen SIL pass. This allows for using IRGen's type lowering information. Furthermore, hoisting and merging the alloc_stack instructions this late does not interfere with SIL optimizations because the resulting SIL never gets serialized. 
--- include/swift/IRGen/IRGenSILPasses.h | 23 + .../swift/SILOptimizer/PassManager/Passes.def | 2 + .../swift/SILOptimizer/PassManager/Passes.h | 4 + lib/IRGen/AllocStackHoisting.cpp | 430 ++++++++++++++++++ lib/IRGen/CMakeLists.txt | 1 + lib/IRGen/IRGen.cpp | 5 +- lib/IRGen/NonFixedTypeInfo.h | 1 + lib/SILOptimizer/PassManager/PassPipeline.cpp | 3 + test/IRGen/enum_resilience.swift | 2 + test/IRGen/lifetime.sil | 4 +- test/Runtime/linux-fatal-backtrace.swift | 1 - test/SILOptimizer/allocstack_hoisting.sil | 207 +++++++++ 12 files changed, 678 insertions(+), 5 deletions(-) create mode 100644 include/swift/IRGen/IRGenSILPasses.h create mode 100644 lib/IRGen/AllocStackHoisting.cpp create mode 100644 test/SILOptimizer/allocstack_hoisting.sil diff --git a/include/swift/IRGen/IRGenSILPasses.h b/include/swift/IRGen/IRGenSILPasses.h new file mode 100644 index 0000000000000..8f382825be19e --- /dev/null +++ b/include/swift/IRGen/IRGenSILPasses.h @@ -0,0 +1,23 @@ +//===--- IRGenSILPasses.cpp - The IRGen Prepare SIL Passes ----------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +namespace swift { + +class SILFunctionTransform; + +namespace irgen { + +/// Create a pass to hoist alloc_stack instructions with non-fixed size. 
+SILFunctionTransform *createAllocStackHoisting(); + +} // end namespace irgen +} // end namespace swift diff --git a/include/swift/SILOptimizer/PassManager/Passes.def b/include/swift/SILOptimizer/PassManager/Passes.def index 6bc6f2571baad..90550b1f3e461 100644 --- a/include/swift/SILOptimizer/PassManager/Passes.def +++ b/include/swift/SILOptimizer/PassManager/Passes.def @@ -44,6 +44,8 @@ PASS(ABCOpt, "abcopts", "Optimization of array bounds checks") PASS(AllocBoxToStack, "allocbox-to-stack", "Promote heap allocations to stack allocations") +IRGEN_PASS(AllocStackHoisting, "alloc-stack-hoisting", + "Hoist generic alloc_stack instructions to the entry block") PASS(ArrayCountPropagation, "array-count-propagation", "Propagate the count of arrays") PASS(ArrayElementPropagation, "array-element-propagation", diff --git a/include/swift/SILOptimizer/PassManager/Passes.h b/include/swift/SILOptimizer/PassManager/Passes.h index b988a9c65b650..1b2c166e2e9b3 100644 --- a/include/swift/SILOptimizer/PassManager/Passes.h +++ b/include/swift/SILOptimizer/PassManager/Passes.h @@ -23,6 +23,10 @@ namespace swift { class SILOptions; class SILTransform; + namespace irgen { + class IRGenModule; + } + /// \brief Run all the SIL diagnostic passes on \p M. /// /// \returns true if the diagnostic passes produced an error diff --git a/lib/IRGen/AllocStackHoisting.cpp b/lib/IRGen/AllocStackHoisting.cpp new file mode 100644 index 0000000000000..f9741a887219c --- /dev/null +++ b/lib/IRGen/AllocStackHoisting.cpp @@ -0,0 +1,430 @@ +//===--- AllocStackHoisting.cpp - Hoist alloc_stack instructions ----------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2016 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "alloc-stack-hoisting" + +#include "swift/IRGen/IRGenSILPasses.h" +#include "swift/SILOptimizer/Analysis/Analysis.h" +#include "swift/SILOptimizer/PassManager/Passes.h" +#include "swift/SILOptimizer/PassManager/Transforms.h" +#include "swift/SIL/DebugUtils.h" +#include "swift/SIL/SILBuilder.h" +#include "swift/SIL/SILInstruction.h" +#include "swift/SIL/SILArgument.h" + +#include "IRGenModule.h" +#include "NonFixedTypeInfo.h" + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace swift; + +llvm::cl::opt SILUseStackSlotMerging( + "sil-merge-stack-slots", llvm::cl::init(true), + llvm::cl::desc("Merge generic alloc_stack instructions")); + +/// Hoist generic alloc_stack instructions to the entry basic block and merge +/// alloc_stack instructions if their users span non-overlapping live-ranges. +/// +/// This helps avoid llvm.stacksave/stackrestore intrinsic calls during code +/// generation. IRGen will only emit dynamic alloca instructions if the alloc_stack +/// is in the entry block but will emit a dynamic alloca and +/// llvm.stacksave/stackrestore for all other basic blocks. +/// +/// Merging alloc_stack instructions saves code size and stack size. + +/// An alloc_stack instruction is hoistable if it is of generic type and the +/// type parameter is not dependent on an opened type. +static bool isHoistable(AllocStackInst *Inst, irgen::IRGenModule &Mod) { + auto SILTy = Inst->getType(); + // We don't need to hoist types that have reference semantics; no dynamic + alloca will be generated as they are fixed size.
+ if (SILTy.hasReferenceSemantics()) + return false; + + // Only hoist types that are dynamically sized (generics and resilient types). + auto &TI = Mod.getTypeInfo(SILTy); + if (TI.isFixedSize()) + return false; + + // Don't hoist generics with opened archetypes. We would have to hoist the + // open archetype instruction which might not be possible. + if (!Inst->getTypeDependentOperands().empty()) + return false; + return true; +} + +/// A partition of alloc_stack instructions. +/// +/// Initially, a partition contains alloc_stack instructions of one type. +/// After merging non-overlapping alloc_stack live ranges, a partition contains +/// a set of alloc_stack instructions that can be assigned a single stack +/// location. +namespace { +class Partition { +public: + SmallVector Elts; + + Partition(AllocStackInst *A) : Elts(1, A) {} + Partition() {} + + /// Assign a single alloc_stack instruction to all the alloc_stacks in the + /// partition. + /// + /// This assumes that the live ranges of the alloc_stack instructions are + /// non-overlapping. + void assignStackLocation(SmallVectorImpl &FunctionExits); +}; +} // end anonymous namespace. + +/// Erases all dealloc_stack users of an alloc_stack +static void eraseDeallocStacks(AllocStackInst *AllocStack) { + // Delete dealloc_stacks. + SmallVector DeallocStacksToDelete; + for (auto *U : AllocStack->getUses()) { + if (auto *DeallocStack = dyn_cast(U->getUser())) + DeallocStacksToDelete.push_back(DeallocStack); + } + for (auto *D : DeallocStacksToDelete) + D->eraseFromParent(); +} + +/// Inserts a dealloc_stack at all the function exits. +static void +insertDeallocStackAtEndOf(SmallVectorImpl &FunctionExits, + AllocStackInst *AllocStack) { + // Insert dealloc_stack in the exit blocks. + for (auto *Exit : FunctionExits) { + SILBuilder Builder(Exit); + Builder.createDeallocStack(AllocStack->getLoc(), AllocStack); + } +} + +/// Assign a single alloc_stack instruction to all the alloc_stacks in the +/// partition.
+void Partition::assignStackLocation( + SmallVectorImpl &FunctionExits) { + assert(!Elts.empty() && "Must have a least one location"); + // The assigned location is the first alloc_stack in our partition. + auto *AssignedLoc = Elts[0]; + + // Move this assigned location to the beginning of the entry block. + auto *EntryBB = AssignedLoc->getFunction()->entryBB(); + AssignedLoc->removeFromParent(); + EntryBB->push_front(AssignedLoc); + + // Erase the dealloc_stacks. + eraseDeallocStacks(AssignedLoc); + + // Insert a new dealloc_stack at the exit(s) of the function. + insertDeallocStackAtEndOf(FunctionExits, AssignedLoc); + + // Rewrite all the other alloc_stacks in the partition to use the assigned + // location. + for (auto *AllocStack : Elts) { + if (AssignedLoc == AllocStack) continue; + eraseDeallocStacks(AllocStack); + AllocStack->replaceAllUsesWith(AssignedLoc); + AllocStack->eraseFromParent(); + } +} + +/// Returns a single dealloc_stack user of the alloc_stack or nullptr otherwise. +static SILInstruction *getSingleDeallocStack(AllocStackInst *ASI) { + SILInstruction *Dealloc = nullptr; + for (auto *U : ASI->getUses()) { + auto *Inst = U->getUser(); + if (isa(Inst)) { + if (Dealloc == nullptr) { + Dealloc = Inst; + continue; + } + // Already saw a dealloc_stack. + return nullptr; + } + } + assert(Dealloc != nullptr); + return Dealloc; +} + +namespace { +/// Compute liveness for the partition to allow for an interference check +/// between two alloc_stack instructions. +/// +/// For now no liveness is computed and this just performs a simple check +/// whether two regions of alloc_stack instructions might overlap. +class Liveness { +public: + Liveness(Partition &P) {} + + /// Check whether the live ranges of the two alloc_stack instructions + /// might overlap. + /// + /// Currently this does not use a liveness analysis.
Rather we check that for + /// both alloc_stack we have: + /// * a single dealloc_stack user + /// * the dealloc_stack is in the same basic block + /// If the alloc_stack instructions are in different basic blocks we know that + /// the live-ranges can't overlap. + /// If they are in the same basic block we scan the basic block to determine + /// whether one dealloc_stack dominates the other alloc_stack. If this is the + /// case the live ranges can't overlap. + bool mayOverlap(AllocStackInst *A, AllocStackInst *B) { + assert(A != B); + + // Check that we have a single dealloc_stack user in the same block. + auto *singleDeallocA = getSingleDeallocStack(A); + if (singleDeallocA == nullptr || + singleDeallocA->getParent() != A->getParent()) + return true; + auto *singleDeallocB = getSingleDeallocStack(B); + if (singleDeallocB == nullptr || + singleDeallocB->getParent() != B->getParent()) + return true; + + // Different basic blocks. + if (A->getParent() != B->getParent()) + return false; + bool ALive = false; + bool BLive = false; + for (auto &Inst : *A->getParent()) { + if (A == &Inst) { + ALive = true; + } else if (singleDeallocA == &Inst) { + ALive = false; + } else if (B == &Inst) { + BLive = true; + } else if (singleDeallocB == &Inst) { + BLive = false; + } + + if (ALive && BLive) + return true; + } + return false; + } +}; +} // end anonymous namespace. + +namespace { +/// Merge alloc_stack instructions. +/// +/// This merges alloc_stack instructions of one type by: +/// * building partitions of alloc_stack instructions of one type +/// * merging alloc_stack instructions in each partition into one alloc_stack +/// if the live ranges spanned by the alloc_stack users are known not to +/// overlap. +class MergeStackSlots { + /// Contains partitions of alloc_stack instructions by type. + SmallVector PartitionByType; + /// The function exits. 
+ SmallVectorImpl &FunctionExits; + +public: + MergeStackSlots(SmallVectorImpl &AllocStacks, + SmallVectorImpl &FuncExits); + + /// Merge alloc_stack instructions if possible and hoist them to the entry + /// block. + void mergeSlots(); +}; +} // end anonymous namespace. + +MergeStackSlots::MergeStackSlots(SmallVectorImpl &AllocStacks, + SmallVectorImpl &FuncExits) + : FunctionExits(FuncExits) { + // Build initial partitions based on the type. + llvm::DenseMap TypeToPartitionMap; + for (auto *AS : AllocStacks) { + auto Ty = AS->getType(); + auto It = TypeToPartitionMap.find(Ty); + if (It != TypeToPartitionMap.end()) { + PartitionByType[It->second].Elts.push_back(AS); + } else { + PartitionByType.push_back(Partition(AS)); + TypeToPartitionMap[Ty] = PartitionByType.size() - 1; + } + } +} + +/// Merge alloc_stack instructions if possible and hoist them to the entry +/// block. +void MergeStackSlots::mergeSlots() { + for (auto &PartitionOfOneType : PartitionByType) { + Liveness Live(PartitionOfOneType); + + // Partitions that are known to contain non-overlapping alloc_stack + // live-ranges. + SmallVector DisjointPartitions(1, Partition()); + + // Look at all the alloc_stacks of one type. + for (auto *CurAllocStack : PartitionOfOneType.Elts) { + bool FoundAPartition = false; + // Check if we can add it to an existing partition that we have shown to be + // non-interfering. + for (auto &CandidateP : DisjointPartitions) { + // If the candidate partition is empty (the very first time we look at an + // alloc_stack) we can just add the alloc_stack. + if (CandidateP.Elts.empty()) { + CandidateP.Elts.push_back(CurAllocStack); + FoundAPartition = true; + break; + } + // Otherwise, we check interference of the current alloc_stack with the + // candidate partition.
+ bool InterferesWithCandidateP = false; + for (auto *AllocStackInParition : CandidateP.Elts) { + if (Live.mayOverlap(AllocStackInParition, CurAllocStack)) { + InterferesWithCandidateP = true; + break; + } + } + // No interference; add the current alloc_stack to the candidate + // partition. + if (InterferesWithCandidateP == false) { + CandidateP.Elts.push_back(CurAllocStack); + FoundAPartition = true; + break; + } + // Otherwise, we look at the next partition. + } + // If no partition was found, add a new one. + if (FoundAPartition == false) { + DisjointPartitions.push_back(Partition(CurAllocStack)); + } + } + + // Assign stack locations to disjoint partitions, hoisting alloc_stacks to the + // entry block at the same time. + for (auto &Par : DisjointPartitions) { + Par.assignStackLocation(FunctionExits); + } + } +} + + +namespace { +/// Hoist alloc_stack instructions to the entry block and merge them. +class HoistAllocStack { + /// The function to process. + SILFunction *F; + /// The current IRGenModule. + irgen::IRGenModule &IRGenMod; + + SmallVector AllocStackToHoist; + SmallVector FunctionExits; + +public: + HoistAllocStack(SILFunction *F, irgen::IRGenModule &Mod) + : F(F), IRGenMod(Mod) {} + + /// Try to hoist generic alloc_stack instructions to the entry block. + /// Returns true if the function was changed. + bool run(); + +private: + /// Collect generic alloc_stack instructions that can be moved to the entry + /// block. + void collectHoistableInstructions(); + + /// Move the hoistable alloc_stack instructions to the entry block. + void hoist(); +}; +} + +/// Collect generic alloc_stack instructions in the current function that can be +/// hoisted. +/// We can hoist generic alloc_stack instructions if they are not dependent on +/// another instruction that we would have to hoist. +/// A generic alloc_stack could reference an opened archetype that was not +/// opened in the entry block.
+void HoistAllocStack::collectHoistableInstructions() { + for (auto &BB : *F) { + for (auto &Inst : BB) { + // Terminators that are function exits are our dealloc_stack + // insertion points. + if (auto *Term = dyn_cast(&Inst)) { + if (Term->isFunctionExiting()) + FunctionExits.push_back(Term); + continue; + } + + auto *ASI = dyn_cast(&Inst); + if (!ASI) { + continue; + } + if (isHoistable(ASI, IRGenMod)) { + DEBUG(llvm::dbgs() << "Hoisting " << Inst); + AllocStackToHoist.push_back(ASI); + } else { + DEBUG(llvm::dbgs() << "Not hoisting " << Inst); + } + } + } +} + +/// Hoist the alloc_stack instructions to the entry block and sink the +/// dealloc_stack instructions to the function exits. +void HoistAllocStack::hoist() { + + if (SILUseStackSlotMerging) { + MergeStackSlots Merger(AllocStackToHoist, FunctionExits); + Merger.mergeSlots(); + } else { + // Hoist alloc_stacks to the entry block and delete dealloc_stacks. + auto *EntryBB = F->entryBB(); + for (auto *AllocStack : AllocStackToHoist) { + // Insert at the beginning of the entry block. + AllocStack->removeFromParent(); + EntryBB->push_front(AllocStack); + // Delete dealloc_stacks. + eraseDeallocStacks(AllocStack); + } + // Insert dealloc_stack in the exit blocks. + for (auto *AllocStack : AllocStackToHoist) { + insertDeallocStackAtEndOf(FunctionExits, AllocStack); + } + } +} + +/// Try to hoist generic alloc_stack instructions to the entry block. +/// Returns true if the function was changed. +bool HoistAllocStack::run() { + collectHoistableInstructions(); + + // Nothing to hoist?
+ if (AllocStackToHoist.empty()) + return false; + + hoist(); + return true; +} + +namespace { +class AllocStackHoisting : public SILFunctionTransform { + void run() override { + auto *F = getFunction(); + auto *Mod = getIRGenModule(); + assert(Mod && "This pass must be run as part of an IRGen pipeline"); + bool Changed = HoistAllocStack(F, *Mod).run(); + if (Changed) { + PM->invalidateAnalysis(F, SILAnalysis::InvalidationKind::Instructions); + } + } + StringRef getName() override { return "alloc_stack Hoisting"; } +}; +} // end anonymous namespace + +SILFunctionTransform *irgen::createAllocStackHoisting() { + return new AllocStackHoisting(); +} diff --git a/lib/IRGen/CMakeLists.txt b/lib/IRGen/CMakeLists.txt index 6481781132a04..3749f7cd320e3 100644 --- a/lib/IRGen/CMakeLists.txt +++ b/lib/IRGen/CMakeLists.txt @@ -1,4 +1,5 @@ add_swift_library(swiftIRGen STATIC + AllocStackHoisting.cpp DebugTypeInfo.cpp EnumPayload.cpp ExtraInhabitants.cpp diff --git a/lib/IRGen/IRGen.cpp b/lib/IRGen/IRGen.cpp index 77ca7d783841d..087af608ab641 100644 --- a/lib/IRGen/IRGen.cpp +++ b/lib/IRGen/IRGen.cpp @@ -28,6 +28,7 @@ #include "swift/Basic/Version.h" #include "swift/ClangImporter/ClangImporter.h" #include "swift/IRGen/IRGenPublic.h" +#include "swift/IRGen/IRGenSILPasses.h" #include "swift/LLVMPasses/PassesFwd.h" #include "swift/LLVMPasses/Passes.h" #include "swift/SILOptimizer/PassManager/Passes.h" @@ -631,8 +632,8 @@ void swift::irgen::deleteIRGenModule( static void runIRGenPreparePasses(SILModule &Module, irgen::IRGenModule &IRModule) { SILPassManager PM(&Module, &IRModule); - //PM.registerIRGenPass(swift::PassKind::YOURPASS, - // createYOURPASS()); + PM.registerIRGenPass(swift::PassKind::AllocStackHoisting, + createAllocStackHoisting()); PM.executePassPipelinePlan( SILPassPipelinePlan::getIRGenPreparePassPipeline()); } diff --git a/lib/IRGen/NonFixedTypeInfo.h b/lib/IRGen/NonFixedTypeInfo.h index e6a38472e0680..921ddd5d172b0 100644 --- a/lib/IRGen/NonFixedTypeInfo.h +++ 
b/lib/IRGen/NonFixedTypeInfo.h @@ -23,6 +23,7 @@ #ifndef SWIFT_IRGEN_NONFIXEDTYPEINFO_H #define SWIFT_IRGEN_NONFIXEDTYPEINFO_H +#include "Address.h" #include "GenOpaque.h" #include "IndirectTypeInfo.h" diff --git a/lib/SILOptimizer/PassManager/PassPipeline.cpp b/lib/SILOptimizer/PassManager/PassPipeline.cpp index 8eb94d3baf0e3..99c76f6093f08 100644 --- a/lib/SILOptimizer/PassManager/PassPipeline.cpp +++ b/lib/SILOptimizer/PassManager/PassPipeline.cpp @@ -405,6 +405,9 @@ static void addSILDebugInfoGeneratorPipeline(SILPassPipelinePlan &P) { static void addIRGenPreparePipeline(SILPassPipelinePlan &P) { P.startPipeline("IRGen Preparation"); // Insert SIL passes to run during IRGen. + // Hoist generic alloc_stack instructions to the entry block to enable better + // llvm-ir generation for dynamic alloca instructions. + P.addAllocStackHoisting(); } SILPassPipelinePlan SILPassPipelinePlan::getIRGenPreparePassPipeline() { diff --git a/test/IRGen/enum_resilience.swift b/test/IRGen/enum_resilience.swift index 0af9ce8156098..5b44a75c21681 100644 --- a/test/IRGen/enum_resilience.swift +++ b/test/IRGen/enum_resilience.swift @@ -148,6 +148,8 @@ public func constructResilientEnumPayload(_ s: Size) -> Medium { // CHECK: [[WITNESS:%.*]] = load i8*, i8** [[WITNESS_ADDR]] // CHECK: [[WITNESS_FOR_SIZE:%.*]] = ptrtoint i8* [[WITNESS]] // CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16 +// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16 +// CHECK: [[ALLOCA:%.*]] = alloca i8, {{.*}} [[WITNESS_FOR_SIZE]], align 16 // CHECK: [[ENUM_STORAGE:%.*]] = bitcast i8* [[ALLOCA]] to %swift.opaque* // CHECK: [[WITNESS_ADDR:%.*]] = getelementptr inbounds i8*, i8** [[VWT]], i32 6 diff --git a/test/IRGen/lifetime.sil b/test/IRGen/lifetime.sil index 7084b72837c76..bdf43eb468aa0 100644 --- a/test/IRGen/lifetime.sil +++ b/test/IRGen/lifetime.sil @@ -38,10 +38,10 @@ bb0(%x : $*T): // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** [[T3]], align // CHECK-NEXT: 
[[DESTROY_FN:%.*]] = bitcast i8* [[T4]] to void ([[OPAQUE]]*, [[TYPE]]*)* // CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[Y_TMP]], [[TYPE]]* %T) -// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[OPAQUE]]* [[Y_TMP]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} -1, i8* [[YBUFLIFE]]) // Destroy 'x'. // CHECK-NEXT: call void [[DESTROY_FN]]([[OPAQUE]]* [[X]], [[TYPE]]* %T) +// CHECK-NEXT: [[YBUFLIFE:%.*]] = bitcast [[OPAQUE]]* [[Y_TMP]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.end({{(i32|i64)}} -1, i8* [[YBUFLIFE]]) // Return. // CHECK-NEXT: ret void diff --git a/test/Runtime/linux-fatal-backtrace.swift b/test/Runtime/linux-fatal-backtrace.swift index c569d7c82fe73..03eaced45ffb2 100644 --- a/test/Runtime/linux-fatal-backtrace.swift +++ b/test/Runtime/linux-fatal-backtrace.swift @@ -2,7 +2,6 @@ // RUN: mkdir -p %t // RUN: %target-build-swift %s -o %t/a.out // RUN: not --crash %t/a.out 2>&1 | PYTHONPATH=%lldb-python-path %utils/symbolicate-linux-fatal %t/a.out - | %utils/backtrace-check -u - // REQUIRES: executable_test // REQUIRES: OS=linux-gnu // REQUIRES: lldb diff --git a/test/SILOptimizer/allocstack_hoisting.sil b/test/SILOptimizer/allocstack_hoisting.sil new file mode 100644 index 0000000000000..1cc8cbbfebbd8 --- /dev/null +++ b/test/SILOptimizer/allocstack_hoisting.sil @@ -0,0 +1,207 @@ +// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all %s -alloc-stack-hoisting | %FileCheck %s +// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all %s -alloc-stack-hoisting -sil-merge-stack-slots=false | %FileCheck %s +sil_stage canonical + +import Builtin +import Swift +import SwiftShims + +protocol P { +} + +struct Generic { + var x : T +} + +struct FixedSize { + var x : Builtin.Int8 +} + +// CHECK-LABEL: sil @hoist_generic +// CHECK: bb0({{.*}}): +// CHECK: [[AS:%.*]] = alloc_stack $T +// CHECK: bb1: +// CHECK-NOT: alloc_stack +// CHECK-NOT: dealloc_stack +// CHECK: bb2 +// 
CHECK: bb3: +// CHECK: dealloc_stack [[AS]] +// CHECK: return + +sil @hoist_generic : $@convention(thin) (@in T, Builtin.Int1) -> () { +bb0(%0 : $*T, %1: $Builtin.Int1): + cond_br %1, bb1, bb2 +bb1: + %2 = alloc_stack $T + copy_addr [take] %0 to [initialization] %2 : $*T + destroy_addr %2 : $*T + dealloc_stack %2 : $*T + br bb3 +bb2: + destroy_addr %0 : $*T + br bb3 +bb3: + %3 = tuple () + return %3 : $() +} + +sil @throwing_fun : $@convention(thin) () -> @error Error + +// CHECK-LABEL: sil @hoist_generic +// CHECK: bb0({{.*}}): +// CHECK: [[AS:%.*]] = alloc_stack $T +// CHECK: bb1: +// CHECK-NOT: alloc_stack +// CHECK-NOT: dealloc_stack +// CHECK: bb2 +// CHECK: bb3: +// CHECK: try_apply +// CHECK: bb4({{.*}}: +// CHECK: dealloc_stack [[AS]] +// CHECK: return +// CHECK: bb5({{.*}}): +// CHECK: dealloc_stack [[AS]] +// CHECK: throw + +sil @hoist_generic_throwing : $@convention(thin) (@in T, Builtin.Int1) -> @error Error { +bb0(%0 : $*T, %1: $Builtin.Int1): + cond_br %1, bb1, bb2 +bb1: + %2 = alloc_stack $T + copy_addr [take] %0 to [initialization] %2 : $*T + destroy_addr %2 : $*T + dealloc_stack %2 : $*T + br bb3 +bb2: + destroy_addr %0 : $*T + br bb3 +bb3: + %3 = function_ref @throwing_fun : $@convention(thin) () -> @error Error + try_apply %3() : $@convention(thin) () -> @error Error, normal bb4, error bb5 + +bb4(%6: $()): + %4 = tuple () + return %4 : $() + +bb5(%5: $Error): + throw %5: $Error +} + +// CHECK-LABEL: sil @hoist_generic +// CHECK: bb0({{.*}}): +// CHECK: [[AS:%.*]] = alloc_stack $Generic +// CHECK: bb1: +// CHECK-NOT: alloc_stack +// CHECK-NOT: dealloc_stack +// CHECK: bb2 +// CHECK: bb3: +// CHECK: dealloc_stack [[AS]] +// CHECK: return + +sil @hoist_generic_type : $@convention(thin) (@in Generic, Builtin.Int1) -> () { +bb0(%0 : $*Generic, %1: $Builtin.Int1): + cond_br %1, bb1, bb2 +bb1: + %2 = alloc_stack $Generic + copy_addr [take] %0 to [initialization] %2 : $*Generic + destroy_addr %2 : $*Generic + dealloc_stack %2 : $*Generic + br bb3 +bb2: + 
destroy_addr %0 : $*Generic + br bb3 +bb3: + %3 = tuple () + return %3 : $() +} + +// CHECK-LABEL: sil @hoist_generic_nesting +// CHECK: bb0({{.*}}): +// CHECK: [[AS:%.*]] = alloc_stack $T +// CHECK: [[AS2:%.*]] = alloc_stack $T +// CHECK: [[FIXED:%.*]] = alloc_stack $FixedSize +// CHECK: bb1: +// CHECK-NOT: alloc_stack +// CHECK-NOT: dealloc_stack +// CHECK: bb2 +// CHECK: bb3: +// CHECK dealloc_stack [[FIXED]] +// CHECK: dealloc_stack [[AS2]] +// CHECK: dealloc_stack [[AS]] +// CHECK: return + +sil @hoist_generic_nesting : $@convention(thin) (@in T, Builtin.Int1) -> () { +bb0(%0 : $*T, %1: $Builtin.Int1): + %2 = alloc_stack $FixedSize + cond_br %1, bb1, bb2 +bb1: + %3 = alloc_stack $T + %4 = alloc_stack $T + copy_addr [take] %0 to [initialization] %3 : $*T + destroy_addr %3 : $*T + dealloc_stack %4: $*T + dealloc_stack %3 : $*T + br bb3 +bb2: + destroy_addr %0 : $*T + br bb3 +bb3: + dealloc_stack %2: $*FixedSize + %5 = tuple () + return %5 : $() +} + +// CHECK-LABEL: sil @dont_hoist_opened_generic +// CHECK: bb0({{.*}}): +// CHECK-NOT: alloc_stack +// CHECK: bb1: +// CHECK: alloc_stack +// CHECK: bb2: +// CHECK: bb3: +// CHECK-NOT: dealloc_stack +// CHECK: return + +sil @dont_hoist_opened_generic : $@convention(thin) (@in P, Builtin.Int1) -> () { +bb0(%0 : $*P, %1: $Builtin.Int1): + cond_br %1, bb1, bb2 +bb1: + %2 = open_existential_addr %0 : $*P to $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P + %3 = alloc_stack $@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P + copy_addr [take] %2 to [initialization] %3 : $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P + destroy_addr %3 : $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P + dealloc_stack %3 : $*@opened("1B6851A6-4796-11E6-B7DF-B8E856428C60") P + br bb3 +bb2: + destroy_addr %0 : $*P + br bb3 +bb3: + %4 = tuple () + return %4 : $() +} + +// CHECK-LABEL: sil @dont_hoist_protocol +// CHECK: bb0({{.*}}): +// CHECK-NOT: alloc_stack +// CHECK: bb1: +// CHECK: alloc_stack +// CHECK: bb2: +// CHECK: bb3: 
+// CHECK-NOT: dealloc_stack +// CHECK: return + +sil @dont_hoist_protocol : $@convention(thin) (@in P, Builtin.Int1) -> () { +bb0(%0 : $*P, %1: $Builtin.Int1): + cond_br %1, bb1, bb2 +bb1: + %2 = alloc_stack $P + copy_addr [take] %0 to [initialization] %2 : $*P + destroy_addr %2 : $*P + dealloc_stack %2 : $*P + br bb3 +bb2: + destroy_addr %0 : $*P + br bb3 +bb3: + %3 = tuple () + return %3 : $() +} From 4773e8911880f9849b711232fc2291b6220836a3 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Mon, 19 Dec 2016 11:16:04 -0800 Subject: [PATCH 6/7] Use preprocess macro instead of manually adding IRGen passes. --- lib/IRGen/IRGen.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/IRGen/IRGen.cpp b/lib/IRGen/IRGen.cpp index 087af608ab641..3402e05c08133 100644 --- a/lib/IRGen/IRGen.cpp +++ b/lib/IRGen/IRGen.cpp @@ -632,8 +632,10 @@ void swift::irgen::deleteIRGenModule( static void runIRGenPreparePasses(SILModule &Module, irgen::IRGenModule &IRModule) { SILPassManager PM(&Module, &IRModule); - PM.registerIRGenPass(swift::PassKind::AllocStackHoisting, - createAllocStackHoisting()); +#define PASS(ID, Name, Description) +#define IRGEN_PASS(ID, Name, Description) \ + PM.registerIRGenPass(swift::PassKind::ID, irgen::create##ID()); +#include "swift/SILOptimizer/PassManager/Passes.def" PM.executePassPipelinePlan( SILPassPipelinePlan::getIRGenPreparePassPipeline()); } From a87f34372e9f29c42168648c0a1ad901de9d79ec Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Mon, 19 Dec 2016 11:18:56 -0800 Subject: [PATCH 7/7] Rename SILFunction::entryBB to getEntryBlock --- include/swift/SIL/SILFunction.h | 4 ++-- lib/IRGen/AllocStackHoisting.cpp | 4 ++-- lib/IRGen/IRGenSIL.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/swift/SIL/SILFunction.h b/include/swift/SIL/SILFunction.h index 6f9670cb0aedf..01306d0273008 100644 --- a/include/swift/SIL/SILFunction.h +++ b/include/swift/SIL/SILFunction.h @@ -618,8 +618,8 @@ 
class SILFunction SILBasicBlock &front() { return *begin(); } const SILBasicBlock &front() const { return *begin(); } - SILBasicBlock *entryBB() { return &front(); } - const SILBasicBlock *entryBB() const { return &front(); } + SILBasicBlock *getEntryBlock() { return &front(); } + const SILBasicBlock *getEntryBlock() const { return &front(); } SILBasicBlock *createBasicBlock(); SILBasicBlock *createBasicBlock(SILBasicBlock *After); diff --git a/lib/IRGen/AllocStackHoisting.cpp b/lib/IRGen/AllocStackHoisting.cpp index f9741a887219c..e04e3f0a1ca6c 100644 --- a/lib/IRGen/AllocStackHoisting.cpp +++ b/lib/IRGen/AllocStackHoisting.cpp @@ -119,7 +119,7 @@ void Partition::assignStackLocation( auto *AssignedLoc = Elts[0]; // Move this assigned location to the beginning of the entry block. - auto *EntryBB = AssignedLoc->getFunction()->entryBB(); + auto *EntryBB = AssignedLoc->getFunction()->getEntryBlock(); AssignedLoc->removeFromParent(); EntryBB->push_front(AssignedLoc); @@ -382,7 +382,7 @@ void HoistAllocStack::hoist() { Merger.mergeSlots(); } else { // Hoist alloc_stacks to the entry block and delete dealloc_stacks. - auto *EntryBB = F->entryBB(); + auto *EntryBB = F->getEntryBlock(); for (auto *AllocStack : AllocStackToHoist) { // Insert at the beginning of the entry block. AllocStack->removeFromParent(); diff --git a/lib/IRGen/IRGenSIL.cpp b/lib/IRGen/IRGenSIL.cpp index 7b6a7d9bee973..b2c5416f0a3a6 100644 --- a/lib/IRGen/IRGenSIL.cpp +++ b/lib/IRGen/IRGenSIL.cpp @@ -3530,7 +3530,7 @@ void IRGenSILFunction::visitAllocStackInst(swift::AllocStackInst *i) { (void) Decl; bool isEntryBlock = - i->getParentBlock() == i->getFunction()->entryBB(); + i->getParentBlock() == i->getFunction()->getEntryBlock(); auto addr = type.allocateStack(*this, i->getElementType(), isEntryBlock, dbgname);